Merge pull request #93 from DeBrosDAO/nightly

release: 0.122.47 — nightly → main
2026-06-17 08:04:13 +00:00 · 2026-06-11 17:37:20 +03:00
43 changed files with 323 additions and 3396 deletions
--- a/2
+++ b/2
@@ -1 +1 @@
-0.122.55
+0.122.47
--- a/core/migrations/031_refresh_token_custom_claims.sql
+++ b/core/migrations/031_refresh_token_custom_claims.sql
@@ -1,15 +0,0 @@
-- =============================================================================
-- 031_refresh_token_custom_claims.sql
--
-- Carry the additive JWT custom claims (e.g. the namespace's account_id from
-- the auth-claims-provider hook, bugboard #548/#920) ALONGSIDE the refresh
-- token, so a rotated access token keeps the same claims without re-invoking
-- the namespace's claims-provider function on every 15-min refresh (the
-- refresh path is the latency-critical VoIP-wake path, bugboard #125).
--
-- Resolved once at /v1/auth/verify mint time, stored here, and replayed +
-- propagated across each rotation. NULL/absent = no custom claims (the default
-- for every namespace without a claims-provider) → fully backward compatible.
-- =============================================================================
-
-ALTER TABLE refresh_tokens ADD COLUMN custom_claims TEXT;
--- a/core/migrations/032_refresh_token_reuse_grace.sql
+++ b/core/migrations/032_refresh_token_reuse_grace.sql
@@ -1,20 +0,0 @@
-- 032_refresh_token_reuse_grace.sql
--
-- Bugboard #125: bounded, single-use reuse grace for rotated refresh tokens.
--
-- Refresh-token rotation is single-use: a successful /v1/auth/refresh revokes
-- the presented token and issues a new one. If the rotation RESPONSE is lost
-- in transit (e.g. a reconnect storm during a gateway roll), the client is
-- left holding a just-revoked token and its retry dead-ends in a 401 -> SIWE.
-- On a VoIP-woken locked screen SIWE is impossible, so the call dies.
--
-- grace_used_at lets the gateway accept a just-rotated token ONE more time
-- within a short window (RFC 9700 §4.13.2 reuse grace) and mint a fresh
-- session, while the single-use CAS on this column prevents a stolen token
-- from being replayed repeatedly. NULL = grace not yet consumed.
--
-- Additive ALTER (rolling-upgrade safe): older gateways ignore the column;
-- newer ones read it back NULL for pre-existing rows, which is the correct
-- "grace available" default.
-
-ALTER TABLE refresh_tokens ADD COLUMN grace_used_at TIMESTAMP;
--- a/core/pkg/contracts/auth.go
+++ b/core/pkg/contracts/auth.go
@@ -43,10 +43,9 @@ type AuthService interface {
 	// Verifies signature, expiration, and issuer.
 	ParseAndVerifyJWT(token string) (*JWTClaims, error)

-	// GenerateJWT creates a new signed JWT with the specified subject, TTL, and
-	// optional additive custom claims (nil = none; bugboard #548).
+	// GenerateJWT creates a new signed JWT with the specified claims and TTL.
 	// Returns: token, expirationUnix, error.
-	GenerateJWT(namespace, subject string, ttl time.Duration, custom map[string]string) (string, int64, error)
+	GenerateJWT(namespace, subject string, ttl time.Duration) (string, int64, error)

 	// RegisterApp registers a new client application with the gateway.
 	// Returns an application ID that can be used for OAuth flows.
--- a/core/pkg/gateway/auth/crypto.go
+++ b/core/pkg/gateway/auth/crypto.go
@@ -4,7 +4,6 @@ import (
 	"crypto/hmac"
 	"crypto/sha256"
 	"encoding/hex"
-	"encoding/json"
 )

 // sha256Hex returns the lowercase hex-encoded SHA-256 hash of the input string.
@@ -23,34 +22,3 @@ func HmacSHA256Hex(data, secret string) string {
 	mac.Write([]byte(data))
 	return hex.EncodeToString(mac.Sum(nil))
 }
-
-// marshalClaims serializes additive JWT custom claims for storage alongside a
-// refresh token (bugboard #548). Empty/nil → "" so the column stays NULL-ish
-// and absent claims read back as nil.
-func marshalClaims(m map[string]string) string {
-	if len(m) == 0 {
-		return ""
-	}
-	b, err := json.Marshal(m)
-	if err != nil {
-		return ""
-	}
-	return string(b)
-}
-
-// unmarshalClaims is the inverse of marshalClaims. An empty string or any
-// malformed value yields nil (fail-soft — a corrupt claims blob must never
-// break token rotation; the token simply rotates without custom claims).
-func unmarshalClaims(s string) map[string]string {
-	if s == "" {
-		return nil
-	}
-	var m map[string]string
-	if err := json.Unmarshal([]byte(s), &m); err != nil {
-		return nil
-	}
-	if len(m) == 0 {
-		return nil
-	}
-	return m
-}
--- a/core/pkg/gateway/auth/jwt.go
+++ b/core/pkg/gateway/auth/jwt.go
@@ -182,22 +182,15 @@ func (s *Service) ParseAndVerifyJWT(token string) (*JWTClaims, error) {
 	return &claims, nil
 }

-// GenerateJWT mints a signed access token. `custom` carries additive
-// app-defined claims (e.g. the namespace's account_id from the claims-provider
-// hook, bugboard #548) under the top-level "custom" object — read back via
-// JWTClaims.Custom / oh.GetCallerClaim. Pass nil for none. Reserved claims
-// (sub/iss/aud/iat/nbf/exp/namespace) are always gateway-controlled and cannot
-// be overridden by `custom` (the caller is responsible for not putting
-// reserved keys here; the claims-provider path sanitizes them out upstream).
-func (s *Service) GenerateJWT(ns, subject string, ttl time.Duration, custom map[string]string) (string, int64, error) {
+func (s *Service) GenerateJWT(ns, subject string, ttl time.Duration) (string, int64, error) {
 	// Prefer EdDSA when available
 	if s.preferEdDSA && s.edSigningKey != nil {
-		return s.generateEdDSAJWT(ns, subject, ttl, custom)
+		return s.generateEdDSAJWT(ns, subject, ttl)
 	}
-	return s.generateRSAJWT(ns, subject, ttl, custom)
+	return s.generateRSAJWT(ns, subject, ttl)
 }

-func (s *Service) generateEdDSAJWT(ns, subject string, ttl time.Duration, custom map[string]string) (string, int64, error) {
+func (s *Service) generateEdDSAJWT(ns, subject string, ttl time.Duration) (string, int64, error) {
 	if s.edSigningKey == nil {
 		return "", 0, errors.New("EdDSA signing key unavailable")
 	}
@@ -218,9 +211,6 @@ func (s *Service) generateEdDSAJWT(ns, subject string, ttl time.Duration, custom
 		"exp":       exp.Unix(),
 		"namespace": ns,
 	}
-	if len(custom) > 0 {
-		payload["custom"] = custom
-	}
 	pb, _ := json.Marshal(payload)
 	hb64 := base64.RawURLEncoding.EncodeToString(hb)
 	pb64 := base64.RawURLEncoding.EncodeToString(pb)
@@ -230,7 +220,7 @@ func (s *Service) generateEdDSAJWT(ns, subject string, ttl time.Duration, custom
 	return signingInput + "." + sb64, exp.Unix(), nil
 }

-func (s *Service) generateRSAJWT(ns, subject string, ttl time.Duration, custom map[string]string) (string, int64, error) {
+func (s *Service) generateRSAJWT(ns, subject string, ttl time.Duration) (string, int64, error) {
 	if s.signingKey == nil {
 		return "", 0, errors.New("signing key unavailable")
 	}
@@ -251,9 +241,6 @@ func (s *Service) generateRSAJWT(ns, subject string, ttl time.Duration, custom m
 		"exp":       exp.Unix(),
 		"namespace": ns,
 	}
-	if len(custom) > 0 {
-		payload["custom"] = custom
-	}
 	pb, _ := json.Marshal(payload)
 	hb64 := base64.RawURLEncoding.EncodeToString(hb)
 	pb64 := base64.RawURLEncoding.EncodeToString(pb)
--- a/core/pkg/gateway/auth/refresh_rotation_test.go
+++ b/core/pkg/gateway/auth/refresh_rotation_test.go
@@ -31,19 +31,8 @@ type rotationMockORMDB struct {
 	client.DatabaseClient
 	mu             sync.Mutex
 	subjectByToken map[string]string // hashedToken -> subject (nil/missing = "invalid")
-	claimsByToken  map[string]string // hashedToken -> custom_claims JSON (bugboard #548)
-	// graceableTokens: hashedToken -> subject for tokens that are revoked but
-	// still inside the reuse-grace window (bugboard #125). The grace SELECT
-	// (detected by the grace_used_at predicate) reads from here.
-	graceableTokens map[string]string
 	inserted       int               // count of INSERTs (new refresh-token rows)
 	subjects       map[string]string // subject -> last hashed token inserted
-	// selectErrRemaining: number of upcoming "SELECT subject" calls that
-	// should return selectErr (simulates a transient rqlite leader outage).
-	// Decremented per matching call; 0 = serve normally (bugboard #125).
-	selectErr           error
-	selectErrRemaining  int
-	selectAttemptsTaken int
 }

 func (m *rotationMockORMDB) Query(_ context.Context, sql string, args ...interface{}) (*client.QueryResult, error) {
@@ -56,58 +45,17 @@ func (m *rotationMockORMDB) Query(_ context.Context, sql string, args ...interfa
 	if containsCI(sql, "SELECT id FROM namespaces") {
 		return &client.QueryResult{Count: 1, Rows: [][]interface{}{{int64(1)}}}, nil
 	}
-	// Grace-path SELECT (bugboard #125): SELECT subject for a recently-revoked,
-	// grace-available token. Distinguished from the active-path SELECT by the
-	// grace_used_at predicate. Must be checked BEFORE the generic handler.
-	if containsCI(sql, "SELECT subject") && containsCI(sql, "FROM refresh_tokens") && containsCI(sql, "grace_used_at") {
-		if len(args) < 2 {
-			return &client.QueryResult{Count: 0}, nil
-		}
-		hashedTok, _ := args[1].(string)
-		if subj, ok := m.graceableTokens[hashedTok]; ok && subj != "" {
-			claims := ""
-			if m.claimsByToken != nil {
-				claims = m.claimsByToken[hashedTok]
-			}
-			return &client.QueryResult{Count: 1, Rows: [][]interface{}{{subj, claims}}}, nil
-		}
-		return &client.QueryResult{Count: 0}, nil
-	}
-	// SELECT subject (+ custom_claims, bugboard #548) for the lookup.
-	if containsCI(sql, "SELECT subject") && containsCI(sql, "FROM refresh_tokens") {
-		m.selectAttemptsTaken++
-		if m.selectErrRemaining > 0 {
-			m.selectErrRemaining--
-			return nil, m.selectErr
-		}
+	// SELECT subject for the refresh-token lookup.
+	if containsCI(sql, "SELECT subject FROM refresh_tokens") {
 		if len(args) < 2 {
 			return &client.QueryResult{Count: 0}, nil
 		}
 		hashedTok, _ := args[1].(string)
 		if subj, ok := m.subjectByToken[hashedTok]; ok && subj != "" {
-			claims := ""
-			if m.claimsByToken != nil {
-				claims = m.claimsByToken[hashedTok]
-			}
-			return &client.QueryResult{Count: 1, Rows: [][]interface{}{{subj, claims}}}, nil
+			return &client.QueryResult{Count: 1, Rows: [][]interface{}{{subj}}}, nil
 		}
 		return &client.QueryResult{Count: 0}, nil
 	}
-	// RevokeToken UPDATE that ALSO burns the grace slot (bugboard #125
-	// logout-bypass fix). Reflect it by clearing the token's grace eligibility
-	// so a follow-on grace SELECT misses it. (The rotation grace CAS goes
-	// through the rqlite Exec mock, not here, so there's no collision.)
-	if containsCI(sql, "UPDATE refresh_tokens") && containsCI(sql, "grace_used_at") && len(args) >= 2 {
-		if key, ok := args[1].(string); ok && m.graceableTokens != nil {
-			delete(m.graceableTokens, key) // single-token: key is the hashed token
-			for tok, subj := range m.graceableTokens {
-				if subj == key { // revoke-all: key is the subject
-					delete(m.graceableTokens, tok)
-				}
-			}
-		}
-		return &client.QueryResult{Count: 1}, nil
-	}
 	// INSERT new refresh_tokens row.
 	if containsCI(sql, "INSERT INTO refresh_tokens") {
 		m.inserted++
@@ -123,14 +71,6 @@ func (m *rotationMockORMDB) Query(_ context.Context, sql string, args ...interfa
 				m.subjectByToken = map[string]string{}
 			}
 			m.subjectByToken[hashedTok] = subj
-			// custom_claims is the LAST arg (bugboard #548) — capture it so
-			// rotation-propagation tests can assert it carries forward.
-			if m.claimsByToken == nil {
-				m.claimsByToken = map[string]string{}
-			}
-			if cc, ok := args[len(args)-1].(string); ok {
-				m.claimsByToken[hashedTok] = cc
-			}
 		}
 		return &client.QueryResult{Count: 1}, nil
 	}
@@ -149,12 +89,6 @@ type rotationMockRqlite struct {
 	rowsAffectedNext  []int64 // programmable per-call values; pop from front. Defaults to "revoke if unrevoked".
 	execErrNext       []error // programmable per-call errors
 	parallelExecGuard sync.Mutex
-	// graceCASNext: programmable RowsAffected for the grace CAS (UPDATE ... SET
-	// grace_used_at). 1 = won the single-use grace; 0 = already consumed
-	// (bugboard #125). Defaults to "win once per token".
-	graceCASNext  []int64
-	graceConsumed map[string]bool
-	graceCASCalls int
 }

 func (m *rotationMockRqlite) Exec(_ context.Context, sql string, args ...interface{}) (sql.Result, error) {
@@ -175,29 +109,6 @@ func (m *rotationMockRqlite) Exec(_ context.Context, sql string, args ...interfa
 		}
 	}

-	// Grace CAS (bugboard #125): UPDATE ... SET grace_used_at, single-use.
-	if containsCI(sql, "SET grace_used_at") && len(args) >= 2 {
-		m.graceCASCalls++
-		hashedTok, _ := args[1].(string)
-		if m.graceConsumed == nil {
-			m.graceConsumed = map[string]bool{}
-		}
-		var affected int64
-		if len(m.graceCASNext) > 0 {
-			affected = m.graceCASNext[0]
-			m.graceCASNext = m.graceCASNext[1:]
-			if affected == 1 {
-				m.graceConsumed[hashedTok] = true
-			}
-		} else if !m.graceConsumed[hashedTok] {
-			m.graceConsumed[hashedTok] = true
-			affected = 1
-		} else {
-			affected = 0
-		}
-		return &rotationFakeResult{affected: affected}, nil
-	}
-
 	// Default UPDATE behavior: matches if token is currently unrevoked.
 	if containsCI(sql, "UPDATE refresh_tokens SET revoked_at") && len(args) >= 2 {
 		hashedTok, _ := args[1].(string)
@@ -458,233 +369,3 @@ func TestRefreshToken_RotatedTokenReplayFails(t *testing.T) {
 		t.Fatal("expected error reusing rotated token, got nil")
 	}
 }
-
-// Bugboard #125: a TRANSIENT rqlite error on the lookup (leader briefly
-// unavailable during a rolling restart) must surface as ErrRefreshTransient
-// (→ 503, retryable) — NOT "invalid or expired" (→ 401, full SIWE re-auth,
-// impossible on a locked device answering a VoIP-woken call).
-func TestRefreshToken_transientSelectError_returnsTransient(t *testing.T) {
-	s, ormDB, _ := newRotationTestService(t)
-	const refresh = "valid-but-leader-down"
-	ormDB.subjectByToken[sha256Hex(refresh)] = "0xWALLET"
-	// Every lookup attempt across the whole retry window errors.
-	ormDB.selectErr = errors.New("rqlite: leadership lost")
-	ormDB.selectErrRemaining = 99
-
-	_, _, _, _, err := s.RefreshToken(context.Background(), refresh, "anchat-test")
-	if !errors.Is(err, ErrRefreshTransient) {
-		t.Fatalf("err = %v, want ErrRefreshTransient (a valid token must not 401 during a leader outage)", err)
-	}
-}
-
-// The lookup is retried, so a brief blip recovers transparently within one
-// refresh call (no client-visible failure at all).
-func TestRefreshToken_selectRecoversAfterRetry(t *testing.T) {
-	s, ormDB, _ := newRotationTestService(t)
-	const refresh = "valid-blips-then-ok"
-	ormDB.subjectByToken[sha256Hex(refresh)] = "0xWALLET"
-	ormDB.selectErr = errors.New("rqlite: leadership lost")
-	ormDB.selectErrRemaining = refreshSelectRetries - 1 // fail all but the last attempt
-
-	access, newRefresh, subj, _, err := s.RefreshToken(context.Background(), refresh, "anchat-test")
-	if err != nil {
-		t.Fatalf("RefreshToken should recover after transient blips: %v", err)
-	}
-	if access == "" || newRefresh == "" || subj != "0xWALLET" {
-		t.Errorf("recovered refresh incomplete: access=%q newRefresh=%q subj=%q", access, newRefresh, subj)
-	}
-}
-
-// A transient error on the CAS write (revoke) is also retryable, not a 401.
-func TestRefreshToken_transientUpdateError_returnsTransient(t *testing.T) {
-	s, ormDB, rq := newRotationTestService(t)
-	const refresh = "valid-cas-write-down"
-	ormDB.subjectByToken[sha256Hex(refresh)] = "0xWALLET"
-	rq.execErrNext = []error{errors.New("rqlite: write failed, no leader")}
-
-	_, _, _, _, err := s.RefreshToken(context.Background(), refresh, "anchat-test")
-	if !errors.Is(err, ErrRefreshTransient) {
-		t.Fatalf("err = %v, want ErrRefreshTransient on a transient CAS write error", err)
-	}
-}
-
-// A genuinely unknown token must remain a hard invalid (401), NOT be masked as
-// transient — the distinction is the whole point of the #125 fix.
-func TestRefreshToken_unknownToken_isNotTransient(t *testing.T) {
-	s, _, _ := newRotationTestService(t)
-	_, _, _, _, err := s.RefreshToken(context.Background(), "never-existed", "anchat-test")
-	if err == nil {
-		t.Fatal("expected error for unknown token")
-	}
-	if errors.Is(err, ErrRefreshTransient) {
-		t.Errorf("unknown token must be a genuine invalid (401), not transient (503): %v", err)
-	}
-}
-
-// mockClaimsResolver is a fixed claims-provider stand-in for the mint tests.
-type mockClaimsResolver struct{ claims map[string]string }
-
-func (m mockClaimsResolver) ResolveClaims(_ context.Context, _, _ string) map[string]string {
-	return m.claims
-}
-
-// Bugboard #548: claims resolved at IssueTokens (login) must be stored with the
-// refresh token AND replayed into the rotated access token — so account_id
-// survives the 15-min refresh without re-invoking the provider.
-func TestRefreshToken_propagatesCustomClaims(t *testing.T) {
-	s, ormDB, _ := newRotationTestService(t)
-	s.SetClaimsResolver(mockClaimsResolver{claims: map[string]string{"account_id": "u-999"}})
-
-	// Login mint — IssueTokens resolves + stores the claims with the refresh row.
-	_, refresh, _, err := s.IssueTokens(context.Background(), "0xWALLET", "anchat-test")
-	if err != nil {
-		t.Fatalf("IssueTokens: %v", err)
-	}
-	if got := ormDB.claimsByToken[sha256Hex(refresh)]; got != `{"account_id":"u-999"}` {
-		t.Fatalf("claims not stored with refresh token; got %q", got)
-	}
-
-	// Refresh — the rotated access token must carry account_id, and the NEW
-	// refresh row must propagate the stored claims.
-	access, newRefresh, _, _, err := s.RefreshToken(context.Background(), refresh, "anchat-test")
-	if err != nil {
-		t.Fatalf("RefreshToken: %v", err)
-	}
-	claims, err := s.ParseAndVerifyJWT(access)
-	if err != nil {
-		t.Fatalf("ValidateJWT: %v", err)
-	}
-	if claims.Custom["account_id"] != "u-999" {
-		t.Errorf("rotated access token lost account_id; custom=%v", claims.Custom)
-	}
-	if got := ormDB.claimsByToken[sha256Hex(newRefresh)]; got != `{"account_id":"u-999"}` {
-		t.Errorf("rotation did not propagate claims to the new row; got %q", got)
-	}
-
-	// Second rotation hop (N+1 → N+2): the claim must survive repeated
-	// rotations, not just the first — the propagation is the whole point.
-	access2, _, _, _, err := s.RefreshToken(context.Background(), newRefresh, "anchat-test")
-	if err != nil {
-		t.Fatalf("second RefreshToken: %v", err)
-	}
-	claims2, err := s.ParseAndVerifyJWT(access2)
-	if err != nil {
-		t.Fatalf("ParseAndVerifyJWT (2nd): %v", err)
-	}
-	if claims2.Custom["account_id"] != "u-999" {
-		t.Errorf("account_id lost across the second rotation; custom=%v", claims2.Custom)
-	}
-}
-
-// ----------------------------------------------------------------------------
-// Bugboard #125 — bounded, single-use refresh-token reuse grace (RFC 9700
-// §4.13.2). A rotation response lost in transit must NOT dead-end in a 401.
-// ----------------------------------------------------------------------------
-
-// A just-rotated token (revoked, within grace, grace not consumed) is accepted
-// ONCE more and mints a fresh session — recovering a client whose rotation
-// response was lost. The revoke CAS is skipped (the token is already revoked),
-// so this must NOT surface the replay tripwire.
-func TestRefreshToken_reuseGrace_recoversLostResponse(t *testing.T) {
-	s, ormDB, rq := newRotationTestService(t)
-
-	const lostTok = "rotated-but-response-lost"
-	// NOT in the active set (already revoked) ...
-	// ... but eligible for grace (revoked recently, grace unused).
-	ormDB.graceableTokens = map[string]string{sha256Hex(lostTok): "0xWALLET"}
-
-	access, newRefresh, subj, exp, err := s.RefreshToken(context.Background(), lostTok, "anchat-test")
-	if err != nil {
-		t.Fatalf("grace recovery should succeed, got error: %v", err)
-	}
-	if access == "" || newRefresh == "" {
-		t.Error("grace recovery must mint a fresh access + refresh token")
-	}
-	if newRefresh == lostTok {
-		t.Error("grace recovery must rotate to a NEW refresh token")
-	}
-	if subj != "0xWALLET" {
-		t.Errorf("subject = %q, want 0xWALLET", subj)
-	}
-	if exp <= 0 {
-		t.Errorf("expiration not set: %d", exp)
-	}
-	// The single-use grace CAS must have been claimed exactly once.
-	if rq.graceCASCalls != 1 {
-		t.Errorf("grace CAS calls = %d, want 1", rq.graceCASCalls)
-	}
-	// And a fresh refresh-token row was inserted.
-	if ormDB.inserted != 1 {
-		t.Errorf("expected 1 INSERT for the recovered session, got %d", ormDB.inserted)
-	}
-}
-
-// The grace is SINGLE-USE: once the grace_used_at CAS is lost (already
-// consumed, e.g. a replay after the legitimate client already recovered), the
-// token must 401 — a stolen token cannot be replayed at leisure.
-func TestRefreshToken_reuseGrace_singleUse_secondAttemptIs401(t *testing.T) {
-	s, ormDB, rq := newRotationTestService(t)
-
-	const tok = "already-grace-consumed"
-	ormDB.graceableTokens = map[string]string{sha256Hex(tok): "0xWALLET"}
-	// Force the grace CAS to report "already consumed".
-	rq.graceCASNext = []int64{0}
-
-	_, _, _, _, err := s.RefreshToken(context.Background(), tok, "anchat-test")
-	if err == nil {
-		t.Fatal("a consumed grace must NOT recover — expected an invalid-token error")
-	}
-	if !containsCI(err.Error(), "invalid or expired") {
-		t.Errorf("want invalid/expired 401, got %v", err)
-	}
-	if ormDB.inserted != 0 {
-		t.Errorf("no new session should be minted when grace is consumed; inserts=%d", ormDB.inserted)
-	}
-}
-
-// A genuinely bad token (not active AND not grace-eligible) still 401s — the
-// grace path must not turn unknown tokens into sessions.
-func TestRefreshToken_noGrace_genuineBadToken_stays401(t *testing.T) {
-	s, ormDB, _ := newRotationTestService(t)
-	// graceableTokens left empty: nothing is grace-eligible.
-
-	_, _, _, _, err := s.RefreshToken(context.Background(), "never-seen-this-token", "anchat-test")
-	if err == nil {
-		t.Fatal("a never-seen token must be rejected")
-	}
-	if !containsCI(err.Error(), "invalid or expired") {
-		t.Errorf("want invalid/expired 401, got %v", err)
-	}
-	if ormDB.inserted != 0 {
-		t.Errorf("no session should be minted for a bad token; inserts=%d", ormDB.inserted)
-	}
-}
-
-// Security regression (bugboard #125 logout-bypass): a token explicitly revoked
-// via RevokeToken (logout) must NOT be recoverable through the reuse grace, even
-// within the 60s window. RevokeToken burns grace_used_at so the grace predicate
-// (grace_used_at IS NULL) excludes it.
-func TestRevokeToken_burnsGrace_blocksLogoutBypass(t *testing.T) {
-	s, ormDB, _ := newRotationTestService(t)
-
-	const tok = "logged-out-token"
-	// Within the revoke window it WOULD be grace-eligible...
-	ormDB.graceableTokens = map[string]string{sha256Hex(tok): "0xWALLET"}
-
-	// ...until the user logs out.
-	if err := s.RevokeToken(context.Background(), "anchat-test", tok, false, ""); err != nil {
-		t.Fatalf("RevokeToken: %v", err)
-	}
-
-	// A refresh with the just-logged-out token must be rejected, not resurrected.
-	_, _, _, _, err := s.RefreshToken(context.Background(), tok, "anchat-test")
-	if err == nil {
-		t.Fatal("LOGOUT-BYPASS: a logged-out token was resurrected via reuse grace")
-	}
-	if !containsCI(err.Error(), "invalid or expired") {
-		t.Errorf("want 401 invalid/expired, got %v", err)
-	}
-	if ormDB.inserted != 0 {
-		t.Errorf("no session should be minted for a logged-out token; inserts=%d", ormDB.inserted)
-	}
-}
--- a/core/pkg/gateway/auth/service.go
+++ b/core/pkg/gateway/auth/service.go
@@ -35,8 +35,7 @@ type Service struct {
 	edKeyID          string
 	preferEdDSA      bool
 	defaultNS        string
-	apiKeyHMACSecret string         // HMAC secret for hashing API keys before storage
-	claimsResolver   ClaimsResolver // namespace claims-provider hook (bugboard #548); nil = none
+	apiKeyHMACSecret string // HMAC secret for hashing API keys before storage
 }

 func NewService(logger *logging.ColoredLogger, orm client.NetworkClient, signingKeyPEM string, defaultNS string) (*Service, error) {
@@ -85,28 +84,6 @@ func (s *Service) SetRqliteClient(db rqlite.Client) {
 	s.db = db
 }

-// ClaimsResolver resolves additive, namespace-defined JWT custom claims for an
-// authenticated wallet at token-mint time (bugboard #548/#920). The concrete
-// implementation invokes the namespace's reserved `auth-claims-provider`
-// serverless function; it MUST be fail-open (return nil, never error) so a
-// missing/slow/broken provider never breaks authentication. Injected via
-// SetClaimsResolver; nil = no custom claims (every namespace's default).
-type ClaimsResolver interface {
-	ResolveClaims(ctx context.Context, wallet, namespace string) map[string]string
-}
-
-// SetClaimsResolver wires the namespace claims-provider hook used at mint time.
-func (s *Service) SetClaimsResolver(r ClaimsResolver) { s.claimsResolver = r }
-
-// resolveCustomClaims returns the namespace's additive claims for this wallet,
-// or nil. Fail-open by contract — the resolver never errors.
-func (s *Service) resolveCustomClaims(ctx context.Context, wallet, namespace string) map[string]string {
-	if s.claimsResolver == nil {
-		return nil
-	}
-	return s.claimsResolver.ResolveClaims(ctx, wallet, namespace)
-}
-
 // ErrRotationNotConfigured is returned by RefreshToken when the service
 // wasn't given an rqlite client — refusing to rotate without atomicity
 // guarantees is safer than rotating non-atomically.
@@ -247,13 +224,8 @@ func (s *Service) IssueTokens(ctx context.Context, wallet, namespace string) (st
 		return "", "", 0, fmt.Errorf("signing key unavailable")
 	}

-	// Resolve namespace-defined additive claims (bugboard #548) ONCE at mint
-	// time. Stored with the refresh token below and replayed across rotations
-	// so the 15-min refresh path never re-invokes the provider.
-	custom := s.resolveCustomClaims(ctx, wallet, namespace)
-
 	// Issue access token (15m)
-	token, expUnix, err := s.GenerateJWT(namespace, wallet, 15*time.Minute, custom)
+	token, expUnix, err := s.GenerateJWT(namespace, wallet, 15*time.Minute)
 	if err != nil {
 		return "", "", 0, fmt.Errorf("failed to generate JWT: %w", err)
 	}
@@ -274,8 +246,8 @@ func (s *Service) IssueTokens(ctx context.Context, wallet, namespace string) (st
 	db := s.orm.Database()
 	hashedRefresh := sha256Hex(refresh)
 	if _, err := db.Query(internalCtx,
-		"INSERT INTO refresh_tokens(namespace_id, subject, token, audience, expires_at, custom_claims) VALUES (?, ?, ?, ?, datetime('now', '+30 days'), ?)",
-		nsID, wallet, hashedRefresh, "gateway", marshalClaims(custom),
+		"INSERT INTO refresh_tokens(namespace_id, subject, token, audience, expires_at) VALUES (?, ?, ?, ?, datetime('now', '+30 days'))",
+		nsID, wallet, hashedRefresh, "gateway",
 	); err != nil {
 		return "", "", 0, fmt.Errorf("failed to store refresh token: %w", err)
 	}
@@ -293,34 +265,6 @@ func (s *Service) IssueTokens(ctx context.Context, wallet, namespace string) (st
 // This is the tripwire promised by RFC 9700 §4.12 (refresh-token rotation).
 var ErrRefreshTokenReplay = fmt.Errorf("refresh token already rotated or invalid")

-// ErrRefreshTransient is returned when refresh-token rotation fails for a
-// RETRYABLE reason — an rqlite-layer error rather than a genuine bad/expired
-// token. Bugboard #125: during a rolling gateway restart the rqlite leader is
-// briefly unavailable (re-election window), so the lookup/rotation errors;
-// collapsing that into "invalid token" forces a 401 → full SIWE re-auth, which
-// is impossible on a locked device answering a VoIP-woken call. Callers MUST
-// surface this as a retryable 503, NOT a 401, so the client retries within the
-// ring window instead of tearing down the session.
-var ErrRefreshTransient = fmt.Errorf("refresh token rotation temporarily unavailable")
-
-const (
-	// refreshSelectRetries bounds how many times the refresh lookup is retried
-	// when the rqlite read errors (transient leader unavailability). The read
-	// is idempotent and happens BEFORE any write, so retrying is safe.
-	refreshSelectRetries = 3
-	// refreshSelectRetryDelay is the backoff between lookup retries. Three
-	// tries × 250ms rides out a brief leader re-election without adding
-	// meaningful latency to the common (healthy-leader) path.
-	refreshSelectRetryDelay = 250 * time.Millisecond
-	// refreshReuseGrace is how long after a refresh token is rotated (revoked)
-	// the gateway will still accept it ONE more time, to recover a client whose
-	// rotation response was lost in transit — otherwise the retry dead-ends in a
-	// 401 → SIWE, impossible on a VoIP-woken locked screen (bugboard #125, RFC
-	// 9700 §4.13.2). Kept short, and single-use via grace_used_at, so a stolen
-	// token cannot be replayed at leisure.
-	refreshReuseGrace = 60 * time.Second
-)
-
 // RefreshToken validates the supplied refresh token, atomically rotates it
 // (revokes the old, mints a new), and returns a fresh access token alongside
 // the rotated refresh token.
@ -365,146 +309,58 @@ func (s *Service) RefreshToken(ctx context.Context, refreshToken, namespace stri

 	nsID, err := s.ResolveNamespaceID(ctx, namespace)
 	if err != nil {
-		// Bugboard #125: namespace resolution runs an rqlite query BEFORE the
-		// token lookup, so a leader re-election during a rolling restart fails
-		// here too. Treat it as retryable (→ 503), not a bad token (→ 401) —
-		// the refresh-path namespace comes from an already-authenticated
-		// session, so a resolution failure is a transient DB error, never the
-		// client's fault.
-		s.logger.ComponentWarn(logging.ComponentGeneral,
-			"refresh namespace resolution failed (transient, surfacing retryable)",
-			zap.String("namespace", namespace),
-			zap.Error(err))
-		return "", "", "", 0, ErrRefreshTransient
+		return "", "", "", 0, err
 	}

 	hashedRefresh := sha256Hex(refreshToken)

 	// Step 1: read the subject. Tells us who the token belongs to AND
 	// validates that it's currently usable (not revoked, not expired).
-	//
-	// Bugboard #125: distinguish a TRANSIENT rqlite error (leader briefly
-	// unavailable during a rolling restart) from a GENUINE token miss. The
-	// read is idempotent and pre-write, so we retry it a few times; only after
-	// exhausting retries do we surface ErrRefreshTransient (→ 503, client
-	// retries). An actual empty result (Count == 0) is a real bad/expired
-	// token → "invalid or expired" (→ 401). Collapsing the two used to 401 a
-	// valid session during every restart, defeating the VoIP-wake refresh.
-	selectQ := `SELECT subject, custom_claims FROM refresh_tokens
+	selectQ := `SELECT subject FROM refresh_tokens
 	            WHERE namespace_id = ? AND token = ?
 	              AND revoked_at IS NULL
 	              AND (expires_at IS NULL OR expires_at > datetime('now'))
 	            LIMIT 1`
-	var res *client.QueryResult
-	var selErr error
-	for attempt := 0; attempt < refreshSelectRetries; attempt++ {
-		res, selErr = ormDB.Query(internalCtx, selectQ, nsID, hashedRefresh)
-		if selErr == nil && res != nil {
-			break
-		}
-		if attempt < refreshSelectRetries-1 {
-			time.Sleep(refreshSelectRetryDelay)
-		}
+	res, err := ormDB.Query(internalCtx, selectQ, nsID, hashedRefresh)
+	if err != nil || res == nil || res.Count == 0 {
+		return "", "", "", 0, fmt.Errorf("invalid or expired refresh token")
 	}
-	if selErr != nil || res == nil {
-		// rqlite error persisted across retries — leader likely mid-election.
-		// Retryable, NOT an invalid token.
-		s.logger.ComponentWarn(logging.ComponentGeneral,
-			"refresh token lookup failed (transient rqlite error, surfacing retryable)",
-			zap.String("namespace", namespace),
-			zap.Error(selErr))
-		return "", "", "", 0, ErrRefreshTransient
-	}
-	// graceRecovery is set when the presented token was NOT in the active set
-	// but qualifies for the bugboard #125 single-use reuse grace (a just-
-	// rotated token whose rotation response was lost). In that case the old row
-	// is already revoked, so we SKIP the revoke CAS (step 2) — the grace CAS
-	// inside tryRefreshReuseGrace is our single-use lock — and go straight to
-	// minting a fresh session.
-	graceRecovery := false
-	var custom map[string]string
-	if res.Count == 0 {
-		gSubject, gCustom, gOK, gErr := s.tryRefreshReuseGrace(internalCtx, ormDB, nsID, hashedRefresh)
-		if gErr != nil {
-			// Transient rqlite error during the grace lookup/claim — retryable,
-			// not a verdict on the token (bugboard #125).
-			s.logger.ComponentWarn(logging.ComponentGeneral,
-				"refresh reuse-grace lookup failed (transient rqlite error, surfacing retryable)",
-				zap.String("namespace", namespace), zap.Error(gErr))
-			return "", "", "", 0, ErrRefreshTransient
+	if len(res.Rows) > 0 && len(res.Rows[0]) > 0 {
+		if val, ok := res.Rows[0][0].(string); ok {
+			subject = val
+		} else {
+			b, _ := json.Marshal(res.Rows[0][0])
+			_ = json.Unmarshal(b, &subject)
 		}
-		if !gOK {
-			// Genuinely not found / revoked outside grace / grace already
-			// consumed / expired — a real bad token.
-			return "", "", "", 0, fmt.Errorf("invalid or expired refresh token")
-		}
-		subject = gSubject
-		custom = gCustom
-		graceRecovery = true
-		s.logger.ComponentInfo(logging.ComponentGeneral,
-			"refresh token reuse-grace recovery (lost-response retry, single-use)",
-			zap.String("namespace", namespace), zap.String("subject", subject))
-	} else {
-		var customClaimsJSON string
-		if len(res.Rows) > 0 && len(res.Rows[0]) > 0 {
-			if val, ok := res.Rows[0][0].(string); ok {
-				subject = val
-			} else {
-				b, _ := json.Marshal(res.Rows[0][0])
-				_ = json.Unmarshal(b, &subject)
-			}
-			// custom_claims (bugboard #548) — resolved once at login, replayed on
-			// every rotation so the refresh path never re-invokes the provider.
-			if len(res.Rows[0]) > 1 {
-				if cc, ok := res.Rows[0][1].(string); ok {
-					customClaimsJSON = cc
-				}
-			}
-		}
-		custom = unmarshalClaims(customClaimsJSON)
 	}

 	// Step 2: atomic CAS — revoke the old row. RowsAffected is the lock.
 	// Two concurrent calls with the same refresh token: exactly one wins
 	// the UPDATE (RowsAffected == 1); the other sees RowsAffected == 0
 	// and bails with the replay tripwire.
-	//
-	// Skipped on a grace recovery (bugboard #125): the token is ALREADY
-	// revoked, so this CAS would always see RowsAffected == 0 and mis-fire the
-	// replay tripwire. The single-use grace CAS (grace_used_at) inside
-	// tryRefreshReuseGrace already served as the lock for this path.
-	if !graceRecovery {
-		updRes, err := s.db.Exec(internalCtx,
-			`UPDATE refresh_tokens SET revoked_at = datetime('now')
-			 WHERE namespace_id = ? AND token = ? AND revoked_at IS NULL`,
-			nsID, hashedRefresh)
-		if err != nil {
-			// rqlite write error (leader unavailable) — retryable, not a bad
-			// token. No row was revoked, so a client retry is safe (bugboard #125).
-			s.logger.ComponentWarn(logging.ComponentGeneral,
-				"refresh token revoke failed (transient rqlite error, surfacing retryable)",
-				zap.String("namespace", namespace),
-				zap.Error(err))
-			return "", "", "", 0, ErrRefreshTransient
-		}
-		affected, _ := updRes.RowsAffected()
-		if affected == 0 {
-			// Race lost OR replay attempt: token was unrevoked at step 1 but
-			// already revoked by step 2, meaning a concurrent call rotated it
-			// in between. Could be benign (same client retrying due to a
-			// transient network error) or malicious (stolen token + race).
-			// Either way: fail closed, log it, let the operator investigate.
-			s.logger.ComponentWarn(logging.ComponentGeneral,
-				"refresh token rotation: concurrent use detected (possible replay)",
-				zap.String("namespace", namespace),
-				zap.String("subject", subject))
-			return "", "", "", 0, ErrRefreshTokenReplay
-		}
+	updRes, err := s.db.Exec(internalCtx,
+		`UPDATE refresh_tokens SET revoked_at = datetime('now')
+		 WHERE namespace_id = ? AND token = ? AND revoked_at IS NULL`,
+		nsID, hashedRefresh)
+	if err != nil {
+		return "", "", "", 0, fmt.Errorf("revoke old refresh token: %w", err)
+	}
+	affected, _ := updRes.RowsAffected()
+	if affected == 0 {
+		// Race lost OR replay attempt: token was unrevoked at step 1 but
+		// already revoked by step 2, meaning a concurrent call rotated it
+		// in between. Could be benign (same client retrying due to a
+		// transient network error) or malicious (stolen token + race).
+		// Either way: fail closed, log it, let the operator investigate.
+		s.logger.ComponentWarn(logging.ComponentGeneral,
+			"refresh token rotation: concurrent use detected (possible replay)",
+			zap.String("namespace", namespace),
+			zap.String("subject", subject))
+		return "", "", "", 0, ErrRefreshTokenReplay
 	}

-	// Step 3: mint the new access JWT, carrying forward the stored custom
-	// claims so a rotated token keeps the same account_id etc. (bugboard #548).
-	accessToken, expUnix, err = s.GenerateJWT(namespace, subject, 15*time.Minute, custom)
+	// Step 3: mint the new access JWT.
+	accessToken, expUnix, err = s.GenerateJWT(namespace, subject, 15*time.Minute)
 	if err != nil {
 		return "", "", "", 0, fmt.Errorf("generate access token: %w", err)
 	}
@ -520,96 +376,15 @@ func (s *Service) RefreshToken(ctx context.Context, refreshToken, namespace stri
 	}
 	newRefreshToken = base64.RawURLEncoding.EncodeToString(rbuf)
 	hashedNew := sha256Hex(newRefreshToken)
-	// Re-marshal from the parsed map (not the raw stored string) so the new
-	// row and the freshly-minted access token are provably consistent and
-	// self-healing — a malformed stored blob converges to "" on both sides
-	// rather than being propagated forward verbatim. custom_claims is written
-	// ONLY here and in IssueTokens, both from a sanitized map (bugboard #548).
 	if _, err := ormDB.Query(internalCtx,
-		"INSERT INTO refresh_tokens(namespace_id, subject, token, audience, expires_at, custom_claims) VALUES (?, ?, ?, ?, datetime('now', '+30 days'), ?)",
-		nsID, subject, hashedNew, "gateway", marshalClaims(custom)); err != nil {
-		// The old token is already revoked (step 2). A retryable error here
-		// leaves the client to re-attempt — which will re-auth since the old
-		// token is gone — but that's strictly better than masking a transient
-		// failure as a permanent 401 (bugboard #125). Surface retryable.
-		s.logger.ComponentWarn(logging.ComponentGeneral,
-			"refresh token store failed after revoke (transient rqlite error)",
-			zap.String("namespace", namespace),
-			zap.Error(err))
-		return "", "", "", 0, ErrRefreshTransient
+		"INSERT INTO refresh_tokens(namespace_id, subject, token, audience, expires_at) VALUES (?, ?, ?, ?, datetime('now', '+30 days'))",
+		nsID, subject, hashedNew, "gateway"); err != nil {
+		return "", "", "", 0, fmt.Errorf("store rotated refresh token: %w", err)
 	}

 	return accessToken, newRefreshToken, subject, expUnix, nil
 }

-// tryRefreshReuseGrace implements the bounded, single-use reuse grace for a
-// rotated refresh token (bugboard #125, RFC 9700 §4.13.2). A token revoked
-// within refreshReuseGrace whose grace_used_at is still NULL is accepted ONCE
-// more — recovering a client that lost its rotation response in transit (a
-// reconnect storm during a gateway roll) before it dead-ends in a 401 → SIWE.
-//
-// Returns (subject, custom, true, nil) on a successful single-use grace claim;
-// (—, —, false, nil) when there is no eligible row, the token was revoked
-// outside the grace window, it has expired, or the grace was already consumed
-// (caller → 401). A non-nil error is a transient rqlite failure (caller → 503).
-//
-// Security: the grace is both short-windowed AND single-use (a CAS on
-// grace_used_at), so a stolen token cannot be replayed repeatedly; and it never
-// touches the concurrent-rotation replay tripwire, which fires on the active
-// path only.
-func (s *Service) tryRefreshReuseGrace(ctx context.Context, ormDB client.DatabaseClient, nsID interface{}, hashedRefresh string) (subject string, custom map[string]string, ok bool, err error) {
-	graceArg := fmt.Sprintf("-%d seconds", int(refreshReuseGrace.Seconds()))
-	sel := `SELECT subject, custom_claims FROM refresh_tokens
-	        WHERE namespace_id = ? AND token = ?
-	          AND revoked_at IS NOT NULL
-	          AND revoked_at > datetime('now', ?)
-	          AND grace_used_at IS NULL
-	          AND (expires_at IS NULL OR expires_at > datetime('now'))
-	        LIMIT 1`
-	res, qerr := ormDB.Query(ctx, sel, nsID, hashedRefresh, graceArg)
-	if qerr != nil {
-		return "", nil, false, qerr // transient rqlite error → caller 503
-	}
-	if res == nil || res.Count == 0 {
-		return "", nil, false, nil // no eligible grace row → caller 401
-	}
-
-	var customClaimsJSON string
-	if len(res.Rows) > 0 && len(res.Rows[0]) > 0 {
-		if v, vok := res.Rows[0][0].(string); vok {
-			subject = v
-		} else {
-			b, _ := json.Marshal(res.Rows[0][0])
-			_ = json.Unmarshal(b, &subject)
-		}
-		if len(res.Rows[0]) > 1 {
-			if cc, cok := res.Rows[0][1].(string); cok {
-				customClaimsJSON = cc
-			}
-		}
-	}
-	if subject == "" {
-		return "", nil, false, nil // defensive: never grace-mint an anonymous session
-	}
-
-	// Single-use CAS: claim the grace. Exactly one caller wins; a concurrent
-	// replay of the same just-revoked token sees RowsAffected == 0 → no grace.
-	// The same time-window predicate is repeated so the claim can't succeed on a
-	// row that aged out of the window between the SELECT and here.
-	updRes, uerr := s.db.Exec(ctx,
-		`UPDATE refresh_tokens SET grace_used_at = datetime('now')
-		 WHERE namespace_id = ? AND token = ? AND grace_used_at IS NULL
-		   AND revoked_at IS NOT NULL AND revoked_at > datetime('now', ?)`,
-		nsID, hashedRefresh, graceArg)
-	if uerr != nil {
-		return "", nil, false, uerr // transient
-	}
-	if affected, _ := updRes.RowsAffected(); affected == 0 {
-		return "", nil, false, nil // grace already consumed (concurrent) → caller 401
-	}
-	return subject, unmarshalClaims(customClaimsJSON), true, nil
-}
-
 // RevokeToken revokes a specific refresh token or all tokens for a subject
 func (s *Service) RevokeToken(ctx context.Context, namespace, token string, all bool, subject string) error {
 	internalCtx := client.WithInternalAuth(ctx)
@ -620,20 +395,14 @@ func (s *Service) RevokeToken(ctx context.Context, namespace, token string, all
 		return err
 	}

-	// Explicit revocation (logout / revoke-all) ALSO burns the reuse-grace slot
-	// (grace_used_at) so a deliberately-revoked token can NEVER be recovered by
-	// the bugboard #125 reuse grace. Rotation does not go through RevokeToken,
-	// so the legitimate lost-response grace path is unaffected; this only closes
-	// the logout-bypass where a just-logged-out token would otherwise be
-	// grace-eligible for the 60s window.
 	if token != "" {
 		hashedToken := sha256Hex(token)
-		_, err := db.Query(internalCtx, "UPDATE refresh_tokens SET revoked_at = datetime('now'), grace_used_at = datetime('now') WHERE namespace_id = ? AND token = ? AND revoked_at IS NULL", nsID, hashedToken)
+		_, err := db.Query(internalCtx, "UPDATE refresh_tokens SET revoked_at = datetime('now') WHERE namespace_id = ? AND token = ? AND revoked_at IS NULL", nsID, hashedToken)
 		return err
 	}

 	if all && subject != "" {
-		_, err := db.Query(internalCtx, "UPDATE refresh_tokens SET revoked_at = datetime('now'), grace_used_at = datetime('now') WHERE namespace_id = ? AND subject = ? AND revoked_at IS NULL", nsID, subject)
+		_, err := db.Query(internalCtx, "UPDATE refresh_tokens SET revoked_at = datetime('now') WHERE namespace_id = ? AND subject = ? AND revoked_at IS NULL", nsID, subject)
 		return err
 	}

--- a/core/pkg/gateway/auth/service_test.go
+++ b/core/pkg/gateway/auth/service_test.go
@ -112,7 +112,7 @@ func TestJWTFlow(t *testing.T) {
 	sub := "0x1234567890abcdef1234567890abcdef12345678"
 	ttl := 15 * time.Minute

-	token, exp, err := s.GenerateJWT(ns, sub, ttl, nil)
+	token, exp, err := s.GenerateJWT(ns, sub, ttl)
 	if err != nil {
 		t.Fatalf("GenerateJWT failed: %v", err)
 	}
@ -192,7 +192,7 @@ func TestEdDSAJWTFlow(t *testing.T) {
 	ttl := 15 * time.Minute

 	// With EdDSA preferred, GenerateJWT should produce an EdDSA token
-	token, exp, err := s.GenerateJWT(ns, sub, ttl, nil)
+	token, exp, err := s.GenerateJWT(ns, sub, ttl)
 	if err != nil {
 		t.Fatalf("GenerateJWT (EdDSA) failed: %v", err)
 	}
@ -233,7 +233,7 @@ func TestRS256BackwardCompat(t *testing.T) {

 	// Generate an RS256 token directly (simulating a legacy token)
 	s.preferEdDSA = false
-	token, _, err := s.GenerateJWT("test-ns", "user1", 15*time.Minute, nil)
+	token, _, err := s.GenerateJWT("test-ns", "user1", 15*time.Minute)
 	if err != nil {
 		t.Fatalf("GenerateJWT (RS256) failed: %v", err)
 	}
@ -447,7 +447,7 @@ func TestEdDSACrossServiceVerify(t *testing.T) {

 	const wantSub = "BNbN2RNQTsYrrywZCLnhV9j3hd38jwcRqfxBecZX7hDE"
 	const wantNS = "anchat-test"
-	token, _, err := signer.GenerateJWT(wantNS, wantSub, 15*time.Minute, nil)
+	token, _, err := signer.GenerateJWT(wantNS, wantSub, 15*time.Minute)
 	if err != nil {
 		t.Fatalf("signer.GenerateJWT: %v", err)
 	}
@ -478,7 +478,7 @@ func TestEdDSACrossServiceVerify_differentKeysFail(t *testing.T) {
 	_, verKey, _ := ed25519.GenerateKey(rand.Reader)
 	verifier.SetEdDSAKey(verKey)

-	token, _, err := signer.GenerateJWT("ns", "sub", 15*time.Minute, nil)
+	token, _, err := signer.GenerateJWT("ns", "sub", 15*time.Minute)
 	if err != nil {
 		t.Fatalf("GenerateJWT: %v", err)
 	}
--- a/core/pkg/gateway/claims_provider.go
+++ b/core/pkg/gateway/claims_provider.go
@ -1,178 +0,0 @@
-package gateway
-
-import (
-	"context"
-	"encoding/json"
-	"errors"
-	"sort"
-	"sync"
-	"time"
-
-	"github.com/DeBrosOfficial/network/pkg/serverless"
-	"github.com/DeBrosOfficial/network/pkg/serverless/registry"
-	"go.uber.org/zap"
-)
-
-// Claims-provider hook (bugboard #548/#920).
-//
-// A namespace opts into additive, signed JWT claims by deploying a serverless
-// function with the RESERVED name "auth-claims-provider". At /v1/auth/verify
-// mint time the gateway invokes it (in the namespace's own context, so it can
-// read the namespace's tables) with {"wallet","namespace"} and merges the
-// string→string object it returns into the JWT's custom claims — e.g.
-// {"account_id":"<users.user_id>"} so push devices key on the stable account
-// identity rather than the authenticating wallet.
-//
-// Hard guarantees:
-//   - FAIL-OPEN: a missing / slow / erroring / malformed provider yields NO
-//     claims; authentication never breaks because a claims function is down.
-//   - Reserved claims (sub/iss/aud/iat/nbf/exp/namespace/custom) can never be
-//     set by the provider — the gateway controls those.
-//   - Bounded: timeout, max claim count, max total size.
-
-const (
-	// claimsProviderFnName is the reserved function name a namespace deploys to
-	// inject additive JWT claims at mint time.
-	claimsProviderFnName = "auth-claims-provider"
-	// claimsProviderTimeout bounds the provider invocation so a slow/hung
-	// function never stalls the auth path past this budget (fail-open after).
-	claimsProviderTimeout = 2 * time.Second
-	// maxCustomClaims / maxCustomClaimsBytes cap what a provider may inject —
-	// JWTs ride in headers, and an unbounded claim blob is a DoS / cost vector.
-	maxCustomClaims      = 16
-	maxCustomClaimsBytes = 4096
-	// claimsProviderWarnInterval rate-limits the fail-open WARN so a broken
-	// provider doesn't flood the log on every login.
-	claimsProviderWarnInterval = 30 * time.Second
-)
-
-// reservedClaimKeys can never be injected by a namespace claims provider; the
-// gateway owns these. A provider that returns any of them has them dropped.
-var reservedClaimKeys = map[string]struct{}{
-	"sub": {}, "iss": {}, "aud": {}, "iat": {},
-	"nbf": {}, "exp": {}, "namespace": {}, "custom": {},
-}
-
-// jwtClaimsProvider implements auth.ClaimsResolver by invoking the namespace's
-// reserved auth-claims-provider function.
-type jwtClaimsProvider struct {
-	invoker *serverless.Invoker
-	logger  *zap.Logger
-
-	mu          sync.Mutex
-	lastWarnUTC time.Time
-}
-
-// newJWTClaimsProvider builds the resolver. A nil invoker disables the hook
-// (ResolveClaims returns nil).
-func newJWTClaimsProvider(invoker *serverless.Invoker, logger *zap.Logger) *jwtClaimsProvider {
-	if logger == nil {
-		logger = zap.NewNop()
-	}
-	return &jwtClaimsProvider{invoker: invoker, logger: logger.Named("claims-provider")}
-}
-
-// ResolveClaims invokes the namespace's auth-claims-provider and returns the
-// sanitized additive claims, or nil. Never errors (fail-open contract).
-func (p *jwtClaimsProvider) ResolveClaims(ctx context.Context, wallet, namespace string) map[string]string {
-	if p.invoker == nil || wallet == "" || namespace == "" {
-		return nil
-	}
-
-	input, err := json.Marshal(map[string]string{"wallet": wallet, "namespace": namespace})
-	if err != nil {
-		return nil
-	}
-
-	callCtx, cancel := context.WithTimeout(ctx, claimsProviderTimeout)
-	defer cancel()
-
-	resp, err := p.invoker.Invoke(callCtx, &serverless.InvokeRequest{
-		Namespace:    namespace,
-		FunctionName: claimsProviderFnName,
-		Input:        input,
-		// Gateway-initiated, no end-user caller → system trigger skips the
-		// per-caller authorization check.
-		TriggerType: serverless.TriggerTypeInternal,
-	})
-	if err != nil || resp == nil {
-		// The namespace simply hasn't deployed the function (registry miss) is
-		// the normal no-claims case for most namespaces — stay silent. Any
-		// other failure is a real problem worth a rate-limited WARN.
-		if !errors.Is(err, registry.ErrFunctionNotFound) {
-			p.warnRateLimited("claims provider invoke failed (minting without custom claims)",
-				namespace, err)
-		}
-		return nil
-	}
-	if resp.Status != serverless.InvocationStatusSuccess {
-		p.warnRateLimited("claims provider returned non-success (minting without custom claims)",
-			namespace, nil)
-		return nil
-	}
-
-	return sanitizeProviderClaims(resp.Output)
-}
-
-// sanitizeProviderClaims parses the provider's RAW stdout as a bare JSON object
-// of additive claims (NOT an {ok,result} Ack envelope — per the #976 contract)
-// and returns a safe string→string subset: string values only, reserved keys
-// dropped, bounded count and total size. Any parse failure → nil (fail-open).
-func sanitizeProviderClaims(raw []byte) map[string]string {
-	if len(raw) == 0 || len(raw) > maxCustomClaimsBytes {
-		return nil
-	}
-	var obj map[string]any
-	if err := json.Unmarshal(raw, &obj); err != nil || len(obj) == 0 {
-		return nil
-	}
-	// Iterate in sorted key order so an over-budget provider payload truncates
-	// DETERMINISTICALLY (Go map iteration is randomized) — the same output must
-	// always yield the same claims, never a per-login-varying subset.
-	keys := make([]string, 0, len(obj))
-	for k := range obj {
-		keys = append(keys, k)
-	}
-	sort.Strings(keys)
-
-	out := make(map[string]string, len(obj))
-	total := 0
-	for _, k := range keys {
-		if len(out) >= maxCustomClaims {
-			break
-		}
-		if _, reserved := reservedClaimKeys[k]; reserved {
-			continue
-		}
-		s, ok := obj[k].(string) // string→string contract; non-string values dropped
-		if !ok {
-			continue
-		}
-		total += len(k) + len(s)
-		if total > maxCustomClaimsBytes {
-			break
-		}
-		out[k] = s
-	}
-	if len(out) == 0 {
-		return nil
-	}
-	return out
-}
-
-func (p *jwtClaimsProvider) warnRateLimited(msg, namespace string, err error) {
-	p.mu.Lock()
-	now := time.Now()
-	if now.Sub(p.lastWarnUTC) < claimsProviderWarnInterval {
-		p.mu.Unlock()
-		return
-	}
-	p.lastWarnUTC = now
-	p.mu.Unlock()
-
-	fields := []zap.Field{zap.String("namespace", namespace), zap.String("function", claimsProviderFnName)}
-	if err != nil {
-		fields = append(fields, zap.Error(err))
-	}
-	p.logger.Warn(msg, fields...)
-}
--- a/core/pkg/gateway/claims_provider_test.go
+++ b/core/pkg/gateway/claims_provider_test.go
@ -1,98 +0,0 @@
-package gateway
-
-import (
-	"testing"
-)
-
-// Bugboard #548: the claims-provider sanitizer is the security boundary —
-// a namespace function must NOT be able to forge reserved claims, inject
-// non-string values, or blow the size budget.
-
-func TestSanitizeProviderClaims_happyPath(t *testing.T) {
-	out := sanitizeProviderClaims([]byte(`{"account_id":"u-123","tier":"pro"}`))
-	if out["account_id"] != "u-123" || out["tier"] != "pro" {
-		t.Fatalf("expected additive claims, got %v", out)
-	}
-}
-
-func TestSanitizeProviderClaims_dropsReservedKeys(t *testing.T) {
-	// A malicious provider tries to override sub/exp/namespace — must be dropped.
-	out := sanitizeProviderClaims([]byte(`{"sub":"0xATTACKER","exp":"9999999999","namespace":"evil","account_id":"u-1"}`))
-	for _, k := range []string{"sub", "exp", "namespace"} {
-		if _, present := out[k]; present {
-			t.Errorf("reserved key %q must be dropped, got %v", k, out)
-		}
-	}
-	if out["account_id"] != "u-1" {
-		t.Errorf("legitimate claim dropped: %v", out)
-	}
-}
-
-func TestSanitizeProviderClaims_nonStringValuesDropped(t *testing.T) {
-	out := sanitizeProviderClaims([]byte(`{"account_id":"u-1","num":5,"obj":{"a":1},"arr":[1],"ok":"yes"}`))
-	if len(out) != 2 || out["account_id"] != "u-1" || out["ok"] != "yes" {
-		t.Errorf("non-string values must be dropped; got %v", out)
-	}
-}
-
-func TestSanitizeProviderClaims_failOpenOnGarbage(t *testing.T) {
-	for _, bad := range [][]byte{
-		nil,
-		[]byte(``),
-		[]byte(`not json`),
-		[]byte(`[1,2,3]`),         // array, not object
-		[]byte(`"just a string"`), // scalar
-		[]byte(`{}`),              // empty object
-		[]byte(`{"ok":true,"result":{"account_id":"u"}}`), // Ack envelope (wrong shape) → no top-level string claims
-	} {
-		if got := sanitizeProviderClaims(bad); got != nil {
-			t.Errorf("garbage %q must yield nil (fail-open), got %v", bad, got)
-		}
-	}
-}
-
-func TestSanitizeProviderClaims_countAndSizeCapped(t *testing.T) {
-	// Way more than maxCustomClaims string entries.
-	buf := []byte("{")
-	for i := 0; i < maxCustomClaims+20; i++ {
-		if i > 0 {
-			buf = append(buf, ',')
-		}
-		buf = append(buf, []byte(`"k`)...)
-		buf = append(buf, []byte(itoa(i))...)
-		buf = append(buf, []byte(`":"v"`)...)
-	}
-	buf = append(buf, '}')
-	out := sanitizeProviderClaims(buf)
-	if len(out) > maxCustomClaims {
-		t.Errorf("claim count not capped: got %d, max %d", len(out), maxCustomClaims)
-	}
-
-	// Oversized total payload → rejected outright.
-	big := make([]byte, maxCustomClaimsBytes+10)
-	for i := range big {
-		big[i] = 'a'
-	}
-	if got := sanitizeProviderClaims(big); got != nil {
-		t.Errorf("oversized payload must be rejected, got %v", got)
-	}
-}
-
-func TestResolveClaims_nilInvokerOrEmptyArgs(t *testing.T) {
-	p := newJWTClaimsProvider(nil, nil) // nil invoker disables the hook
-	if got := p.ResolveClaims(nil, "0xW", "ns"); got != nil {
-		t.Errorf("nil invoker must yield nil claims, got %v", got)
-	}
-}
-
-func itoa(n int) string {
-	if n == 0 {
-		return "0"
-	}
-	var b []byte
-	for n > 0 {
-		b = append([]byte{byte('0' + n%10)}, b...)
-		n /= 10
-	}
-	return string(b)
-}
--- a/core/pkg/gateway/dependencies.go
+++ b/core/pkg/gateway/dependencies.go
@ -5,7 +5,6 @@ import (
 	"database/sql"
 	"fmt"
 	"net"
-	"net/url"
 	"os"
 	"path/filepath"
 	"strings"
@ -479,21 +478,15 @@ func initializeServerless(logger *logging.ColoredLogger, cfg *Config, deps *Depe

 	// Create secrets manager for serverless functions (AES-256-GCM encrypted).
 	//
-	// The encryption key is DERIVED from the cluster secret via HKDF
-	// (resolveSecretsEncryptionKeyHex), so every gateway in the cluster computes
-	// the identical key and a secret written on one node decrypts on every other
-	// node and survives rolling upgrades. This replaces the old per-node
-	// crypto/rand key file, whose divergence across an upgraded cluster kept
-	// get_secret broken (bugboard #837). The file key (cfg.SecretsEncryptionKey)
-	// remains only as a fallback when no cluster secret is available (legacy /
-	// single-node test rigs). allowEphemeral=false: a missing/invalid key fails
+	// The encryption key comes from the gateway Config (loaded from
+	// ~/.orama/secrets/secrets-encryption-key), NOT from engineCfg — engineCfg
+	// never has the key set, so passing it always produced a per-process
+	// ephemeral key and made get_secret return undecryptable values
+	// (bugboard #837). allowEphemeral=false: a missing/invalid key fails
 	// loudly here and disables get_secret rather than silently corrupting
 	// secrets.
 	var secretsMgr serverless.SecretsManager
-	if secretsKeyHex, keyErr := resolveSecretsEncryptionKeyHex(cfg.ClusterSecret, cfg.SecretsEncryptionKey); keyErr != nil {
-		logger.ComponentWarn(logging.ComponentGeneral, "Failed to derive secrets encryption key; get_secret will be unavailable",
-			zap.Error(keyErr))
-	} else if smImpl, secretsErr := hostfunctions.NewDBSecretsManager(deps.ORMClient, secretsKeyHex, false, logger.Logger); secretsErr != nil {
+	if smImpl, secretsErr := hostfunctions.NewDBSecretsManager(deps.ORMClient, cfg.SecretsEncryptionKey, false, logger.Logger); secretsErr != nil {
 		logger.ComponentWarn(logging.ComponentGeneral, "Failed to initialize secrets manager; get_secret will be unavailable",
 			zap.Error(secretsErr))
 	} else {
@ -511,7 +504,7 @@ func initializeServerless(logger *logging.ColoredLogger, cfg *Config, deps *Depe
 	//
 	// PushDispatcher (legacy) is set only when YAML defaults exist —
 	// kept for back-compat with code that hasn't migrated to Manager.
-	pushDispatcher, pushStore, pushManager, pushCfgStore, pushCredManager, err := buildPushDispatcher(cfg, deps.ORMClient, deps.Client, logger)
+	pushDispatcher, pushStore, pushManager, pushCfgStore, pushCredManager, err := buildPushDispatcher(cfg, deps.ORMClient, logger)
 	if err != nil {
 		// Non-fatal: log and continue. Functions calling push_send will get nil
 		// (silent no-op) and HTTP /v1/push/* endpoints return 503.
@ -655,14 +648,6 @@ func initializeServerless(logger *logging.ColoredLogger, cfg *Config, deps *Depe
 		authService.SetRqliteClient(deps.ORMClient)
 	}

-	// Wire the namespace claims-provider hook (bugboard #548): at JWT mint time
-	// the auth service invokes the namespace's reserved `auth-claims-provider`
-	// function (if deployed) and merges its additive claims (e.g. account_id)
-	// into the token. Fail-open — a missing/slow provider never breaks auth.
-	if deps.ServerlessInvoker != nil {
-		authService.SetClaimsResolver(newJWTClaimsProvider(deps.ServerlessInvoker, logger.Logger))
-	}
-
 	// Load or create EdDSA key for new JWT tokens. Bug #215 fix: when
 	// cfg.ClusterSecret is set, the key is derived deterministically from
 	// it via HKDF, so every gateway in the cluster shares the same Ed25519
@ -928,7 +913,6 @@ func appendRQLiteQueryParams(dsn string) string {
 func buildPushDispatcher(
 	cfg *Config,
 	db rqlite.Client,
-	globalDB client.NetworkClient,
 	logger *logging.ColoredLogger,
 ) (*push.PushDispatcher, push.PushDeviceStore, *push.Manager, push.ConfigStore, *pushcreds.Manager, error) {
 	if cfg.ClusterSecret == "" {
@ -965,25 +949,6 @@ func buildPushDispatcher(
 	pushcreds.Register(pushapns.NewValidator())
 	pushcreds.Register(pushntfy.NewValidator())

-	// ntfy cluster fan-out (bugboard #858): the default push infra runs an
-	// independent ntfy per node with no shared store, so a publish must reach
-	// EVERY active node for the subscriber's instance (picked by round-robin
-	// DNS) to receive it. Build a resolver over the global dns_nodes table; the
-	// factory attaches it only to providers using the shared default base URL
-	// (a namespace pointing ntfy at its own server is never fanned across our
-	// cluster). nil globalDB or an unparseable base URL → no fan-out (provider
-	// falls back to the single base URL).
-	var ntfyFanout *ntfyFanoutResolver
-	var ntfyFanoutHost string
-	if globalDB != nil {
-		if base := strings.TrimSpace(cfg.NtfyBaseURL); base != "" {
-			if u, perr := url.Parse(base); perr == nil && u.Hostname() != "" {
-				ntfyFanoutHost = u.Hostname()
-				ntfyFanout = newNtfyFanoutResolver(globalDB, u.Scheme, u.Port(), defaultNtfyFanoutTTL)
-			}
-		}
-	}
-
 	// ProviderFactory turns a resolved Config into the right set of
 	// provider instances. Lives here in dependencies.go because this is
 	// the only place that imports both the manager package and the
@ -1024,13 +989,6 @@ func buildPushDispatcher(
 			}
 		}
 		if ntfyCfg.BaseURL != "" {
-			// Fan out across all push nodes ONLY for the shared default infra.
-			// A namespace that overrode BaseURL with its own ntfy server keeps
-			// single-host delivery (its server, not our cluster).
-			if ntfyFanout != nil && ntfyCfg.BaseURL == cfg.NtfyBaseURL {
-				ntfyCfg.FanoutResolver = ntfyFanout.Hosts
-				ntfyCfg.FanoutHostHeader = ntfyFanoutHost
-			}
 			ps = append(ps, pushntfy.New(ntfyCfg, logger.Logger))
 		}
 		if c.ExpoAccessToken != "" {
--- a/core/pkg/gateway/handlers/auth/handlers_test.go
+++ b/core/pkg/gateway/handlers/auth/handlers_test.go
@@ -393,32 +393,6 @@ func TestRefreshHandler_NilAuthService(t *testing.T) {
 	}
 }

-// Bugboard #125: a non-bad-token failure (here ErrRotationNotConfigured from a
-// service with no rqlite client) must surface as a RETRYABLE 503 with a
-// Retry-After header — NOT a 401 that would force a locked device into an
-// impossible SIWE re-auth mid-call-ring.
-func TestRefreshHandler_TransientError_returns503Retryable(t *testing.T) {
-	svc, err := authsvc.NewService(testLogger(), nil, "", "default")
-	if err != nil {
-		t.Fatalf("failed to create auth service: %v", err)
-	}
-	h := NewHandlers(testLogger(), svc, nil, "default", noopInternalAuth)
-
-	body, _ := json.Marshal(RefreshRequest{RefreshToken: "some-valid-looking-token"})
-	req := httptest.NewRequest(http.MethodPost, "/v1/auth/refresh", bytes.NewReader(body))
-	req.Header.Set("Content-Type", "application/json")
-	rec := httptest.NewRecorder()
-
-	h.RefreshHandler(rec, req)
-
-	if rec.Code != http.StatusServiceUnavailable {
-		t.Fatalf("transient refresh failure must be 503, got %d", rec.Code)
-	}
-	if rec.Header().Get("Retry-After") == "" {
-		t.Error("503 refresh response should carry a Retry-After header")
-	}
-}
-
 // --- APIKeyToJWTHandler tests ---------------------------------------------

 func TestAPIKeyToJWTHandler_MissingKey(t *testing.T) {
--- a/core/pkg/gateway/handlers/auth/jwt_handler.go
+++ b/core/pkg/gateway/handlers/auth/jwt_handler.go
@@ -2,7 +2,6 @@ package auth

 import (
 	"encoding/json"
-	"errors"
 	"net/http"
 	"strings"
 	"time"
@@ -58,7 +57,7 @@ func (h *Handlers) APIKeyToJWTHandler(w http.ResponseWriter, r *http.Request) {
 		return
 	}

-	token, expUnix, err := h.authService.GenerateJWT(ns, key, 15*time.Minute, nil)
+	token, expUnix, err := h.authService.GenerateJWT(ns, key, 15*time.Minute)
 	if err != nil {
 		writeError(w, http.StatusInternalServerError, err.Error())
 		return
@@ -104,20 +103,11 @@ func (h *Handlers) RefreshHandler(w http.ResponseWriter, r *http.Request) {
 	// the SDK persists it (bug #239 fix) and uses it on the next refresh.
 	token, newRefreshToken, subject, expUnix, err := h.authService.RefreshToken(r.Context(), req.RefreshToken, req.Namespace)
 	if err != nil {
-		// Bugboard #125: a TRANSIENT rotation failure (rqlite leader briefly
-		// unavailable during a rolling restart) must surface as a retryable
-		// 503 — NOT a 401 — so the client retries within the call-ring window
-		// instead of tearing the session down to a full SIWE re-auth, which is
-		// impossible on a locked device answering a VoIP-woken call.
-		if errors.Is(err, authsvc.ErrRefreshTransient) || errors.Is(err, authsvc.ErrRotationNotConfigured) {
-			w.Header().Set("Retry-After", "1")
-			writeError(w, http.StatusServiceUnavailable, "refresh temporarily unavailable, retry")
-			return
-		}
-		// Genuine bad/expired/replayed token. The service emits a WARN log on
-		// replay (ErrRefreshTokenReplay) so the operator can investigate. We
-		// surface a generic 401 regardless — leaking "your token was already
-		// used" would help an attacker confirm a stolen token was rotated.
+		// The service emits a WARN log on replay (ErrRefreshTokenReplay)
+		// so the operator can investigate. We surface a generic 401 here
+		// regardless — leaking "your token was already used" to the
+		// caller would help an attacker confirm a stolen token has been
+		// rotated.
 		writeError(w, http.StatusUnauthorized, "invalid or expired refresh token")
 		return
 	}
--- a/core/pkg/gateway/handlers/serverless/types.go
+++ b/core/pkg/gateway/handlers/serverless/types.go
@@ -157,24 +157,6 @@ func (h *ServerlessHandlers) getJWTSubjectFromRequest(r *http.Request) string {
 	return strings.TrimSpace(claims.Sub)
 }

-// getJWTExpiryFromRequest returns the Bearer JWT's `exp` claim (unix seconds)
-// if the request was JWT-authenticated, or 0 otherwise (e.g. API-key auth, or
-// a token without an exp). Persistent WS connections capture this at upgrade
-// to enforce mid-session expiry — a long-lived socket must stop serving RPCs
-// once its authorizing token expires, unless refreshed via the #321
-// auth.refresh control frame. Bugboard #868.
-func (h *ServerlessHandlers) getJWTExpiryFromRequest(r *http.Request) int64 {
-	v := r.Context().Value(ctxkeys.JWT)
-	if v == nil {
-		return 0
-	}
-	claims, ok := v.(*auth.JWTClaims)
-	if !ok || claims == nil {
-		return 0
-	}
-	return claims.Exp
-}
-
 // getWalletFromRequest extracts wallet address from JWT.
 func (h *ServerlessHandlers) getWalletFromRequest(r *http.Request) string {
 	// Import strings package functions inline to avoid circular dependencies
--- a/core/pkg/gateway/handlers/serverless/ws_persistent_expiry_test.go
+++ b/core/pkg/gateway/handlers/serverless/ws_persistent_expiry_test.go
@@ -1,152 +0,0 @@
-package serverless
-
-import (
-	"context"
-	"net/http"
-	"net/http/httptest"
-	"testing"
-	"time"
-
-	"github.com/DeBrosOfficial/network/pkg/gateway/auth"
-	"github.com/DeBrosOfficial/network/pkg/gateway/ctxkeys"
-)
-
-// TestWSJWTExpired is the core security regression guard for bugboard #868: a
-// persistent WS authenticates ONCE at upgrade, and the read loop must stop
-// serving application frames once the authorizing JWT is past exp+grace.
-//
-// If wsJWTExpired starts returning false for a clearly-expired token (or true
-// for a still-valid one), an expired token regains full RPC access — including
-// turn.credentials minting — for the socket's lifetime.
-func TestWSJWTExpired(t *testing.T) {
-	// Fixed reference instant so the table is deterministic (the read loop
-	// uses time.Now() in production; the pure function takes `now` for tests).
-	now := time.Unix(1_700_000_000, 0)
-	grace := 120 * time.Second
-
-	cases := []struct {
-		name    string
-		expUnix int64
-		now     time.Time
-		want    bool
-	}{
-		{
-			name:    "no expiry to enforce (API-key auth, exp=0) never expires",
-			expUnix: 0,
-			now:     now,
-			want:    false,
-		},
-		{
-			name:    "negative exp treated as no-expiry (defensive)",
-			expUnix: -5,
-			now:     now,
-			want:    false,
-		},
-		{
-			name:    "token valid, well before exp",
-			expUnix: now.Add(10 * time.Minute).Unix(),
-			now:     now,
-			want:    false,
-		},
-		{
-			name:    "token just past exp but inside grace window — still allowed",
-			expUnix: now.Add(-30 * time.Second).Unix(),
-			now:     now,
-			want:    false,
-		},
-		{
-			name:    "token exactly at exp+grace boundary — not yet expired (After is strict)",
-			expUnix: now.Add(-grace).Unix(),
-			now:     now,
-			want:    false,
-		},
-		{
-			name:    "token past exp+grace — expired, must reject",
-			expUnix: now.Add(-(grace + time.Second)).Unix(),
-			now:     now,
-			want:    true,
-		},
-		{
-			name:    "token long expired — expired",
-			expUnix: now.Add(-24 * time.Hour).Unix(),
-			now:     now,
-			want:    true,
-		},
-	}
-
-	for _, tc := range cases {
-		t.Run(tc.name, func(t *testing.T) {
-			got := wsJWTExpired(tc.expUnix, tc.now, grace)
-			if got != tc.want {
-				t.Errorf("wsJWTExpired(exp=%d, now=%d, grace=%s) = %v; want %v",
-					tc.expUnix, tc.now.Unix(), grace, got, tc.want)
-			}
-		})
-	}
-}
-
-// TestGetJWTExpiryFromRequest verifies the gateway reads the authorizing JWT's
-// exp off the request context at upgrade. This is the value the read loop
-// enforces for the socket's lifetime (#868); if it silently returns 0 for a
-// JWT-authenticated request, expiry enforcement is disabled and the bug
-// re-opens.
-func TestGetJWTExpiryFromRequest(t *testing.T) {
-	h := newTestHandlers(nil)
-
-	t.Run("JWT with exp returns exp", func(t *testing.T) {
-		claims := &auth.JWTClaims{Sub: "alice", Exp: 1_700_000_123}
-		req := httptest.NewRequest(http.MethodGet, "/", nil)
-		req = req.WithContext(context.WithValue(req.Context(), ctxkeys.JWT, claims))
-
-		if got := h.getJWTExpiryFromRequest(req); got != 1_700_000_123 {
-			t.Errorf("getJWTExpiryFromRequest = %d; want 1700000123", got)
-		}
-	})
-
-	t.Run("no JWT on context returns 0 (API-key / unauthenticated)", func(t *testing.T) {
-		req := httptest.NewRequest(http.MethodGet, "/", nil)
-		if got := h.getJWTExpiryFromRequest(req); got != 0 {
-			t.Errorf("getJWTExpiryFromRequest = %d; want 0", got)
-		}
-	})
-
-	t.Run("nil claims under key returns 0", func(t *testing.T) {
-		req := httptest.NewRequest(http.MethodGet, "/", nil)
-		var nilClaims *auth.JWTClaims
-		req = req.WithContext(context.WithValue(req.Context(), ctxkeys.JWT, nilClaims))
-		if got := h.getJWTExpiryFromRequest(req); got != 0 {
-			t.Errorf("getJWTExpiryFromRequest = %d; want 0", got)
-		}
-	})
-}
-
-// TestWSAuthState_refreshExtendsExpiry documents the auth.refresh contract that
-// the read loop relies on (#868 + #321): a successful auth.refresh moves the
-// enforced expiry forward to the new token's exp, so a socket that refreshes
-// before its grace window closes keeps serving RPCs uninterrupted.
-//
-// We assert the state-transition directly (the full handler needs a live WS
-// conn for the ack write; that path is exercised by integration tests). The
-// invariant: after refresh, a `now` that WOULD have expired the old token no
-// longer expires the socket.
-func TestWSAuthState_refreshExtendsExpiry(t *testing.T) {
-	now := time.Unix(1_700_000_000, 0)
-	grace := 120 * time.Second
-
-	oldExp := now.Add(-(grace + time.Minute)).Unix() // already past grace → expired
-	state := &wsAuthState{expUnix: oldExp}
-
-	if !wsJWTExpired(state.expUnix, now, grace) {
-		t.Fatalf("precondition: old token should be expired at now")
-	}
-
-	// Simulate what handleAuthRefresh does on success: adopt the new token's
-	// exp.
-	newExp := now.Add(15 * time.Minute).Unix()
-	state.expUnix = newExp
-
-	if wsJWTExpired(state.expUnix, now, grace) {
-		t.Errorf("after refresh the socket must NOT be expired (exp=%d, now=%d)",
-			state.expUnix, now.Unix())
-	}
-}
--- a/core/pkg/gateway/handlers/serverless/ws_persistent_handler.go
+++ b/core/pkg/gateway/handlers/serverless/ws_persistent_handler.go
@@ -22,51 +22,6 @@ import (
 // application traffic that goes straight to WASM. Bugboard #321.
 var oramaControlFramePrefix = []byte(`"__orama"`)

-const (
-	// wsJWTExpiryGrace is the slack past a JWT's `exp` before the gateway
-	// stops serving application frames on a persistent WS. It covers clock
-	// skew between the gateway and the issuing path plus the client's
-	// refresh round-trip (the #321 auth.refresh control frame). Bugboard
-	// #868: without this, a socket authenticated ONCE at upgrade keeps full
-	// RPC access — including turn.credentials minting — for the socket's
-	// entire lifetime even after the token expires.
-	//
-	// Note: on the auth.refresh path ParseAndVerifyJWT independently allows
-	// its own ±60s exp skew, so worst-case service-past-exp is this grace
-	// plus that skew (~180s), not 120s flat. Both bounds are deliberate and
-	// the socket is force-closed once they elapse.
-	wsJWTExpiryGrace = 120 * time.Second
-
-	// wsCloseJWTExpired is the application-specific WS close code sent when a
-	// persistent socket is torn down for serving past its JWT expiry. It sits
-	// in the private-use range (4000-4999) and is distinct from protocol
-	// codes so clients can special-case it as "reconnect with a fresh token".
-	// Bugboard #868.
-	wsCloseJWTExpired = 4401
-)
-
-// wsAuthState carries the live JWT expiry for a persistent WS across the read
-// loop and the auth.refresh control handler. Both run in the SAME goroutine —
-// control frames are handled inline in the read loop before any frame reaches
-// WASM — so the field needs no synchronization. Bugboard #868.
-type wsAuthState struct {
-	// expUnix is the `exp` (unix seconds) of the JWT currently authorizing
-	// this socket. 0 means "no expiry to enforce" (e.g. API-key auth or a
-	// token without exp) — such sockets are exempt from mid-session expiry.
-	expUnix int64
-}
-
-// wsJWTExpired reports whether a persistent WS authorized by a JWT expiring at
-// expUnix (unix seconds) has passed its enforcement deadline at time now,
-// allowing grace for clock skew + refresh round-trip. expUnix <= 0 means there
-// is no expiry to enforce and is never considered expired. Bugboard #868.
-func wsJWTExpired(expUnix int64, now time.Time, grace time.Duration) bool {
-	if expUnix <= 0 {
-		return false
-	}
-	return now.After(time.Unix(expUnix, 0).Add(grace))
-}
-
 // oramaControlFrame is the wire shape for gateway-handled control
 // frames on a persistent WS. The single Type field discriminates;
 // payload fields specific to each Type ride alongside.
@@ -142,12 +97,6 @@ func (h *ServerlessHandlers) handlePersistentWebSocket(
 	invCtx := h.buildPersistentInvocationContext(r, fn, clientID)
 	callerWallet := invCtx.CallerWallet

-	// Capture the authorizing JWT's expiry so the read loop can enforce it
-	// for the socket's lifetime (bugboard #868). A successful auth.refresh
-	// control frame updates this in place; 0 (non-JWT auth) disables the
-	// check.
-	authState := &wsAuthState{expUnix: h.getJWTExpiryFromRequest(r)}
-
 	// Instantiate the persistent module. This compiles once (cached) and
 	// creates one wazero instance bound to this connection.
 	module, err := h.engine.InstantiatePersistent(r.Context(), fn, invCtx)
@@ -247,7 +196,7 @@ func (h *ServerlessHandlers) handlePersistentWebSocket(
 		// avoids json.Unmarshal for every application frame. Only
 		// frames carrying the `"__orama"` key get parsed.
 		if bytes.Contains(frame, oramaControlFramePrefix) {
-			handled, ackErr := h.handleOramaControlFrame(frame, fn, inst, authState, namespace, clientID, conn)
+			handled, ackErr := h.handleOramaControlFrame(frame, fn, inst, namespace, clientID, conn)
 			if ackErr != nil {
 				h.logger.Warn("persistent WS: control-frame ack write failed",
 					zap.String("client_id", clientID),
@@ -264,26 +213,6 @@ func (h *ServerlessHandlers) handlePersistentWebSocket(
 			// application frame.
 		}

-		// Bugboard #868: a persistent WS authenticates ONCE at upgrade.
-		// Before handing an application frame to WASM, reject it once the
-		// authorizing JWT is past exp+grace — otherwise an expired token
-		// keeps serving RPCs (incl. turn.credentials minting) indefinitely.
-		// The client keeps the socket alive by sending an
-		// {"__orama":"auth.refresh"} control frame (handled above, which
-		// bypasses this check) before the token expires. The check runs
-		// only on application frames so an expired client can still recover
-		// via auth.refresh rather than being locked out.
-		if wsJWTExpired(authState.expUnix, time.Now(), wsJWTExpiryGrace) {
-			h.logger.Info("persistent WS: closing — JWT expired without refresh",
-				zap.String("client_id", clientID),
-				zap.String("namespace", namespace),
-				zap.Int64("jwt_exp", authState.expUnix))
-			_ = conn.WriteControl(websocket.CloseMessage,
-				websocket.FormatCloseMessage(wsCloseJWTExpired, "jwt expired; reconnect with a fresh token"),
-				time.Now().Add(time.Second))
-			break
-		}
-
 		if err := inst.Submit(frame); err != nil {
 			h.logger.Warn("persistent WS submit failed (queue full?)",
 				zap.String("client_id", clientID),
@@ -347,7 +276,6 @@ func (h *ServerlessHandlers) handleOramaControlFrame(
 	frame []byte,
 	fn *serverless.Function,
 	inst *persistent.Instance,
-	authState *wsAuthState,
 	namespace, clientID string,
 	conn *websocket.Conn,
 ) (handled bool, ackErr error) {
@@ -363,7 +291,7 @@ func (h *ServerlessHandlers) handleOramaControlFrame(

 	switch ctrl.Type {
 	case "auth.refresh":
-		return true, h.handleAuthRefresh(ctrl, fn, inst, authState, namespace, clientID, conn)
+		return true, h.handleAuthRefresh(ctrl, fn, inst, namespace, clientID, conn)
 	default:
 		// Unknown control type — ack with an error so the client knows
 		// the frame was seen but ignored. Treat as handled (don't
@@ -384,7 +312,6 @@ func (h *ServerlessHandlers) handleAuthRefresh(
 	ctrl oramaControlFrame,
 	fn *serverless.Function,
 	inst *persistent.Instance,
-	authState *wsAuthState,
 	namespace, clientID string,
 	conn *websocket.Conn,
 ) error {
@@ -480,12 +407,6 @@ func (h *ServerlessHandlers) handleAuthRefresh(
 		})
 	}

-	// Extend the socket's expiry enforcement to the new token's exp so the
-	// read loop keeps serving RPCs past the old deadline (bugboard #868).
-	// authState and the read loop share this goroutine, so the write is
-	// race-free.
-	authState.expUnix = claims.Exp
-
 	h.logger.Info("persistent WS: auth.refresh applied",
 		zap.String("client_id", clientID),
 		zap.String("namespace", namespace),
--- a/core/pkg/gateway/jwt_test.go
+++ b/core/pkg/gateway/jwt_test.go
@@ -23,7 +23,7 @@ func TestJWTGenerateAndParse(t *testing.T) {
 		t.Fatalf("failed to create service: %v", err)
 	}

-	tok, exp, err := svc.GenerateJWT("ns1", "subj", time.Minute, nil)
+	tok, exp, err := svc.GenerateJWT("ns1", "subj", time.Minute)
 	if err != nil || exp <= 0 {
 		t.Fatalf("gen err=%v exp=%d", err, exp)
 	}
@@ -50,7 +50,7 @@ func TestJWTExpired(t *testing.T) {
 	}

 	// Use sufficiently negative TTL to bypass allowed clock skew
-	tok, _, err := svc.GenerateJWT("ns1", "subj", -2*time.Minute, nil)
+	tok, _, err := svc.GenerateJWT("ns1", "subj", -2*time.Minute)
 	if err != nil {
 		t.Fatalf("gen err=%v", err)
 	}
--- a/core/pkg/gateway/middleware_ws_jwt_test.go
+++ b/core/pkg/gateway/middleware_ws_jwt_test.go
@@ -51,7 +51,7 @@ func newAuthServiceForTest(t *testing.T) *auth.Service {

 func TestAuthMiddleware_WSJWTQuery_validToken(t *testing.T) {
 	svc := newAuthServiceForTest(t)
-	token, _, err := svc.GenerateJWT("anchat-test", "0xWALLET_SUBJECT", 15*time.Minute, nil)
+	token, _, err := svc.GenerateJWT("anchat-test", "0xWALLET_SUBJECT", 15*time.Minute)
 	if err != nil {
 		t.Fatalf("GenerateJWT: %v", err)
 	}
@@ -125,7 +125,7 @@ func TestAuthMiddleware_WSJWTQuery_ignoredOnNonWSRequest(t *testing.T) {
 	// privacy issues of JWTs leaking via referrer headers, browser history,
 	// and access logs.
 	svc := newAuthServiceForTest(t)
-	token, _, err := svc.GenerateJWT("ns", "sub", 15*time.Minute, nil)
+	token, _, err := svc.GenerateJWT("ns", "sub", 15*time.Minute)
 	if err != nil {
 		t.Fatalf("GenerateJWT: %v", err)
 	}
@@ -156,8 +156,8 @@ func TestAuthMiddleware_WSJWTQuery_headerWinsOverQuery(t *testing.T) {
 	// Header path runs FIRST and wins. Verifies the query fallback is a
 	// fallback, not an override.
 	svc := newAuthServiceForTest(t)
-	headerJWT, _, _ := svc.GenerateJWT("ns-header", "sub-header", 15*time.Minute, nil)
-	queryJWT, _, _ := svc.GenerateJWT("ns-query", "sub-query", 15*time.Minute, nil)
+	headerJWT, _, _ := svc.GenerateJWT("ns-header", "sub-header", 15*time.Minute)
+	queryJWT, _, _ := svc.GenerateJWT("ns-query", "sub-query", 15*time.Minute)

 	g := &Gateway{authService: svc}

@@ -242,7 +242,7 @@ func TestAuthMiddleware_WSJWTQuery_malformedJWTFallsThrough(t *testing.T) {

 func TestValidateAuthForNamespaceProxy_WSJWTQuery(t *testing.T) {
 	svc := newAuthServiceForTest(t)
-	token, _, err := svc.GenerateJWT("anchat-test", "0xWALLET", 15*time.Minute, nil)
+	token, _, err := svc.GenerateJWT("anchat-test", "0xWALLET", 15*time.Minute)
 	if err != nil {
 		t.Fatalf("GenerateJWT: %v", err)
 	}
@@ -270,7 +270,7 @@ func TestValidateAuthForNamespaceProxy_WSJWTQuery(t *testing.T) {

 func TestValidateAuthForNamespaceProxy_WSJWTQuery_ignoredOnNonWS(t *testing.T) {
 	svc := newAuthServiceForTest(t)
-	token, _, err := svc.GenerateJWT("anchat-test", "0xWALLET", 15*time.Minute, nil)
+	token, _, err := svc.GenerateJWT("anchat-test", "0xWALLET", 15*time.Minute)
 	if err != nil {
 		t.Fatalf("GenerateJWT: %v", err)
 	}
@@ -295,7 +295,7 @@ func TestValidateAuthForNamespaceProxy_WSJWTQuery_ignoredOnNonWS(t *testing.T) {
 // doesn't leak into proxy hops or downstream logs.
 func TestAuthMiddleware_WSJWTQuery_strippedAfterVerify(t *testing.T) {
 	svc := newAuthServiceForTest(t)
-	token, _, err := svc.GenerateJWT("anchat-test", "0xWALLET", 15*time.Minute, nil)
+	token, _, err := svc.GenerateJWT("anchat-test", "0xWALLET", 15*time.Minute)
 	if err != nil {
 		t.Fatalf("GenerateJWT: %v", err)
 	}
--- a/core/pkg/gateway/push_fanout.go
+++ b/core/pkg/gateway/push_fanout.go
@@ -1,95 +0,0 @@
-package gateway
-
-import (
-	"context"
-	"fmt"
-	"sync"
-	"time"
-
-	"github.com/DeBrosOfficial/network/pkg/client"
-)
-
-// defaultNtfyFanoutTTL bounds how long the active-push-node list is cached
-// before re-querying dns_nodes. Matches the DNS heartbeat cadence, so a node
-// added/removed is picked up within a heartbeat without hammering rqlite on
-// every push.
-const defaultNtfyFanoutTTL = 30 * time.Second
-
-// ntfyFanoutResolver resolves the set of ntfy publish base URLs (one per active
-// push node) for fan-out delivery, caching the result for a short TTL. Each
-// node runs an independent ntfy with no shared store, so a publish must reach
-// every node for the subscriber's instance to receive it (bugboard #858).
-type ntfyFanoutResolver struct {
-	// query returns the public IPs of the currently-active push nodes. Injected
-	// so the cache/transform logic is unit-testable without a live cluster.
-	query  func(ctx context.Context) ([]string, error)
-	scheme string // "https" (prod) / "http" (dev), from the configured base URL
-	port   string // explicit port from the base URL, or "" for the scheme default
-
-	ttl      time.Duration
-	mu       sync.Mutex
-	cached   []string
-	cachedAt time.Time
-}
-
-// newNtfyFanoutResolver builds a resolver backed by the global dns_nodes table.
-func newNtfyFanoutResolver(globalDB client.NetworkClient, scheme, port string, ttl time.Duration) *ntfyFanoutResolver {
-	return &ntfyFanoutResolver{
-		scheme: scheme,
-		port:   port,
-		ttl:    ttl,
-		query: func(ctx context.Context) ([]string, error) {
-			db := globalDB.Database()
-			res, err := db.Query(client.WithInternalAuth(ctx), "SELECT ip_address FROM dns_nodes WHERE status = 'active'")
-			if err != nil {
-				return nil, fmt.Errorf("query active push nodes: %w", err)
-			}
-			if res == nil {
-				return nil, nil
-			}
-			ips := make([]string, 0, len(res.Rows))
-			for _, row := range res.Rows {
-				if len(row) == 0 {
-					continue
-				}
-				if ip, ok := row[0].(string); ok && ip != "" {
-					ips = append(ips, ip)
-				}
-			}
-			return ips, nil
-		},
-	}
-}
-
-// Hosts returns the cached fan-out base URLs, refreshing from the query when the
-// cache is stale. On a query error it returns the last-known list (possibly nil)
-// alongside the error, so the caller can decide to fall back to its base URL
-// rather than dropping a push.
-func (r *ntfyFanoutResolver) Hosts(ctx context.Context) ([]string, error) {
-	r.mu.Lock()
-	defer r.mu.Unlock()
-
-	if r.cached != nil && time.Since(r.cachedAt) < r.ttl {
-		return r.cached, nil
-	}
-
-	ips, err := r.query(ctx)
-	if err != nil {
-		return r.cached, err
-	}
-
-	hosts := make([]string, 0, len(ips))
-	suffix := ""
-	if r.port != "" {
-		suffix = ":" + r.port
-	}
-	for _, ip := range ips {
-		if ip == "" {
-			continue
-		}
-		hosts = append(hosts, r.scheme+"://"+ip+suffix)
-	}
-	r.cached = hosts
-	r.cachedAt = time.Now()
-	return hosts, nil
-}
--- a/core/pkg/gateway/push_fanout_test.go
+++ b/core/pkg/gateway/push_fanout_test.go
@@ -1,125 +0,0 @@
-package gateway
-
-import (
-	"context"
-	"errors"
-	"testing"
-	"time"
-)
-
-// Bugboard #858 — the fan-out resolver turns active dns_nodes into ntfy publish
-// base URLs and caches them for a short TTL. These pin the transform + caching.
-
-func TestNtfyFanoutResolver_buildsSchemeHostPort(t *testing.T) {
-	r := &ntfyFanoutResolver{
-		scheme: "https",
-		port:   "",
-		ttl:    time.Minute,
-		query:  func(context.Context) ([]string, error) { return []string{"1.2.3.4", "5.6.7.8"}, nil },
-	}
-	hosts, err := r.Hosts(context.Background())
-	if err != nil {
-		t.Fatalf("Hosts: %v", err)
-	}
-	want := []string{"https://1.2.3.4", "https://5.6.7.8"}
-	if len(hosts) != len(want) {
-		t.Fatalf("got %v; want %v", hosts, want)
-	}
-	for i := range want {
-		if hosts[i] != want[i] {
-			t.Errorf("host[%d] = %q; want %q", i, hosts[i], want[i])
-		}
-	}
-}
-
-func TestNtfyFanoutResolver_includesExplicitPort(t *testing.T) {
-	r := &ntfyFanoutResolver{
-		scheme: "http",
-		port:   "8090",
-		ttl:    time.Minute,
-		query:  func(context.Context) ([]string, error) { return []string{"10.0.0.6"}, nil },
-	}
-	hosts, _ := r.Hosts(context.Background())
-	if len(hosts) != 1 || hosts[0] != "http://10.0.0.6:8090" {
-		t.Errorf("got %v; want [http://10.0.0.6:8090]", hosts)
-	}
-}
-
-func TestNtfyFanoutResolver_skipsEmptyIPs(t *testing.T) {
-	r := &ntfyFanoutResolver{
-		scheme: "https",
-		ttl:    time.Minute,
-		query:  func(context.Context) ([]string, error) { return []string{"", "1.2.3.4", ""}, nil },
-	}
-	hosts, _ := r.Hosts(context.Background())
-	if len(hosts) != 1 || hosts[0] != "https://1.2.3.4" {
-		t.Errorf("got %v; want only the non-empty IP", hosts)
-	}
-}
-
-func TestNtfyFanoutResolver_cachesWithinTTL(t *testing.T) {
-	calls := 0
-	r := &ntfyFanoutResolver{
-		scheme: "https",
-		ttl:    time.Minute,
-		query: func(context.Context) ([]string, error) {
-			calls++
-			return []string{"1.2.3.4"}, nil
-		},
-	}
-	for i := 0; i < 3; i++ {
-		if _, err := r.Hosts(context.Background()); err != nil {
-			t.Fatalf("Hosts: %v", err)
-		}
-	}
-	if calls != 1 {
-		t.Errorf("query called %d times; want 1 (cached within TTL)", calls)
-	}
-}
-
-func TestNtfyFanoutResolver_requeriesAfterTTL(t *testing.T) {
-	calls := 0
-	r := &ntfyFanoutResolver{
-		scheme: "https",
-		ttl:    time.Nanosecond, // expire immediately
-		query: func(context.Context) ([]string, error) {
-			calls++
-			return []string{"1.2.3.4"}, nil
-		},
-	}
-	_, _ = r.Hosts(context.Background())
-	time.Sleep(time.Millisecond)
-	_, _ = r.Hosts(context.Background())
-	if calls != 2 {
-		t.Errorf("query called %d times; want 2 (TTL expired between calls)", calls)
-	}
-}
-
-func TestNtfyFanoutResolver_queryError_returnsStaleCache(t *testing.T) {
-	fail := false
-	r := &ntfyFanoutResolver{
-		scheme: "https",
-		ttl:    time.Nanosecond,
-		query: func(context.Context) ([]string, error) {
-			if fail {
-				return nil, errors.New("rqlite unreachable")
-			}
-			return []string{"1.2.3.4"}, nil
-		},
-	}
-	// Prime the cache.
-	if _, err := r.Hosts(context.Background()); err != nil {
-		t.Fatalf("prime: %v", err)
-	}
-	time.Sleep(time.Millisecond)
-	// Now the query fails — Hosts must return the stale cache alongside the error
-	// so the caller can fall back rather than drop the push.
-	fail = true
-	hosts, err := r.Hosts(context.Background())
-	if err == nil {
-		t.Fatal("want the query error surfaced")
-	}
-	if len(hosts) != 1 || hosts[0] != "https://1.2.3.4" {
-		t.Errorf("want the stale cache returned on error; got %v", hosts)
-	}
-}
--- a/core/pkg/gateway/secrets_key.go
+++ b/core/pkg/gateway/secrets_key.go
@@ -1,49 +0,0 @@
-package gateway
-
-import (
-	"encoding/hex"
-	"strings"
-
-	"github.com/DeBrosOfficial/network/pkg/secrets"
-)
-
-// secretsEncryptionDerivePurpose is the HKDF info label used to derive the
-// function-secrets AES-256 key from the cluster secret. Deriving it (instead of
-// generating a per-node crypto/rand key file) guarantees every gateway in the
-// cluster computes the IDENTICAL key, so a secret written on one node decrypts
-// on every other node and survives rolling upgrades — eliminating the
-// key-divergence / convergence-window class that kept get_secret broken for
-// days (bugboard #837). Same pattern as the cluster-wide JWT signing key
-// (jwtEdDSADerivePurpose) and the TURN encryption key ("turn-encryption").
-//
-// Bumping the version label (e.g. "...-v2") is a DELIBERATE rotation that
-// invalidates every stored function secret (they must be re-`set`). It must
-// never be changed casually.
-const secretsEncryptionDerivePurpose = "orama-secrets-encryption-v1"
-
-// resolveSecretsEncryptionKeyHex returns the hex-encoded AES-256 key the
-// serverless secrets manager should use to encrypt/decrypt function secrets.
-//
-// Primary: derive deterministically from the cluster secret via HKDF, so the
-// key is identical on every gateway in the cluster and stable across restarts
-// and rolling upgrades. The cluster secret is TrimSpace'd first so a stray
-// trailing newline on one node's secret file can't silently diverge its derived
-// key from the rest of the cluster (the host gateway reads the file untrimmed
-// while the namespace gateway trims it — without this they could derive
-// different keys and reintroduce #837).
-//
-// Fallback: when no cluster secret is available (single-node test rigs / legacy
-// deployments without a shared secret), fall back to an explicitly-configured
-// key file. An empty result then makes the production secrets manager fail loud
-// (NewDBSecretsManager with allowEphemeral=false), rather than silently using a
-// per-process ephemeral key.
-func resolveSecretsEncryptionKeyHex(clusterSecret, fileKeyHex string) (string, error) {
-	if cs := strings.TrimSpace(clusterSecret); cs != "" {
-		key, err := secrets.DeriveKey(cs, secretsEncryptionDerivePurpose)
-		if err != nil {
-			return "", err
-		}
-		return hex.EncodeToString(key), nil
-	}
-	return strings.TrimSpace(fileKeyHex), nil
-}
--- a/core/pkg/gateway/secrets_key_test.go
+++ b/core/pkg/gateway/secrets_key_test.go
@@ -1,95 +0,0 @@
-package gateway
-
-import (
-	"encoding/hex"
-	"testing"
-
-	"github.com/DeBrosOfficial/network/pkg/secrets"
-)
-
-// Bugboard #837 — the function-secrets AES key must be DERIVED from the cluster
-// secret (not a per-node random file), so every gateway computes the identical
-// key and stored secrets survive rolling upgrades. These pin the derivation.
-
-func TestResolveSecretsEncryptionKeyHex_deterministic(t *testing.T) {
-	// Same cluster secret → byte-identical key, every time. This is the whole
-	// point: any gateway in the cluster derives the same key, so a secret set on
-	// one node decrypts on all others.
-	const cs = "cluster-secret-abc123"
-	a, err := resolveSecretsEncryptionKeyHex(cs, "")
-	if err != nil {
-		t.Fatalf("resolve: %v", err)
-	}
-	b, err := resolveSecretsEncryptionKeyHex(cs, "")
-	if err != nil {
-		t.Fatalf("resolve: %v", err)
-	}
-	if a == "" || a != b {
-		t.Fatalf("derivation not deterministic: %q vs %q", a, b)
-	}
-	// Valid AES-256 key: 32 bytes = 64 hex chars.
-	raw, err := hex.DecodeString(a)
-	if err != nil || len(raw) != 32 {
-		t.Errorf("derived key is not 32-byte hex: len(raw)=%d err=%v", len(raw), err)
-	}
-}
-
-func TestResolveSecretsEncryptionKeyHex_trimInvariant(t *testing.T) {
-	// A trailing newline on one node's cluster-secret file must NOT change the
-	// derived key — otherwise the host gateway (reads untrimmed) and a namespace
-	// gateway (reads trimmed) would diverge and reintroduce #837.
-	trimmed, _ := resolveSecretsEncryptionKeyHex("cluster-secret-abc123", "")
-	withNL, _ := resolveSecretsEncryptionKeyHex("cluster-secret-abc123\n", "")
-	withSpaces, _ := resolveSecretsEncryptionKeyHex("  cluster-secret-abc123\t\n", "")
-	if trimmed != withNL || trimmed != withSpaces {
-		t.Errorf("derived key is not whitespace-invariant: %q / %q / %q", trimmed, withNL, withSpaces)
-	}
-}
-
-func TestResolveSecretsEncryptionKeyHex_distinctSecretsDistinctKeys(t *testing.T) {
-	a, _ := resolveSecretsEncryptionKeyHex("cluster-secret-A", "")
-	b, _ := resolveSecretsEncryptionKeyHex("cluster-secret-B", "")
-	if a == b {
-		t.Errorf("distinct cluster secrets must derive distinct keys; both = %q", a)
-	}
-}
-
-func TestResolveSecretsEncryptionKeyHex_purposeSeparatedFromTURN(t *testing.T) {
-	// The secrets key must NOT equal the TURN key derived from the same cluster
-	// secret — domain separation via the HKDF info label.
-	const cs = "cluster-secret-abc123"
-	secretsHex, _ := resolveSecretsEncryptionKeyHex(cs, "")
-	turnKey, err := secrets.DeriveKey(cs, "turn-encryption")
-	if err != nil {
-		t.Fatalf("derive turn key: %v", err)
-	}
-	if secretsHex == hex.EncodeToString(turnKey) {
-		t.Error("secrets key collides with the TURN key — HKDF purpose label not providing domain separation")
-	}
-}
-
-func TestResolveSecretsEncryptionKeyHex_emptyClusterSecretUsesFileKey(t *testing.T) {
-	// Legacy/test rigs with no cluster secret fall back to the explicitly
-	// configured file key (trimmed).
-	const fileKey = "0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef"
-	got, err := resolveSecretsEncryptionKeyHex("", fileKey+"\n")
-	if err != nil {
-		t.Fatalf("resolve: %v", err)
-	}
-	if got != fileKey {
-		t.Errorf("empty cluster secret should return the trimmed file key; got %q", got)
-	}
-}
-
-func TestResolveSecretsEncryptionKeyHex_emptyBothReturnsEmpty(t *testing.T) {
-	// No cluster secret AND no file key → empty result, which makes the
-	// production secrets manager fail loud (allowEphemeral=false) instead of
-	// silently using an ephemeral key.
-	got, err := resolveSecretsEncryptionKeyHex("", "")
-	if err != nil {
-		t.Fatalf("resolve: %v", err)
-	}
-	if got != "" {
-		t.Errorf("want empty result when neither source has a key; got %q", got)
-	}
-}
--- a/core/pkg/namespace/cluster_manager.go
+++ b/core/pkg/namespace/cluster_manager.go
@ -86,11 +86,6 @@ type ClusterManager struct {
 	// Track provisioning operations
 	provisioningMu sync.RWMutex
 	provisioning   map[string]bool // namespace -> in progress
-
-	// Leadership-locality reconciler cooldown (bugboard #708): per-namespace
-	// timestamp of the last leadership transfer, to bound churn. Lazy-init.
-	leaderLocalityMu       sync.Mutex
-	leaderLocalityCooldown map[string]time.Time
 }

 // NewClusterManager creates a new cluster manager
@ -1790,24 +1785,8 @@ func (cm *ClusterManager) saveLocalState(state *ClusterLocalState) error {
 		return fmt.Errorf("failed to marshal state: %w", err)
 	}
 	path := filepath.Join(dir, "cluster-state.json")
-	// Atomic write: this file now carries the namespace TURN shared secret
-	// (bugboard #130) and is rewritten from multiple converge paths. Write a
-	// temp file then rename over the target so a reader (or a concurrent
-	// writer) never observes a half-written secret — rename is atomic on the
-	// same filesystem. 0600 + chmod on the temp file keeps the secret out of
-	// world/group read; the rename then makes the live file 0600 too, which
-	// also tightens a file an older release left at 0644.
-	tmp := path + ".tmp"
-	if err := os.WriteFile(tmp, data, 0600); err != nil {
-		return fmt.Errorf("failed to write temp state file: %w", err)
-	}
-	if err := os.Chmod(tmp, 0600); err != nil {
-		os.Remove(tmp)
-		return fmt.Errorf("failed to set temp state file permissions: %w", err)
-	}
-	if err := os.Rename(tmp, path); err != nil {
-		os.Remove(tmp)
-		return fmt.Errorf("failed to rename state file into place: %w", err)
+	if err := os.WriteFile(path, data, 0644); err != nil {
+		return fmt.Errorf("failed to write state file: %w", err)
 	}
 	cm.logger.Info("Saved cluster local state", zap.String("namespace", state.NamespaceName), zap.String("path", path))
 	return nil
@ -1859,78 +1838,12 @@ func (cm *ClusterManager) RestoreLocalClustersFromDisk(ctx context.Context) (int

 // restoreWebRTC is the resolved WebRTC gateway config for a restored
 // namespace gateway.
-const (
-	// webrtcResolveRetries / webrtcResolveRetryDelay bound how long the converge
-	// waits for a slow/just-restarted node's namespace rqlite to become readable
-	// before giving up on the WebRTC secret. A distant node (high WG RTT) can
-	// take a few seconds to sync; without this it reads empty once and comes up
-	// with TURN disabled (bugboard #130). 5 × 2s = 10s ceiling on the cold path.
-	webrtcResolveRetries    = 5
-	webrtcResolveRetryDelay = 2 * time.Second
-)
-
-// resolveWebRTCConfigWithRetry calls fetch up to `retries` times, sleeping
-// `delay` between attempts, and returns the first result whose error is nil. A
-// distant/just-restarted node's namespace rqlite can take a few seconds to
-// become readable; without the retry the read fails once and the gateway comes
-// up with TURN disabled (bugboard #130). A genuine decrypt failure (stale
-// cluster-secret) also errors and exhausts the retries, returning the final
-// error so the caller can mark the result unresolved. `sleep` is injected so
-// unit tests exercise the loop without real delay.
-func resolveWebRTCConfigWithRetry(retries int, delay time.Duration, sleep func(time.Duration), fetch func() (*WebRTCConfig, error)) (*WebRTCConfig, error) {
-	var cfg *WebRTCConfig
-	var err error
-	for attempt := 0; attempt < retries; attempt++ {
-		cfg, err = fetch()
-		if err == nil {
-			return cfg, nil
-		}
-		if attempt < retries-1 {
-			sleep(delay)
-		}
-	}
-	return cfg, err
-}
-
-// applyResolvedWebRTCToState copies a freshly-resolved WebRTC config into the
-// local cluster state so a future cold start can read the TURN secret from disk
-// instead of the (possibly-slow) namespace rqlite (bugboard #130). Returns true
-// iff the state changed, so the caller only rewrites the on-disk file when
-// there's something to persist. Pure — unit-testable without a live cluster.
-func applyResolvedWebRTCToState(state *ClusterLocalState, wr restoreWebRTC) bool {
-	hasTURN := wr.turnSecret != ""
-	hasSFU := wr.sfuPort > 0
-	if state.TURNSharedSecret == wr.turnSecret &&
-		state.TURNDomain == wr.turnDomain &&
-		state.TURNStealthDomain == wr.stealthDomain &&
-		state.SFUSignalingPort == wr.sfuPort &&
-		state.HasTURN == hasTURN &&
-		state.HasSFU == hasSFU {
-		return false
-	}
-	state.HasTURN = hasTURN
-	state.HasSFU = hasSFU
-	state.TURNSharedSecret = wr.turnSecret
-	state.TURNDomain = wr.turnDomain
-	state.TURNStealthDomain = wr.stealthDomain
-	state.SFUSignalingPort = wr.sfuPort
-	return true
-}
-
 type restoreWebRTC struct {
 	enabled       bool
 	sfuPort       int
 	turnDomain    string
 	turnSecret    string
 	stealthDomain string // feat-124: empty when webrtc stealth is disabled
-	// unresolved is true when the DB lookup ERRORED (vs. resolved-but-not-
-	// enabled) AND the local cache had no secret to fall back to. The caller
-	// must NOT write a WebRTC-disabled gateway config off an unresolved
-	// lookup — that silently kills turn.credentials on a node that should
-	// serve TURN (bugboard #130: a decrypt failure after cluster-secret
-	// rotation was swallowed into "disabled"). enabled is always false when
-	// unresolved.
-	unresolved bool
 }

 // chooseRestoreWebRTC resolves a restored gateway's WebRTC config. TWO
@ -1943,18 +1856,9 @@ type restoreWebRTC struct {
 //   - SFU (sfuPort) is PER-NODE — non-zero only when this node runs a
 //     local SFU (for /v1/webrtc/signal + /rooms proxying).
 //
-// Precedence: DB-FIRST. The namespace_webrtc_config row is the source of
-// truth for the CURRENT TURN secret, so we always consult it. The local
-// cluster-state.json cache (dbFetch's counterpart) is a FALLBACK ONLY —
-// used when the DB read fails (a slow/just-restarted node whose namespace
-// rqlite has not synced yet). This is the bugboard #130 FOLLOW-UP fix: the
-// earlier state-FIRST read short-circuited the DB whenever the cache held a
-// secret and so NEVER re-validated a present-but-stale cached secret. If a
-// secret was rotated (disable→enable) while a node was offline, that node
-// kept serving the OLD secret indefinitely. DB-first means a stale cache
-// can survive at most until the DB becomes readable on the next converge —
-// never indefinitely — while still letting a genuinely DB-down node come up
-// on TURN via the cache (the #130 resilience the cache was added for).
+// Precedence: prefer the local state file; fall back to the DB (source of
+// truth) when the state file lacks the TURN secret (the namespace-wide
+// "webrtc is enabled" marker). dbFetch is lazy — only hit when needed.
 //
 // `enabled` is true when EITHER a TURN secret OR an SFU port is present,
 // so the caller knows to write a webrtc block. A non-SFU gateway gets
@ -1965,47 +1869,43 @@ type restoreWebRTC struct {
 // standing up the full restore path (systemd spawner + DB + port store).
 func chooseRestoreWebRTC(
 	stateHasSFU bool, stateSFUPort int, stateTURNDomain, stateTURNSecret, stateStealthDomain string,
-	dbFetch func() (turnSecret, turnDomain, stealthDomain string, sfuPort int, resolved bool),
+	dbFetch func() (turnSecret, turnDomain, stealthDomain string, sfuPort int),
 ) restoreWebRTC {
-	// DB-first: consult the source of truth before trusting the local cache.
-	dbSecret, dbDomain, dbStealth, dbSFU, resolved := dbFetch()
-	if resolved {
-		// The DB read landed and is authoritative. dbSecret == "" means the
-		// namespace genuinely has no WebRTC enabled — honor that (disable),
-		// do NOT fall back to a possibly-stale cached secret. A present
-		// secret is the CURRENT one and wins over any cached value.
-		if dbSecret == "" {
-			return restoreWebRTC{}
-		}
-		return restoreWebRTC{
-			enabled:       true,
-			sfuPort:       dbSFU,
-			turnDomain:    dbDomain,
-			turnSecret:    dbSecret,
-			stealthDomain: dbStealth,
-		}
-	}
-
-	// The DB/decrypt lookup ERRORED (slow node whose namespace rqlite is not
-	// readable yet, or a decrypt failure after a cluster-secret rotation).
-	// Fall back to the locally-cached secret so TURN still comes up — possibly
-	// stale, but functional, and self-correcting on the next converge once the
-	// DB is readable (NOT indefinite). If the cache is empty too, signal
-	// unresolved so the caller preserves the running gateway config instead of
-	// blanking TURN (bugboard #130).
+	turnSecret := stateTURNSecret
+	turnDomain := stateTURNDomain
+	stealthDomain := stateStealthDomain
 	sfuPort := 0
 	if stateHasSFU && stateSFUPort > 0 {
 		sfuPort = stateSFUPort
 	}
-	if stateTURNSecret == "" && sfuPort == 0 {
-		return restoreWebRTC{unresolved: true}
+
+	// Fall back to the DB when the state file has no TURN secret. A
+	// non-empty TURN secret is the marker that the namespace has WebRTC
+	// enabled at all. The state file is not updated by EnableWebRTC, so a
+	// namespace enabled after the state file was written reaches here with
+	// an empty secret. (Stealth toggles DO rewrite cluster state on every
+	// node, so the state-first read stays fresh for stealthDomain too.)
+	if turnSecret == "" {
+		if dbSecret, dbDomain, dbStealth, dbSFU := dbFetch(); dbSecret != "" {
+			turnSecret = dbSecret
+			if turnDomain == "" {
+				turnDomain = dbDomain
+			}
+			if stealthDomain == "" {
+				stealthDomain = dbStealth
+			}
+			if sfuPort == 0 {
+				sfuPort = dbSFU
+			}
+		}
 	}
+
 	return restoreWebRTC{
-		enabled:       stateTURNSecret != "" || sfuPort > 0,
+		enabled:       turnSecret != "" || sfuPort > 0,
 		sfuPort:       sfuPort,
-		turnDomain:    stateTURNDomain,
-		turnSecret:    stateTURNSecret,
-		stealthDomain: stateStealthDomain,
+		turnDomain:    turnDomain,
+		turnSecret:    turnSecret,
+		stealthDomain: stealthDomain,
 	}
 }

@ -2154,44 +2054,18 @@ func (cm *ClusterManager) restoreClusterFromState(ctx context.Context, state *Cl
 			SecretsEncryptionKey:  cm.secretsEncryptionKey,
 		}

-		// Resolve WebRTC config. DB-FIRST (source of truth for the CURRENT
-		// secret); the local state cache is consulted only when the DB read
-		// fails (bugboard #130 follow-up — see chooseRestoreWebRTC). Bugboard
-		// #25 — the state file is NOT updated by EnableWebRTC, so a namespace
-		// enabled AFTER its state file was written carries no SFU/TURN fields
-		// here; reading the DB re-materializes them.
+		// Resolve WebRTC config. Prefer the local state file; fall back to
+		// the DB (source of truth) to self-heal stale state. Bugboard #25 —
+		// the state file is NOT updated by EnableWebRTC, so a namespace
+		// enabled AFTER its state file was written carries no SFU/TURN
+		// fields here. The lazy dbFetch only hits the DB when the state
+		// file has no TURN secret.
 		wr := chooseRestoreWebRTC(
 			state.HasSFU, state.SFUSignalingPort, state.TURNDomain, state.TURNSharedSecret, state.TURNStealthDomain,
-			func() (turnSecret, turnDomain, stealthDomain string, sfuPort int, resolved bool) {
-				// Retry the read on a transient error. A distant/slow node's
-				// namespace rqlite may not be synced/readable yet at cold-start
-				// converge time — without the retry the read fails once and the
-				// gateway is written with TURN disabled (bugboard #130). The
-				// secret IS in the DB; we just need the read to land once the
-				// follower catches up (typically a few seconds). A genuine
-				// decrypt failure (stale key) also errors here and will exhaust
-				// the retries → unresolved → the caller preserves the running
-				// config rather than blanking it.
-				webrtcCfg, err := resolveWebRTCConfigWithRetry(
-					webrtcResolveRetries, webrtcResolveRetryDelay, time.Sleep,
-					func() (*WebRTCConfig, error) {
-						return cm.GetWebRTCConfig(ctx, state.NamespaceName)
-					})
-				if err != nil {
-					// Persistent error after retries (slow read that never
-					// landed, or a decrypt failure). Do NOT swallow into
-					// "disabled" — surface loudly and signal unresolved so the
-					// caller preserves the running config (bugboard #130).
-					cm.logger.Error("WebRTC TURN secret unresolvable on this node after retries — refusing to silently disable TURN; preserving existing gateway config. If this is a cluster-secret rotation, regenerate with `orama namespace disable webrtc` then `orama namespace enable webrtc`.",
-						zap.String("namespace", state.NamespaceName),
-						zap.String("node_id", cm.localNodeID),
-						zap.Int("attempts", webrtcResolveRetries),
-						zap.Error(err))
-					return "", "", "", 0, false
-				}
-				if webrtcCfg == nil {
-					// Resolved cleanly: the namespace genuinely has no WebRTC.
-					return "", "", "", 0, true
+			func() (turnSecret, turnDomain, stealthDomain string, sfuPort int) {
+				webrtcCfg, err := cm.GetWebRTCConfig(ctx, state.NamespaceName)
+				if err != nil || webrtcCfg == nil {
+					return "", "", "", 0
 				}
 				// TURN is namespace-wide; SFU port is per-node and may be
 				// absent on a gateway-only (non-SFU) node — that's fine,
@ -2203,7 +2077,7 @@ func (cm *ClusterManager) restoreClusterFromState(ctx context.Context, state *Cl
 				return webrtcCfg.TURNSharedSecret,
 					fmt.Sprintf("turn.ns-%s.%s", state.NamespaceName, cm.baseDomain),
 					cm.stealthDomainFor(state.NamespaceName, webrtcCfg),
-					sfu, true
+					sfu
 			},
 		)
 		if wr.enabled {
@ -2215,90 +2089,25 @@ func (cm *ClusterManager) restoreClusterFromState(ctx context.Context, state *Cl
 			gwCfg.TURNDomain = wr.turnDomain
 			gwCfg.TURNSecret = wr.turnSecret
 			gwCfg.TURNStealthDomain = wr.stealthDomain
-
-			// Cache the resolved secret into THIS node's local state so that if
-			// the NEXT cold start can't read the namespace rqlite (a distant/
-			// slow node whose follower hasn't synced), chooseRestoreWebRTC can
-			// fall back to this on-disk secret instead of coming up with TURN
-			// disabled (bugboard #130). The cache is a FALLBACK — DB-first
-			// resolution still prefers the live DB secret whenever it's
-			// readable, so this cached value can never pin the node to a stale
-			// secret. Each node self-heals its own cache on a successful
-			// resolve; nothing is sent cross-node.
-			if applyResolvedWebRTCToState(state, wr) {
-				if err := cm.saveLocalState(state); err != nil {
-					cm.logger.Warn("Failed to cache resolved WebRTC config to local state (cold start may fall back to the DB read next boot)",
-						zap.String("namespace", state.NamespaceName), zap.Error(err))
-				} else {
-					cm.logger.Info("Cached resolved WebRTC config to local state for cold-start resilience (bugboard #130)",
-						zap.String("namespace", state.NamespaceName))
-				}
-			}
-		} else if !wr.unresolved {
-			// The DB read RESOLVED that this namespace has NO WebRTC (disabled).
-			// Clear any stale cached secret from local state so a future cold
-			// start that hits a transient DB error can't fall back to it and
-			// resurrect TURN for a disabled namespace — the hole being: a node
-			// that was offline during DisableWebRTC never received the cleared
-			// state push and would otherwise keep serving the old secret. Only
-			// do this on a RESOLVED-disabled read, NEVER on an unresolved
-			// (DB-error) one — there the cache IS the fallback and must survive.
-			if applyResolvedWebRTCToState(state, restoreWebRTC{}) {
-				if err := cm.saveLocalState(state); err != nil {
-					cm.logger.Warn("Failed to clear stale cached WebRTC secret from local state after DB reported the namespace disabled",
-						zap.String("namespace", state.NamespaceName), zap.Error(err))
-				} else {
-					cm.logger.Info("Cleared stale cached WebRTC secret from local state (namespace disabled in DB)",
-						zap.String("namespace", state.NamespaceName))
-				}
-			}
 		}

 		resp, err := http.Get(fmt.Sprintf("http://localhost:%d/v1/health", pb.GatewayHTTPPort))
 		if err == nil {
 			resp.Body.Close()
-			switch {
-			case wr.unresolved:
-				// Bugboard #130 guard: the WebRTC secret could not be resolved
-				// (DB/decrypt error, logged above). The gateway is already up
-				// and may be serving TURN from a valid on-disk secret — do NOT
-				// reconcile it to the empty/disabled block we'd otherwise
-				// build, which would kill turn.credentials on this node. Leave
-				// the running config untouched; the operator regenerates the
-				// secret.
-				//
-				// Note: this also defers ReconcileGateway's #837
-				// secrets-encryption-key reconcile for this one converge pass.
-				// That is acceptable — the operator action that fixes the
-				// unresolved TURN secret (regenerate + restart) re-runs the
-				// full reconcile, and pre-fix this path would have corrupted
-				// the WebRTC block anyway.
-				cm.logger.Error("Gateway up but WebRTC secret unresolved — skipping reconcile to avoid disabling TURN on the running config (bugboard #130)",
-					zap.String("namespace", state.NamespaceName))
-			default:
-				// Gateway is already up. Reconcile config drift (bugboard #25 —
-				// the WARM case): if the running gateway's on-disk config has a
-				// WebRTC block that differs from the desired (e.g. it lost the
-				// block on a prior restart where it stayed healthy and the
-				// cold-spawn path below never ran), rewrite the config +
-				// restart. ReconcileGateway is a no-op when the on-disk block
-				// already matches, so this does NOT cause a restart loop.
-				if rerr := cm.systemdSpawner.ReconcileGateway(ctx, state.NamespaceName, cm.localNodeID, gwCfg); rerr != nil {
-					cm.logger.Warn("Gateway WebRTC reconcile failed (leaving running config as-is)",
-						zap.String("namespace", state.NamespaceName), zap.Error(rerr))
-				}
+			// Gateway is already up. Reconcile config drift (bugboard #25 —
+			// the WARM case): if the running gateway's on-disk config has a
+			// WebRTC block that differs from the desired (e.g. it lost the
+			// block on a prior restart where it stayed healthy and the
+			// cold-spawn path below never ran), rewrite the config + restart.
+			// ReconcileGateway is a no-op when the on-disk block already
+			// matches, so this does NOT cause a restart loop on every boot.
+			if rerr := cm.systemdSpawner.ReconcileGateway(ctx, state.NamespaceName, cm.localNodeID, gwCfg); rerr != nil {
+				cm.logger.Warn("Gateway WebRTC reconcile failed (leaving running config as-is)",
+					zap.String("namespace", state.NamespaceName), zap.Error(rerr))
 			}
 		} else {
-			// Gateway is down → cold spawn. We must bring a gateway up
-			// regardless (the namespace needs one); but if the WebRTC secret
-			// was unresolved we can't write a working TURN block, so warn
-			// loudly that TURN is degraded on this node until the secret is
-			// regenerated (bugboard #130).
-			switch {
-			case wr.unresolved:
-				cm.logger.Error("Cold-spawning gateway with TURN UNAVAILABLE — WebRTC secret unresolved on this node; turn.credentials will return namespace_not_configured until it is regenerated (`orama namespace disable webrtc` then `orama namespace enable webrtc`)",
-					zap.String("namespace", state.NamespaceName))
-			case wr.enabled && !state.HasSFU:
+			// Gateway is down → cold spawn with the resolved config.
+			if wr.enabled && !state.HasSFU {
 				cm.logger.Info("Re-materialized WebRTC gateway config from DB (state file was stale)",
 					zap.String("namespace", state.NamespaceName),
 					zap.Int("sfu_port", wr.sfuPort))
--- a/core/pkg/namespace/cluster_state_perms_test.go
+++ b/core/pkg/namespace/cluster_state_perms_test.go
@ -1,71 +0,0 @@
-package namespace
-
-import (
-	"os"
-	"path/filepath"
-	"testing"
-
-	"go.uber.org/zap"
-)
-
-// Bugboard #130 — cluster-state.json carries the namespace TURN shared secret
-// (plaintext HMAC), so every writer of it must produce a 0600 file and tighten
-// any pre-existing world-readable file on rewrite. SaveClusterState is the
-// RECEIVER-side writer that persists state pushed from the coordinator to a
-// remote namespace node; without this it landed 0644.
-
-func TestSaveClusterState_writes0600(t *testing.T) {
-	base := t.TempDir()
-	s := &SystemdSpawner{namespaceBase: base, logger: zap.NewNop()}
-
-	if err := s.SaveClusterState("ns-test", []byte(`{"turn_shared_secret":"sek-123"}`)); err != nil {
-		t.Fatalf("SaveClusterState: %v", err)
-	}
-
-	path := filepath.Join(base, "ns-test", "cluster-state.json")
-	info, err := os.Stat(path)
-	if err != nil {
-		t.Fatalf("stat cluster-state.json: %v", err)
-	}
-	if perm := info.Mode().Perm(); perm != 0600 {
-		t.Errorf("cluster-state.json mode = %o; want 0600 (it carries the TURN secret)", perm)
-	}
-	// No leftover temp file from the atomic write.
-	if _, err := os.Stat(path + ".tmp"); !os.IsNotExist(err) {
-		t.Errorf("temp file should not survive a successful save; stat err = %v", err)
-	}
-}
-
-func TestSaveClusterState_tightensExisting0644(t *testing.T) {
-	base := t.TempDir()
-	s := &SystemdSpawner{namespaceBase: base, logger: zap.NewNop()}
-
-	// Simulate a file an older release wrote world-readable.
-	dir := filepath.Join(base, "ns-test")
-	if err := os.MkdirAll(dir, 0755); err != nil {
-		t.Fatal(err)
-	}
-	path := filepath.Join(dir, "cluster-state.json")
-	if err := os.WriteFile(path, []byte(`{"old":true}`), 0644); err != nil {
-		t.Fatal(err)
-	}
-
-	if err := s.SaveClusterState("ns-test", []byte(`{"turn_shared_secret":"sek-new"}`)); err != nil {
-		t.Fatalf("SaveClusterState: %v", err)
-	}
-
-	info, err := os.Stat(path)
-	if err != nil {
-		t.Fatalf("stat cluster-state.json: %v", err)
-	}
-	if perm := info.Mode().Perm(); perm != 0600 {
-		t.Errorf("rewrite did not tighten perms: mode = %o; want 0600", perm)
-	}
-	data, err := os.ReadFile(path)
-	if err != nil {
-		t.Fatal(err)
-	}
-	if string(data) != `{"turn_shared_secret":"sek-new"}` {
-		t.Errorf("content not replaced atomically: %s", data)
-	}
-}
--- a/core/pkg/namespace/leader_locality.go
+++ b/core/pkg/namespace/leader_locality.go
@ -1,213 +0,0 @@
-package namespace
-
-import (
-	"context"
-	"net"
-	"path/filepath"
-	"time"
-
-	"github.com/DeBrosOfficial/network/pkg/rqlite"
-	"go.uber.org/zap"
-)
-
-// Bugboard #708 — namespace raft leadership is geography-blind: the initial
-// leader is sortedNodeIDs[0] over random libp2p peer IDs, and raft re-elects
-// freely on every restart. When a geographically-distant node (high WireGuard
-// RTT to its peers) becomes the leader, EVERY namespace write funnels through
-// the distant node and waits on its cross-region replication for quorum — each
-// rqlite hop jumps from ~20ms (co-located) to ~256ms, stacking into 5-10s RPCs
-// that break calling.
-//
-// This reconciler keeps namespace leadership on a co-located voter. It NEVER
-// removes a node or changes voter membership — all nodes stay voters (quorum
-// and fault tolerance unchanged). It only hands leadership OFF a node that is
-// isolated from the rest of the cluster, using rqlite's own
-// transfer-leadership API.
-const (
-	// leaderLocalityInterval is how often each node checks whether the
-	// namespace clusters it leads are well-placed.
-	leaderLocalityInterval = 90 * time.Second
-	// leaderLocalityRTTThreshold: if the leader's CLOSEST voter peer is farther
-	// than this, the leader is treated as geographically isolated and hands off
-	// leadership. Co-located nodes are ~20ms apart; a distant node is ~256ms —
-	// 100ms cleanly separates the two without false positives.
-	leaderLocalityRTTThreshold = 100 * time.Millisecond
-	// leaderLocalityCooldown bounds how often a single namespace's leadership
-	// is moved. In the common topology (a lone distant node among co-located
-	// peers) ONE transfer settles leadership on a co-located voter, which then
-	// stays (it has a nearby peer, so it never re-triggers). In a pathological
-	// all-mutually-distant topology there is no good leader to move to and the
-	// nearest-peer transfer would rotate; the cooldown caps that to roughly one
-	// transfer per node per window (bounded, non-destructive — membership and
-	// quorum are never touched), and node selection clustering most nodes
-	// ~20ms apart makes that case rare.
-	leaderLocalityCooldown = 10 * time.Minute
-	// leaderLocalityDialTimeout bounds each per-peer RTT probe.
-	leaderLocalityDialTimeout = 3 * time.Second
-)
-
-// decideLeadershipTransfer is the pure decision: should the local leader hand
-// off leadership, and to which voter? peerRTTs maps each OTHER reachable voter's
-// raft address → measured RTT. Returns a target and true ONLY when this node is
-// the leader, every voter is reachable (don't destabilize an already-degraded
-// cluster), the cooldown has elapsed, and even the CLOSEST peer is farther than
-// `threshold` — i.e. the leader is isolated. If the leader has at least one
-// nearby voter it is central enough; leave it. The chosen target is the nearest
-// reachable peer (which, in a 1-distant/N-close topology, is a co-located node
-// that will then have a nearby peer of its own → stable).
-func decideLeadershipTransfer(isLeader, allVotersReachable, cooldownElapsed bool, peerRTTs map[string]time.Duration, threshold time.Duration) (string, bool) {
-	if !isLeader || !allVotersReachable || !cooldownElapsed || len(peerRTTs) == 0 {
-		return "", false
-	}
-	var bestAddr string
-	var bestRTT time.Duration
-	for addr, rtt := range peerRTTs {
-		if bestAddr == "" || rtt < bestRTT {
-			bestAddr, bestRTT = addr, rtt
-		}
-	}
-	if bestRTT > threshold {
-		return bestAddr, true
-	}
-	return "", false
-}
-
-// measurePeerRTTs probes every OTHER voter's raft address and returns their
-// RTTs plus whether ALL voters were reachable+measurable (so the caller can
-// refuse to act on a degraded cluster). Non-voters and self are skipped.
-func measurePeerRTTs(nodes rqlite.RQLiteNodes, selfID string) (map[string]time.Duration, bool) {
-	peerRTTs := make(map[string]time.Duration)
-	allReachable := true
-	for _, n := range nodes {
-		if !n.Voter || n.ID == selfID {
-			continue
-		}
-		if !n.Reachable {
-			allReachable = false
-			continue
-		}
-		dialAddr := n.Address
-		if dialAddr == "" {
-			dialAddr = n.ID
-		}
-		rtt, derr := measureRaftRTT(dialAddr, leaderLocalityDialTimeout)
-		if derr != nil {
-			allReachable = false
-			continue
-		}
-		peerRTTs[n.ID] = rtt
-	}
-	return peerRTTs, allReachable
-}
-
-// measureRaftRTT returns the TCP-connect time to a peer's raft address — a
-// privilege-free proxy for WireGuard round-trip latency.
-func measureRaftRTT(raftAddr string, timeout time.Duration) (time.Duration, error) {
-	start := time.Now()
-	conn, err := net.DialTimeout("tcp", raftAddr, timeout)
-	if err != nil {
-		return 0, err
-	}
-	_ = conn.Close()
-	return time.Since(start), nil
-}
-
-func (cm *ClusterManager) leaderTransferCooldownElapsed(namespace string) bool {
-	cm.leaderLocalityMu.Lock()
-	defer cm.leaderLocalityMu.Unlock()
-	last, ok := cm.leaderLocalityCooldown[namespace]
-	return !ok || time.Since(last) >= leaderLocalityCooldown
-}
-
-func (cm *ClusterManager) recordLeaderTransfer(namespace string) {
-	cm.leaderLocalityMu.Lock()
-	defer cm.leaderLocalityMu.Unlock()
-	if cm.leaderLocalityCooldown == nil {
-		cm.leaderLocalityCooldown = make(map[string]time.Time)
-	}
-	cm.leaderLocalityCooldown[namespace] = time.Now()
-}
-
-// StartLeaderLocalityReconciler runs the periodic leadership-locality check
-// until ctx is cancelled. Safe to call once at node boot.
-func (cm *ClusterManager) StartLeaderLocalityReconciler(ctx context.Context) {
-	go func() {
-		ticker := time.NewTicker(leaderLocalityInterval)
-		defer ticker.Stop()
-		for {
-			select {
-			case <-ctx.Done():
-				return
-			case <-ticker.C:
-				cm.reconcileLeaderLocality(ctx)
-			}
-		}
-	}()
-}
-
-// reconcileLeaderLocality checks every namespace cluster this node hosts and,
-// for any it currently leads from an isolated position, transfers leadership to
-// the nearest co-located voter.
-func (cm *ClusterManager) reconcileLeaderLocality(ctx context.Context) {
-	pattern := filepath.Join(cm.baseDataDir, "*", "cluster-state.json")
-	matches, err := filepath.Glob(pattern)
-	if err != nil {
-		cm.logger.Debug("leader-locality: glob failed", zap.Error(err))
-		return
-	}
-	for _, path := range matches {
-		if ctx.Err() != nil {
-			return
-		}
-		state, err := loadLocalState(path)
-		if err != nil {
-			continue
-		}
-		cm.reconcileNamespaceLeader(state.NamespaceName, state.LocalPorts.RQLiteHTTPPort)
-	}
-}
-
-// reconcileNamespaceLeader handles a single namespace's leadership locality.
-func (cm *ClusterManager) reconcileNamespaceLeader(namespace string, rqliteHTTPPort int) {
-	if rqliteHTTPPort == 0 {
-		return
-	}
-	status, err := rqlite.GetRaftStatus(rqliteHTTPPort)
-	if err != nil {
-		// rqlite not up / not reachable on this node — nothing to do.
-		return
-	}
-	if status.Store.Raft.State != "Leader" {
-		return // only the leader can transfer leadership away
-	}
-	selfID := status.Store.Raft.LeaderID
-
-	nodes, err := rqlite.GetRaftNodes(rqliteHTTPPort)
-	if err != nil {
-		return
-	}
-
-	peerRTTs, allVotersReachable := measurePeerRTTs(nodes, selfID)
-
-	target, transfer := decideLeadershipTransfer(
-		true, allVotersReachable, cm.leaderTransferCooldownElapsed(namespace),
-		peerRTTs, leaderLocalityRTTThreshold,
-	)
-	if !transfer {
-		return
-	}
-
-	cm.logger.Info("leader-locality: this node is an isolated namespace raft leader — transferring leadership to a co-located voter (bugboard #708)",
-		zap.String("namespace", namespace),
-		zap.String("from", selfID),
-		zap.String("to", target),
-		zap.Duration("target_rtt", peerRTTs[target]),
-	)
-	// Record the cooldown BEFORE the transfer so a slow/looping transfer can't
-	// re-fire on the next tick regardless of outcome.
-	cm.recordLeaderTransfer(namespace)
-	if err := rqlite.TransferLeadershipTo(rqliteHTTPPort, target, cm.logger); err != nil {
-		cm.logger.Warn("leader-locality: leadership transfer failed",
-			zap.String("namespace", namespace), zap.Error(err))
-	}
-}
--- a/core/pkg/namespace/leader_locality_test.go
+++ b/core/pkg/namespace/leader_locality_test.go
@ -1,93 +0,0 @@
-package namespace
-
-import (
-	"testing"
-	"time"
-)
-
-// Bugboard #708 — the leadership-locality reconciler hands leadership off a
-// geographically-isolated namespace raft leader to the nearest co-located
-// voter, without changing membership. These pin the decision logic.
-
-const thr = 100 * time.Millisecond
-
-func TestDecideLeadershipTransfer_isolatedLeaderTransfersToNearest(t *testing.T) {
-	// Distant leader (109): both peers are far. Transfer to the NEAREST (57 @235ms).
-	peers := map[string]time.Duration{
-		"10.0.0.6:10001": 256 * time.Millisecond, // 51
-		"10.0.0.1:10001": 235 * time.Millisecond, // 57
-	}
-	target, transfer := decideLeadershipTransfer(true, true, true, peers, thr)
-	if !transfer {
-		t.Fatal("an isolated leader (closest peer 235ms > 100ms) must transfer")
-	}
-	if target != "10.0.0.1:10001" {
-		t.Errorf("must transfer to the NEAREST peer; got %q", target)
-	}
-}
-
-func TestDecideLeadershipTransfer_centralLeaderStays(t *testing.T) {
-	// Co-located leader (51): has a nearby peer (57 @20ms) and a distant one (109).
-	// min RTT 20ms < 100ms → leader is central → NO transfer (the correct steady state).
-	peers := map[string]time.Duration{
-		"10.0.0.1:10001":  20 * time.Millisecond,  // 57 (close)
-		"10.0.0.11:10001": 256 * time.Millisecond, // 109 (far)
-	}
-	if _, transfer := decideLeadershipTransfer(true, true, true, peers, thr); transfer {
-		t.Error("a leader with a nearby voter is central enough; must NOT transfer")
-	}
-}
-
-func TestDecideLeadershipTransfer_allDistantTransfersToNearest(t *testing.T) {
-	// Pathological all-mutually-distant topology: every peer is far, so there is
-	// no truly co-located target. The reconciler still moves to the NEAREST
-	// (best available); the per-namespace cooldown (TestLeaderTransferCooldown)
-	// is what bounds the resulting churn to ~one transfer per node per window.
-	peers := map[string]time.Duration{
-		"a": 250 * time.Millisecond,
-		"b": 210 * time.Millisecond,
-	}
-	target, transfer := decideLeadershipTransfer(true, true, true, peers, thr)
-	if !transfer || target != "b" {
-		t.Errorf("all-distant: expected transfer to nearest 'b'; got transfer=%v target=%q", transfer, target)
-	}
-}
-
-func TestDecideLeadershipTransfer_guards(t *testing.T) {
-	farPeers := map[string]time.Duration{"p": 300 * time.Millisecond}
-
-	if _, transfer := decideLeadershipTransfer(false, true, true, farPeers, thr); transfer {
-		t.Error("non-leader must never transfer")
-	}
-	if _, transfer := decideLeadershipTransfer(true, false, true, farPeers, thr); transfer {
-		t.Error("must not transfer when a voter is unreachable (degraded cluster)")
-	}
-	if _, transfer := decideLeadershipTransfer(true, true, false, farPeers, thr); transfer {
-		t.Error("must not transfer during cooldown")
-	}
-	if _, transfer := decideLeadershipTransfer(true, true, true, map[string]time.Duration{}, thr); transfer {
-		t.Error("must not transfer with no measurable peers (single-node / all-unreachable)")
-	}
-}
-
-func TestDecideLeadershipTransfer_exactlyThresholdStays(t *testing.T) {
-	// Closest peer exactly at the threshold is NOT > threshold → stay (no churn at the boundary).
-	peers := map[string]time.Duration{"p": thr}
-	if _, transfer := decideLeadershipTransfer(true, true, true, peers, thr); transfer {
-		t.Error("RTT exactly at the threshold must not trigger a transfer")
-	}
-}
-
-func TestLeaderTransferCooldown(t *testing.T) {
-	cm := &ClusterManager{}
-	if !cm.leaderTransferCooldownElapsed("ns") {
-		t.Error("fresh namespace (no prior transfer) must be out of cooldown")
-	}
-	cm.recordLeaderTransfer("ns")
-	if cm.leaderTransferCooldownElapsed("ns") {
-		t.Error("immediately after a transfer the namespace must be in cooldown")
-	}
-	if !cm.leaderTransferCooldownElapsed("other-ns") {
-		t.Error("cooldown must be per-namespace")
-	}
-}
--- a/core/pkg/namespace/restore_webrtc_test.go
+++ b/core/pkg/namespace/restore_webrtc_test.go
@ -1,69 +1,39 @@
 package namespace

-import (
-	"errors"
-	"testing"
-	"time"
-)
+import "testing"

 // Bugboard #25 — WebRTC config drift on restart + TURN/SFU decouple.
-// Bugboard #130 follow-up — DB-FIRST resolution so a stale cached secret can
-// never be served indefinitely.
 //
-// chooseRestoreWebRTC resolves a restored gateway's WebRTC config DB-FIRST
-// (the namespace_webrtc_config row is the source of truth for the current
-// secret); the local cluster-state.json cache is a FALLBACK consulted only
-// when the DB read fails (a slow node whose namespace rqlite hasn't synced).
-// It also DECOUPLES the two aspects: TURN (secret + domain) is namespace-wide
-// so ANY gateway can serve credentials; the SFU port is per-node (0 on a
-// gateway-only node). Pins the drift fallback, the non-SFU-gateway case, and
-// the DB-first precedence (DB secret wins over a cached/stale one).
+// chooseRestoreWebRTC resolves a restored gateway's WebRTC config from the
+// local state file (which EnableWebRTC does NOT update) with a DB fallback
+// (source of truth). It also DECOUPLES the two aspects: TURN (secret +
+// domain) is namespace-wide so ANY gateway can serve credentials; the SFU
+// port is per-node (0 on a gateway-only node). Pins both the drift
+// fallback and the non-SFU-gateway case.

-// dbFetch signature: () -> (turnSecret, turnDomain, stealthDomain string, sfuPort int, resolved bool).
-// resolved=true means the lookup completed (with or without a config);
-// resolved=false means it ERRORED (e.g. decrypt failure) → unresolved.
-func dbNone() (string, string, string, int, bool) { return "", "", "", 0, true }
+// dbFetch signature: () -> (turnSecret, turnDomain, stealthDomain string, sfuPort int).
+func dbNone() (string, string, string, int) { return "", "", "", 0 }

-// dbError models a DB/decrypt failure: the lookup did not complete.
-func dbError() (string, string, string, int, bool) { return "", "", "", 0, false }
-
-func dbFull(secret, domain string, sfuPort int) func() (string, string, string, int, bool) {
-	return func() (string, string, string, int, bool) { return secret, domain, "", sfuPort, true }
+func dbFull(secret, domain string, sfuPort int) func() (string, string, string, int) {
+	return func() (string, string, string, int) { return secret, domain, "", sfuPort }
 }

-func TestChooseRestoreWebRTC_dbSecretWinsOverCachedState(t *testing.T) {
-	// THE #130 FOLLOW-UP (staleness) case. The state file holds a cached
-	// secret, but the DB (source of truth) has a DIFFERENT, current secret —
-	// e.g. the secret was rotated (disable→enable) while this node was offline.
-	// DB-first MUST serve the current DB secret, NOT the stale cached one. The
-	// old state-first logic short-circuited the DB here and served "old-secret"
-	// indefinitely.
-	got := chooseRestoreWebRTC(true, 7800, "turn.ns-x.dbrs.space", "old-secret", "cdn-old.dbrs.space",
-		dbFull("new-secret", "turn.ns-x.dbrs.space", 7800))
+func TestChooseRestoreWebRTC_stateFileCompleteWins(t *testing.T) {
+	// State file has TURN secret → use it, and NEVER consult the DB
+	// (the lazy dbFetch must not be called — saves a query on the hot
+	// restart path).
+	dbCalled := false
+	got := chooseRestoreWebRTC(true, 7800, "turn.ns-x.dbrs.space", "state-secret", "",
+		func() (string, string, string, int) { dbCalled = true; return dbNone() })

-	if !got.enabled {
-		t.Fatal("DB has a current secret; result must be enabled")
+	if dbCalled {
+		t.Error("DB fetch was called even though the state file had the TURN secret (should short-circuit)")
 	}
-	if got.turnSecret != "new-secret" {
-		t.Errorf("BUG #130 STALENESS: turnSecret = %q; want new-secret (the current DB value, not the stale cache)", got.turnSecret)
+	if !got.enabled || got.sfuPort != 7800 || got.turnSecret != "state-secret" {
+		t.Errorf("want state-file values; got %+v", got)
 	}
-	if got.sfuPort != 7800 || got.turnDomain != "turn.ns-x.dbrs.space" {
-		t.Errorf("want DB-derived block; got %+v", got)
-	}
-}
-
-func TestChooseRestoreWebRTC_dbDisabledOverridesCachedSecret(t *testing.T) {
-	// The cache holds a secret but the DB read completes and reports NO WebRTC
-	// (the namespace was disabled while this node was offline). DB-first must
-	// honor the disable, NOT keep serving the stale cached secret.
-	got := chooseRestoreWebRTC(true, 7800, "turn.ns-x.dbrs.space", "stale-secret", "",
-		dbNone) // dbNone = resolved, no config
-
-	if got.enabled {
-		t.Errorf("DB reports disabled: must not keep serving the cached secret; got %+v", got)
-	}
-	if got.unresolved {
-		t.Error("a clean resolved-but-disabled lookup must not be marked unresolved")
+	if got.turnDomain != "turn.ns-x.dbrs.space" {
+		t.Errorf("turnDomain = %q; want state-file value", got.turnDomain)
 	}
 }

@ -109,19 +79,19 @@ func TestChooseRestoreWebRTC_nonSFUGatewayGetsTURNOnly(t *testing.T) {
 	}
 }

-func TestChooseRestoreWebRTC_cachedTurnOnlyFallbackOnDBError(t *testing.T) {
-	// A non-SFU node holds a cached TURN secret (HasSFU false / port 0) and the
-	// DB read ERRORS (its namespace rqlite isn't readable yet at cold start).
-	// DB-first falls back to the cached secret so the gateway still serves TURN
-	// credentials — sfuPort stays 0 (no local SFU). This is the #130 resilience
-	// the cache exists for.
-	got := chooseRestoreWebRTC(false, 0, "turn.ns-x.dbrs.space", "state-secret", "", dbError)
+func TestChooseRestoreWebRTC_stateHasTURNButNoSFU(t *testing.T) {
+	// State file for a non-SFU node: it has the TURN secret but HasSFU is
+	// false / port 0. Must use the state TURN secret with sfuPort=0 and
+	// NOT consult the DB (TURN secret present = complete enough).
+	dbCalled := false
+	got := chooseRestoreWebRTC(false, 0, "turn.ns-x.dbrs.space", "state-secret", "",
+		func() (string, string, string, int) { dbCalled = true; return dbNone() })

-	if !got.enabled || got.sfuPort != 0 || got.turnSecret != "state-secret" {
-		t.Errorf("want cached TURN-only fallback (sfuPort 0); got %+v", got)
+	if dbCalled {
+		t.Error("DB fetch called even though state file had the TURN secret")
 	}
-	if got.unresolved {
-		t.Error("a usable cached secret must not be marked unresolved")
+	if !got.enabled || got.sfuPort != 0 || got.turnSecret != "state-secret" {
+		t.Errorf("want TURN-only from state (sfuPort 0); got %+v", got)
 	}
 }

@ -140,7 +110,7 @@ func TestChooseRestoreWebRTC_dbNoSecretStaysDisabled(t *testing.T) {
 	// enablement marker; without it we treat it as not-configured-for-
 	// TURN, but an SFU port alone still enables SFU routes.
 	got := chooseRestoreWebRTC(false, 0, "", "", "",
-		func() (string, string, string, int, bool) { return "", "turn.db", "", 9000, true })
+		func() (string, string, string, int) { return "", "turn.db", "", 9000 })
 	// dbFetch only runs when state secret is empty; here it returns no
 	// secret, so the `if dbSecret != ""` guard means NOTHING is taken
 	// from the DB → disabled. (An SFU-only-no-TURN namespace is not a
@ -152,14 +122,16 @@ func TestChooseRestoreWebRTC_dbNoSecretStaysDisabled(t *testing.T) {

 // --- feat-124 stealth domain restore precedence ---

-func TestChooseRestoreWebRTC_stealthFromCacheOnDBError(t *testing.T) {
-	// When the DB read errors, the cache fallback carries the whole block —
-	// including the cached stealth domain — so a stealth-enabled namespace
-	// keeps advertising its stealth rung on a cold start that can't reach the
-	// DB yet.
-	got := chooseRestoreWebRTC(true, 7800, "turn.ns-x.dbrs.space", "state-secret", "cdn-abc123def456.dbrs.space", dbError)
-	if !got.enabled || got.stealthDomain != "cdn-abc123def456.dbrs.space" {
-		t.Errorf("stealthDomain = %q; want cached value on DB-error fallback; got %+v", got.stealthDomain, got)
+func TestChooseRestoreWebRTC_stealthFromStateFile(t *testing.T) {
+	// Stealth toggles rewrite cluster state, so a fresh state file carries
+	// the stealth domain and must win without a DB call.
+	got := chooseRestoreWebRTC(true, 7800, "turn.ns-x.dbrs.space", "state-secret", "cdn-abc123def456.dbrs.space",
+		func() (string, string, string, int) {
+			t.Error("DB fetch called even though state file was complete")
+			return dbNone()
+		})
+	if got.stealthDomain != "cdn-abc123def456.dbrs.space" {
+		t.Errorf("stealthDomain = %q; want state-file value", got.stealthDomain)
 	}
 }

@ -167,210 +139,19 @@ func TestChooseRestoreWebRTC_stealthFromDBOnStaleState(t *testing.T) {
 	// Stale state (no TURN secret) + DB has stealth enabled → stealth domain
 	// re-materializes from the DB alongside the rest of the WebRTC block.
 	got := chooseRestoreWebRTC(false, 0, "", "", "",
-		func() (string, string, string, int, bool) {
-			return "db-secret", "turn.ns-x.dbrs.space", "cdn-abc123def456.dbrs.space", 7801, true
+		func() (string, string, string, int) {
+			return "db-secret", "turn.ns-x.dbrs.space", "cdn-abc123def456.dbrs.space", 7801
 		})
 	if !got.enabled || got.stealthDomain != "cdn-abc123def456.dbrs.space" {
 		t.Errorf("want stealth domain from DB on stale state; got %+v", got)
 	}
 }

-// --- bugboard #130: distinguish "unresolved (DB/decrypt error)" from "disabled" ---
-
-func TestChooseRestoreWebRTC_dbErrorMarksUnresolvedNotDisabled(t *testing.T) {
-	// The bug-130 case: state file has no secret (freshly-joined node) and
-	// the DB lookup ERRORS (e.g. the stored TURN secret can't be decrypted
-	// after a cluster-secret rotation). This MUST surface as unresolved —
-	// NOT as a clean "disabled" — so the caller preserves the running config
-	// instead of writing a TURN-disabled gateway (which made turn.credentials
-	// return namespace_not_configured).
-	got := chooseRestoreWebRTC(false, 0, "", "", "", dbError)
-
-	if !got.unresolved {
-		t.Fatal("BUG #130 REGRESSION: a DB/decrypt error must mark the result unresolved")
-	}
-	if got.enabled {
-		t.Errorf("unresolved result must never be enabled (would write a config off an errored lookup); got %+v", got)
-	}
-	if got.turnSecret != "" {
-		t.Errorf("unresolved result must carry no secret; got %q", got.turnSecret)
-	}
-}
-
-func TestChooseRestoreWebRTC_resolvedEmptyIsDisabledNotUnresolved(t *testing.T) {
-	// The contrast case: the DB lookup COMPLETES and reports no WebRTC
-	// (genuinely disabled namespace). This must be disabled, NOT unresolved —
-	// the caller is free to write the empty/disabled config here.
-	got := chooseRestoreWebRTC(false, 0, "", "", "", dbNone)
-
-	if got.unresolved {
-		t.Error("a clean resolved-but-empty lookup must NOT be marked unresolved")
-	}
-	if got.enabled {
-		t.Errorf("genuinely-disabled namespace must be disabled; got %+v", got)
-	}
-}
-
-func TestChooseRestoreWebRTC_cachedSecretSurvivesDBError(t *testing.T) {
-	// A node that holds the TURN secret in its state file must NOT be disabled
-	// by a flaky/unsynced DB — when the DB read errors, DB-first falls back to
-	// the cached secret and stays enabled (not unresolved). Guards against the
-	// #130 fix accidentally disabling nodes when the DB is briefly unreadable.
-	got := chooseRestoreWebRTC(true, 7800, "turn.ns-x.dbrs.space", "state-secret", "", dbError)
-	if got.unresolved || !got.enabled || got.turnSecret != "state-secret" {
-		t.Errorf("cached secret must survive a DB error and stay enabled; got %+v", got)
-	}
-}
-
 func TestChooseRestoreWebRTC_noStealthStaysEmpty(t *testing.T) {
-	// Stealth disabled → empty stealthDomain (gateway advertises the baseline
-	// 3-rung ladder only). Uses the cache-fallback path (DB error) so an
-	// enabled-but-no-stealth config is exercised end to end.
-	got := chooseRestoreWebRTC(true, 7800, "turn.ns-x.dbrs.space", "state-secret", "", dbError)
-	if !got.enabled || got.stealthDomain != "" {
-		t.Errorf("stealthDomain = %q; want empty when stealth is disabled; got %+v", got.stealthDomain, got)
-	}
-}
-
-// ----------------------------------------------------------------------------
-// Bugboard #130 — cache the resolved WebRTC secret into local state so a slow
-// node's cold start reads it from disk instead of the (slow) namespace rqlite.
-// ----------------------------------------------------------------------------
-
-func TestApplyResolvedWebRTCToState_populatesAndReportsChange(t *testing.T) {
-	st := &ClusterLocalState{} // fresh node: no cached secret (the #130 gap)
-	wr := restoreWebRTC{enabled: true, turnSecret: "sek-123", turnDomain: "turn.ns-x.dbrs.space", stealthDomain: "cdn-abc.dbrs.space", sfuPort: 30000}
-
-	if !applyResolvedWebRTCToState(st, wr) {
-		t.Fatal("expected change=true when caching a secret into empty state")
-	}
-	if st.TURNSharedSecret != "sek-123" {
-		t.Errorf("TURNSharedSecret = %q; want sek-123 (must be cached for cold start)", st.TURNSharedSecret)
-	}
-	if !st.HasTURN || !st.HasSFU || st.SFUSignalingPort != 30000 ||
-		st.TURNDomain != "turn.ns-x.dbrs.space" || st.TURNStealthDomain != "cdn-abc.dbrs.space" {
-		t.Errorf("state not fully populated: %+v", st)
-	}
-
-	// The whole point of caching: on a SECOND boot where the DB read fails
-	// (slow node, namespace rqlite not synced), the cached secret lets the
-	// gateway still come up on TURN (DB-first falls back to the cache).
-	got := chooseRestoreWebRTC(st.HasSFU, st.SFUSignalingPort, st.TURNDomain, st.TURNSharedSecret, st.TURNStealthDomain, dbError)
-	if !got.enabled || got.unresolved || got.turnSecret != "sek-123" {
-		t.Errorf("cached cold start should fall back to the state secret on a DB error; got %+v", got)
-	}
-}
-
-func TestApplyResolvedWebRTCToState_noChangeWhenAlreadyCached(t *testing.T) {
-	st := &ClusterLocalState{HasTURN: true, HasSFU: true, TURNSharedSecret: "sek-123", TURNDomain: "d", TURNStealthDomain: "s", SFUSignalingPort: 30000}
-	wr := restoreWebRTC{enabled: true, turnSecret: "sek-123", turnDomain: "d", stealthDomain: "s", sfuPort: 30000}
-	if applyResolvedWebRTCToState(st, wr) {
-		t.Error("expected change=false (no rewrite) when state already matches the resolved config")
-	}
-}
-
-func TestApplyResolvedWebRTCToState_turnOnlyNode_noSFU(t *testing.T) {
-	// A gateway-only node (serves TURN credentials, runs no local SFU): secret
-	// set, sfuPort 0. Must still cache the secret + report HasTURN, HasSFU=false.
-	st := &ClusterLocalState{}
-	if !applyResolvedWebRTCToState(st, restoreWebRTC{enabled: true, turnSecret: "sek", turnDomain: "d", sfuPort: 0}) {
-		t.Fatal("want change=true")
-	}
-	if !st.HasTURN || st.HasSFU || st.TURNSharedSecret != "sek" {
-		t.Errorf("turn-only node: want HasTURN=true HasSFU=false secret cached; got %+v", st)
-	}
-}
-
-func TestApplyResolvedWebRTCToState_clearsCacheOnDisable(t *testing.T) {
-	// When the DB resolves the namespace as DISABLED, the caller applies an
-	// empty restoreWebRTC to wipe any stale cached secret from local state — so
-	// a node that was offline during DisableWebRTC can't later fall back to the
-	// old secret on a transient DB error and resurrect TURN for a disabled
-	// namespace. Must report change=true and zero out the cached fields.
-	st := &ClusterLocalState{HasTURN: true, HasSFU: true, TURNSharedSecret: "stale-secret", TURNDomain: "turn.ns-x.dbrs.space", SFUSignalingPort: 7800}
-
-	if !applyResolvedWebRTCToState(st, restoreWebRTC{}) {
-		t.Fatal("disable: want change=true when clearing a cached secret")
-	}
-	if st.TURNSharedSecret != "" || st.HasTURN || st.HasSFU || st.SFUSignalingPort != 0 || st.TURNDomain != "" {
-		t.Errorf("cache not fully cleared on disable: %+v", st)
-	}
-}
-
-func TestApplyResolvedWebRTCToState_secretRotationReportsChange(t *testing.T) {
-	// Secret rotation: the state holds an OLD cached secret and a fresh resolve
-	// brings the NEW (rotated) secret. applyResolvedWebRTCToState MUST report
-	// change=true and overwrite the cache, so the node's fallback secret tracks
-	// the rotation instead of persisting a stale value on disk (bugboard #130
-	// follow-up — the cache must never lag the rotated secret).
-	st := &ClusterLocalState{HasTURN: true, TURNSharedSecret: "old-secret", TURNDomain: "turn.ns-x.dbrs.space"}
-	wr := restoreWebRTC{enabled: true, turnSecret: "new-secret", turnDomain: "turn.ns-x.dbrs.space"}
-
-	if !applyResolvedWebRTCToState(st, wr) {
-		t.Fatal("rotation: want change=true when the resolved secret differs from the cached one")
-	}
-	if st.TURNSharedSecret != "new-secret" {
-		t.Errorf("cache not updated to the rotated secret: got %q; want new-secret", st.TURNSharedSecret)
-	}
-}
-
-// ----------------------------------------------------------------------------
-// Bugboard #130 — the cold-start read retries so a slow node's namespace
-// rqlite read lands once the follower syncs, instead of failing once and
-// coming up with TURN disabled.
-// ----------------------------------------------------------------------------
-
-func TestResolveWebRTCConfigWithRetry_succeedsOnNthAttempt(t *testing.T) {
-	// The read errors on the first two attempts (rqlite not readable yet) then
-	// succeeds — the retry must return the config and not surface the earlier
-	// transient errors.
-	calls := 0
-	slept := 0
-	cfg, err := resolveWebRTCConfigWithRetry(5, time.Millisecond, func(time.Duration) { slept++ },
-		func() (*WebRTCConfig, error) {
-			calls++
-			if calls < 3 {
-				return nil, errors.New("rqlite not readable yet")
-			}
-			return &WebRTCConfig{TURNSharedSecret: "sek-123"}, nil
-		})
-
-	if err != nil {
-		t.Fatalf("want success on the 3rd attempt; got err %v", err)
-	}
-	if cfg == nil || cfg.TURNSharedSecret != "sek-123" {
-		t.Fatalf("want resolved config; got %+v", cfg)
-	}
-	if calls != 3 {
-		t.Errorf("want exactly 3 fetch attempts; got %d", calls)
-	}
-	if slept != 2 {
-		t.Errorf("want a sleep between each of the 2 failed attempts; got %d", slept)
-	}
-}
-
-func TestResolveWebRTCConfigWithRetry_exhaustsAndReturnsError(t *testing.T) {
-	// A persistent error (e.g. a decrypt failure after cluster-secret rotation)
-	// must exhaust all attempts and return the final error — the caller maps
-	// that to unresolved (NOT disabled). No sleep after the final attempt.
-	calls := 0
-	slept := 0
-	cfg, err := resolveWebRTCConfigWithRetry(4, time.Millisecond, func(time.Duration) { slept++ },
-		func() (*WebRTCConfig, error) {
-			calls++
-			return nil, errors.New("decrypt failed")
-		})
-
-	if err == nil {
-		t.Fatal("want the final error after exhausting retries; got nil")
-	}
-	if cfg != nil {
-		t.Errorf("want nil config on exhaustion; got %+v", cfg)
-	}
-	if calls != 4 {
-		t.Errorf("want 4 attempts (all retries used); got %d", calls)
-	}
-	if slept != 3 {
-		t.Errorf("want a sleep between attempts but not after the last; got %d", slept)
+	// Stealth disabled everywhere → empty stealthDomain (gateway advertises
+	// the baseline 3-rung ladder only).
+	got := chooseRestoreWebRTC(true, 7800, "turn.ns-x.dbrs.space", "state-secret", "", dbNone)
+	if got.stealthDomain != "" {
+		t.Errorf("stealthDomain = %q; want empty when stealth is disabled", got.stealthDomain)
 	}
 }
--- a/core/pkg/namespace/systemd_spawner.go
+++ b/core/pkg/namespace/systemd_spawner.go
@ -801,23 +801,8 @@ func (s *SystemdSpawner) SaveClusterState(namespace string, data []byte) error {
 		return fmt.Errorf("failed to create namespace dir: %w", err)
 	}
 	path := filepath.Join(dir, "cluster-state.json")
-	// Atomic write to a temp file + rename: cluster-state.json carries the
-	// namespace TURN shared secret (bugboard #130), so it must not be
-	// world/group readable on the receiving node either, and a reader must
-	// never see a half-written secret. 0600 + chmod on the temp file keeps the
-	// secret private; the rename then makes the live file 0600 too, tightening
-	// a file an older release wrote 0644.
-	tmp := path + ".tmp"
-	if err := os.WriteFile(tmp, data, 0600); err != nil {
-		return fmt.Errorf("failed to write temp cluster state: %w", err)
-	}
-	if err := os.Chmod(tmp, 0600); err != nil {
-		os.Remove(tmp)
-		return fmt.Errorf("failed to set cluster state permissions: %w", err)
-	}
-	if err := os.Rename(tmp, path); err != nil {
-		os.Remove(tmp)
-		return fmt.Errorf("failed to rename cluster state into place: %w", err)
+	if err := os.WriteFile(path, data, 0644); err != nil {
+		return fmt.Errorf("failed to write cluster state: %w", err)
 	}
 	s.logger.Info("Saved cluster state from coordinator",
 		zap.String("namespace", namespace),
--- a/core/pkg/node/gateway.go
+++ b/core/pkg/node/gateway.go
@ -161,13 +161,6 @@ func (n *Node) startHTTPGateway(ctx context.Context) error {
 			zap.String("base_domain", clusterCfg.BaseDomain),
 			zap.String("base_data_dir", baseDataDir))

-		// Keep namespace raft leadership on co-located voters (bugboard #708):
-		// a geography-blind raft election can place leadership on a distant
-		// node, funneling every write across a ~256ms link into 5-10s RPCs.
-		// This reconciler hands leadership off an isolated leader to the nearest
-		// voter — never changing membership (all nodes stay voters).
-		clusterManager.StartLeaderLocalityReconciler(ctx)
-
 		// Restore previously-running namespace cluster processes in background.
 		// First try local state files (no DB dependency), then fall back to DB query with retries.
 		go func() {
--- a/core/pkg/push/dispatcher.go
+++ b/core/pkg/push/dispatcher.go
@ -4,7 +4,6 @@ import (
 	"context"
 	"errors"
 	"fmt"
-	"net/http"
 	"sync"

 	"go.uber.org/zap"
@ -186,12 +185,6 @@ func (d *PushDispatcher) SendToUserDetailed(
 			out.Ok = false
 		} else {
 			r.Success = true
-			// Record the success status explicitly. A provider Send returns nil
-			// only on a 2xx delivery, so surface 200 instead of leaving
-			// HTTPStatus at its zero value — otherwise a successful push logs
-			// "http=0", which reads like an opaque failure and masks real
-			// false-success classes (bugboard #132).
-			r.HTTPStatus = http.StatusOK
 			out.DevicesSucceeded++
 		}
 		out.Results = append(out.Results, r)
--- a/core/pkg/push/providers/apns/apns.go
+++ b/core/pkg/push/providers/apns/apns.go
@ -20,15 +20,6 @@ import (
 // provider's 5s because APNs is HTTP/2 + connection-reused.
 const defaultSendTimeout = 10 * time.Second

-// voipPushExpiry caps the apns-expiration on VoIP (call-invite) pushes to the
-// ring window. A call signal that can't be delivered within this window is
-// worse than undelivered: without an expiration APNs store-and-forwards it and
-// lands it MINUTES later, firing a phantom "missed call" ring on the device and
-// burning PushKit goodwill (bugboard #132). With it, APNs delivers promptly or
-// DISCARDS — never a stale invite. Alert pushes keep the default
-// store-and-forward behavior.
-const voipPushExpiry = 30 * time.Second
-
 // Provider is the APNs push.PushProvider implementation, scoped to one
 // (Team ID, Key ID, p8 key, Bundle ID, Environment, Kind) tuple.
 // Construct one per (namespace, kind) via the gateway dependency
@ -178,15 +169,6 @@ func (p *Provider) Send(ctx context.Context, msg push.PushMessage) error {
 		n.Priority = apns2.PriorityLow
 	}

-	// Cap VoIP expiration to the ring window so APNs never store-and-forwards a
-	// stale call-invite into a phantom missed-call ring (bugboard #132). Without
-	// this, apns2 omits apns-expiration and APNs stores+retries for its default
-	// (minutes to days). Alert pushes intentionally keep the default so a
-	// message notification still lands after the device reconnects.
-	if p.kind == KindVoIP {
-		n.Expiration = time.Now().Add(voipPushExpiry)
-	}
-
 	// PushWithContext propagates cancellation through to the HTTP/2
 	// stream — abandoning ctx terminates the in-flight request, no
 	// goroutine leak.
--- a/core/pkg/push/providers/apns/voip_test.go
+++ b/core/pkg/push/providers/apns/voip_test.go
@ -4,7 +4,6 @@ import (
 	"context"
 	"net/http"
 	"testing"
-	"time"

 	"github.com/DeBrosOfficial/network/pkg/push"
 	"github.com/sideshow/apns2"
@ -186,43 +185,3 @@ func TestAlert_Send_EmptyContentStillRejected(t *testing.T) {
 		t.Fatal("alert path should still reject empty-content (bugboard #348); got nil")
 	}
 }
-
-// Bugboard #132: VoIP call-invites MUST carry a short apns-expiration so APNs
-// never store-and-forwards a stale invite into a phantom missed-call ring
-// minutes later. Without it apns2 omits the header → store-and-forward.
-func TestVoIP_Send_ExpirationCappedToRingWindow(t *testing.T) {
-	fake := &fakePushClient{resp: &apns2.Response{StatusCode: http.StatusOK, ApnsID: "voip-exp"}}
-	p := newTestProviderKind(t, "com.example.app", KindVoIP, fake)
-	before := time.Now()
-	if err := p.Send(context.Background(), push.PushMessage{
-		DeviceToken: "VOIP-TOKEN",
-		Data:        map[string]interface{}{"call_id": "x"},
-	}); err != nil {
-		t.Fatalf("Send: %v", err)
-	}
-	exp := fake.lastSent.Expiration
-	if exp.IsZero() {
-		t.Fatal("VoIP push has NO apns-expiration — APNs store-and-forwards → late phantom ring (#132)")
-	}
-	if !exp.After(before) {
-		t.Errorf("expiration %v not in the future (before=%v)", exp, before)
-	}
-	if exp.After(before.Add(voipPushExpiry + 2*time.Second)) {
-		t.Errorf("expiration %v exceeds the ring-window cap (%s) — would allow a late ring", exp, voipPushExpiry)
-	}
-}
-
-// Alert (message) pushes intentionally keep store-and-forward (no expiration) so
-// a notification still lands after reconnect — only the VoIP path is capped.
-func TestAlert_Send_NoExpiration_keepsStoreAndForward(t *testing.T) {
-	fake := &fakePushClient{resp: &apns2.Response{StatusCode: http.StatusOK, ApnsID: "alert-1"}}
-	p := newTestProviderKind(t, "com.example.app", KindAlert, fake)
-	if err := p.Send(context.Background(), push.PushMessage{
-		DeviceToken: "ALERT-TOKEN", Title: "hi", Body: "msg",
-	}); err != nil {
-		t.Fatalf("Send: %v", err)
-	}
-	if !fake.lastSent.Expiration.IsZero() {
-		t.Errorf("alert push set expiration %v; want none (store-and-forward)", fake.lastSent.Expiration)
-	}
-}
--- a/core/pkg/push/providers/ntfy/ntfy.go
+++ b/core/pkg/push/providers/ntfy/ntfy.go
@ -23,14 +23,12 @@ package ntfy

 import (
 	"context"
-	"crypto/tls"
 	"encoding/json"
 	"fmt"
 	"io"
 	"net/http"
 	"net/url"
 	"strings"
-	"sync"
 	"time"

 	"github.com/DeBrosOfficial/network/pkg/push"
@ -47,34 +45,14 @@ type Config struct {
 	AuthToken string
 	// Timeout bounds each Send call. 0 selects 5 seconds.
 	Timeout time.Duration
-
-	// FanoutResolver, when set, returns the set of ntfy publish base URLs to
-	// deliver EACH publish to — one per active push node. The cluster runs an
-	// independent ntfy per node with NO shared message store, while subscribers
-	// are scattered across nodes by round-robin DNS; a publish that lands on one
-	// node only reaches subscribers on that node, losing ~(N-1)/N (bugboard
-	// #858). Fanning a publish to EVERY node guarantees it reaches whichever
-	// instance the subscriber's connection landed on. When nil, or it returns no
-	// hosts (or errors), Send falls back to the single BaseURL — so push never
-	// breaks if node discovery is unavailable.
-	FanoutResolver func(ctx context.Context) ([]string, error)
-	// FanoutHostHeader, when set, overrides the HTTP Host header and TLS SNI on
-	// fan-out requests. Needed because FanoutResolver returns per-node addresses
-	// (IPs) but each node's reverse proxy (Caddy) routes by — and serves its TLS
-	// cert for — the public push hostname. Empty: no override (tests /
-	// homogeneous hosts).
-	FanoutHostHeader string
 }

 // Provider is the ntfy push.PushProvider implementation.
 type Provider struct {
-	baseURL          string
-	authToken        string
-	httpClient       *http.Client
-	fanoutClient     *http.Client
-	fanoutResolver   func(ctx context.Context) ([]string, error)
-	fanoutHostHeader string
-	logger           *zap.Logger
+	baseURL    string
+	authToken  string
+	httpClient *http.Client
+	logger     *zap.Logger
 }

 // New creates a Provider with the given config.
@ -86,37 +64,18 @@ func New(cfg Config, logger *zap.Logger) *Provider {
 	if timeout <= 0 {
 		timeout = 5 * time.Second
 	}
-	p := &Provider{
-		baseURL:          strings.TrimRight(cfg.BaseURL, "/"),
-		authToken:        cfg.AuthToken,
-		httpClient:       &http.Client{Timeout: timeout},
-		fanoutResolver:   cfg.FanoutResolver,
-		fanoutHostHeader: cfg.FanoutHostHeader,
-		logger:           logger.Named("ntfy"),
+	return &Provider{
+		baseURL:    strings.TrimRight(cfg.BaseURL, "/"),
+		authToken:  cfg.AuthToken,
+		httpClient: &http.Client{Timeout: timeout},
+		logger:     logger.Named("ntfy"),
 	}
-	if cfg.FanoutResolver != nil {
-		// Fan-out requests dial per-node addresses but must present the public
-		// push hostname for SNI so each node's Caddy serves the right cert and
-		// routes to its local ntfy. A dedicated client carries that fixed SNI.
-		tr := &http.Transport{}
-		if cfg.FanoutHostHeader != "" {
-			tr.TLSClientConfig = &tls.Config{ServerName: cfg.FanoutHostHeader}
-		}
-		p.fanoutClient = &http.Client{Timeout: timeout, Transport: tr}
-	}
-	return p
 }

 // Name implements push.PushProvider.
 func (p *Provider) Name() string { return "ntfy" }

 // Send delivers a push notification to the device's ntfy topic.
-//
-// When a FanoutResolver is configured, the publish is delivered to EVERY active
-// push node (the ntfy instances don't share state, so the subscriber's instance
-// — whichever the round-robin LB picked — must be among the targets), and Send
-// succeeds as long as at least one instance accepted it (bugboard #858).
-// Otherwise it publishes to the single configured BaseURL.
 func (p *Provider) Send(ctx context.Context, msg push.PushMessage) error {
 	if msg.DeviceToken == "" {
 		return push.ErrEmptyToken
@ -125,7 +84,7 @@ func (p *Provider) Send(ctx context.Context, msg push.PushMessage) error {
 		return fmt.Errorf("ntfy: base URL not configured")
 	}

-	topic, err := p.resolveTopic(msg.DeviceToken)
+	endpointURL, err := p.resolveEndpoint(msg.DeviceToken)
 	if err != nil {
 		return err
 	}
@ -143,73 +102,10 @@ func (p *Provider) Send(ctx context.Context, msg push.PushMessage) error {
 		body = string(b)
 	}

-	// Resolve the set of base URLs to publish to. Default: the single base URL.
-	// With a fan-out resolver, publish to every active push node so the
-	// subscriber's instance is always covered. Resolver failure is non-fatal —
-	// fall back to the base URL so push keeps working.
-	bases := []string{p.baseURL}
-	httpClient := p.httpClient
-	hostHeader := ""
-	if p.fanoutResolver != nil {
-		if hosts, rerr := p.fanoutResolver(ctx); rerr != nil {
-			p.logger.Warn("ntfy fan-out node resolution failed; publishing to base URL only", zap.Error(rerr))
-		} else if len(hosts) > 0 {
-			bases = hosts
-			httpClient = p.fanoutClient
-			hostHeader = p.fanoutHostHeader
-		}
-	}
-
-	if len(bases) == 1 {
-		return p.postOne(ctx, httpClient, bases[0], topic, body, msg, hostHeader)
-	}
-
-	// Fan out concurrently. Success = at least one instance accepted the
-	// publish (the message is in the cluster). A node that's down is logged but
-	// does not fail the Send, since the message still reaches every reachable
-	// instance — including, in the common case, the subscriber's.
-	var wg sync.WaitGroup
-	errs := make([]error, len(bases))
-	for i, base := range bases {
-		wg.Add(1)
-		go func(i int, base string) {
-			defer wg.Done()
-			errs[i] = p.postOne(ctx, httpClient, base, topic, body, msg, hostHeader)
-		}(i, base)
-	}
-	wg.Wait()
-
-	okCount := 0
-	var firstErr error
-	for _, e := range errs {
-		if e == nil {
-			okCount++
-		} else if firstErr == nil {
-			firstErr = e
-		}
-	}
-	if okCount == 0 {
-		return fmt.Errorf("ntfy: fan-out to all %d push nodes failed: %w", len(bases), firstErr)
-	}
-	if okCount < len(bases) {
-		p.logger.Warn("ntfy fan-out partial failure (message still delivered to the reachable instances)",
-			zap.Int("delivered", okCount), zap.Int("total", len(bases)), zap.Error(firstErr))
-	}
-	return nil
-}
-
-// postOne publishes a single (already-resolved) topic+body to one ntfy base URL.
-// hostHeader, when non-empty, overrides the HTTP Host header so a request dialed
-// at a node IP is still routed by the node's proxy as the public push hostname.
-func (p *Provider) postOne(ctx context.Context, httpClient *http.Client, base, topic, body string, msg push.PushMessage, hostHeader string) error {
-	endpointURL := strings.TrimRight(base, "/") + "/" + topic
 	req, err := http.NewRequestWithContext(ctx, http.MethodPost, endpointURL, strings.NewReader(body))
 	if err != nil {
 		return fmt.Errorf("ntfy: build request: %w", err)
 	}
-	if hostHeader != "" {
-		req.Host = hostHeader
-	}

 	if msg.Title != "" {
 		req.Header.Set("Title", msg.Title)
@ -231,15 +127,15 @@ func (p *Provider) postOne(ctx context.Context, httpClient *http.Client, base, t
 		req.Header.Set("Authorization", "Bearer "+p.authToken)
 	}

-	resp, err := httpClient.Do(req)
+	resp, err := p.httpClient.Do(req)
 	if err != nil {
 		return fmt.Errorf("ntfy: post: %w", err)
 	}
 	defer resp.Body.Close()

 	if resp.StatusCode >= 400 {
-		errBody, _ := io.ReadAll(io.LimitReader(resp.Body, 512))
-		return fmt.Errorf("ntfy: http %d: %s", resp.StatusCode, strings.TrimSpace(string(errBody)))
+		body, _ := io.ReadAll(io.LimitReader(resp.Body, 512))
+		return fmt.Errorf("ntfy: http %d: %s", resp.StatusCode, strings.TrimSpace(string(body)))
 	}

 	// Drain body to allow connection reuse.
@ -247,21 +143,20 @@ func (p *Provider) postOne(ctx context.Context, httpClient *http.Client, base, t
 	return nil
 }

-// resolveTopic maps a device token to the escaped ntfy topic path (without the
-// base URL), so the same topic can be published to one or many push nodes.
+// resolveEndpoint maps a device token to the ntfy publish URL.
 //
 // The token is one of two shapes:
 //
 //   - A plain ntfy topic (possibly hierarchical, e.g. "ns/myapp/user-1") —
-//     each path segment is escaped so a crafted token can't break out of the
-//     topic path.
+//     published to "<baseURL>/<topic>", with each path segment escaped so a
+//     crafted token can't break out of the topic path.
 //   - A full UnifiedPush endpoint URL handed to the client by the ntfy
 //     distributor (e.g. "https://push.example.com/up<random>"). UnifiedPush
-//     requires the application server to POST to that endpoint, so we accept it
-//     — but ONLY after verifying its scheme+host match the configured base URL,
-//     then take only its path as the topic. That turns a device-supplied token
-//     into a publish only against our own push host, never an arbitrary one.
-func (p *Provider) resolveTopic(token string) (string, error) {
+//     requires the application server to POST to that endpoint, so we accept
+//     it — but ONLY after verifying its scheme+host match the configured base
+//     URL, keeping just its re-escaped path as the topic. That confines any
+//     SSRF from a device-supplied token to our own push host, never another.
+func (p *Provider) resolveEndpoint(token string) (string, error) {
 	topic := token
 	if isAbsoluteHTTPURL(token) {
 		u, err := url.Parse(token)
@ -278,7 +173,10 @@ func (p *Provider) resolveTopic(token string) (string, error) {
 			return "", fmt.Errorf("ntfy: endpoint host %q does not match configured push host %q", u.Host, base.Host)
 		}
 		// Confine the URL form to the SAME publish surface as a bare topic:
-		// take only the path as the topic, dropping any query/fragment.
+		// take only the path as the topic and re-build through the per-segment
+		// escaping below, dropping any query/fragment. So a UnifiedPush
+		// endpoint token can publish a topic but can't gain arbitrary path or
+		// query control on the push host beyond what a plain topic already has.
 		topic = strings.TrimPrefix(u.Path, "/")
 		if topic == "" {
 			return "", fmt.Errorf("ntfy: endpoint url %q has no topic path", token)
@ -290,7 +188,7 @@ func (p *Provider) resolveTopic(token string) (string, error) {
 	for i, seg := range parts {
 		parts[i] = url.PathEscape(seg)
 	}
-	return strings.Join(parts, "/"), nil
+	return p.baseURL + "/" + strings.Join(parts, "/"), nil
 }

 // isAbsoluteHTTPURL reports whether s looks like an absolute http(s) URL (the
--- a/core/pkg/push/providers/ntfy/ntfy_test.go
+++ b/core/pkg/push/providers/ntfy/ntfy_test.go
@ -8,7 +8,6 @@ import (
 	"net/http/httptest"
 	"net/url"
 	"strings"
-	"sync"
 	"testing"
 	"time"

@ -307,136 +306,3 @@ func TestName(t *testing.T) {
 		t.Errorf("expected Name=ntfy, got %s", p.Name())
 	}
 }
-
-// ----------------------------------------------------------------------------
-// Bugboard #858 — cluster fan-out. Each push node runs an independent ntfy with
-// no shared store, so a publish must reach EVERY node for the subscriber's
-// instance (round-robin DNS picks one) to receive it.
-// ----------------------------------------------------------------------------
-
-// fanoutRecorder is a test ntfy node that records the topics it received.
-type fanoutRecorder struct {
-	mu     sync.Mutex
-	topics []string
-}
-
-func newFanoutNode(t *testing.T) (*httptest.Server, *fanoutRecorder) {
-	t.Helper()
-	rec := &fanoutRecorder{}
-	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
-		rec.mu.Lock()
-		rec.topics = append(rec.topics, strings.TrimPrefix(r.URL.Path, "/"))
-		rec.mu.Unlock()
-		w.WriteHeader(http.StatusOK)
-	}))
-	return srv, rec
-}
-
-func (r *fanoutRecorder) count() int {
-	r.mu.Lock()
-	defer r.mu.Unlock()
-	return len(r.topics)
-}
-
-func TestSend_fanout_publishesToAllNodes(t *testing.T) {
-	s1, r1 := newFanoutNode(t)
-	defer s1.Close()
-	s2, r2 := newFanoutNode(t)
-	defer s2.Close()
-	s3, r3 := newFanoutNode(t)
-	defer s3.Close()
-
-	p := New(Config{
-		BaseURL: s1.URL, // base URL still required; fan-out targets come from the resolver
-		FanoutResolver: func(context.Context) ([]string, error) {
-			return []string{s1.URL, s2.URL, s3.URL}, nil
-		},
-	}, nil)
-
-	if err := p.Send(context.Background(), push.PushMessage{DeviceToken: "user-1", Body: "hi"}); err != nil {
-		t.Fatalf("Send: %v", err)
-	}
-	for i, r := range []*fanoutRecorder{r1, r2, r3} {
-		if r.count() != 1 {
-			t.Errorf("node %d received %d publishes; want exactly 1 (the publish must reach every node)", i+1, r.count())
-		}
-		if r.count() == 1 && r.topics[0] != "user-1" {
-			t.Errorf("node %d got topic %q; want user-1", i+1, r.topics[0])
-		}
-	}
-}
-
-func TestSend_fanout_oneNodeDown_stillSucceeds(t *testing.T) {
-	up, rUp := newFanoutNode(t)
-	defer up.Close()
-	down, _ := newFanoutNode(t)
-	down.Close() // unreachable
-
-	p := New(Config{
-		BaseURL: up.URL,
-		FanoutResolver: func(context.Context) ([]string, error) {
-			return []string{up.URL, down.URL}, nil
-		},
-	}, nil)
-
-	// At least one node accepted it → Send succeeds; the message still reached
-	// the reachable instances.
-	if err := p.Send(context.Background(), push.PushMessage{DeviceToken: "t", Body: "x"}); err != nil {
-		t.Fatalf("Send should succeed when at least one node is up; got %v", err)
-	}
-	if rUp.count() != 1 {
-		t.Errorf("the up node should have received the publish; got %d", rUp.count())
-	}
-}
-
-func TestSend_fanout_allNodesDown_returnsError(t *testing.T) {
-	d1, _ := newFanoutNode(t)
-	d1.Close()
-	d2, _ := newFanoutNode(t)
-	d2.Close()
-
-	p := New(Config{
-		BaseURL: "http://127.0.0.1:1", // unused for posting; just non-empty
-		FanoutResolver: func(context.Context) ([]string, error) {
-			return []string{d1.URL, d2.URL}, nil
-		},
-	}, nil)
-
-	if err := p.Send(context.Background(), push.PushMessage{DeviceToken: "t", Body: "x"}); err == nil {
-		t.Fatal("Send should fail when every node is unreachable")
-	}
-}
-
-func TestSend_fanout_resolverEmpty_fallsBackToBaseURL(t *testing.T) {
-	base, rBase := newFanoutNode(t)
-	defer base.Close()
-
-	p := New(Config{
-		BaseURL:        base.URL,
-		FanoutResolver: func(context.Context) ([]string, error) { return nil, nil }, // no active nodes
-	}, nil)
-
-	if err := p.Send(context.Background(), push.PushMessage{DeviceToken: "t", Body: "x"}); err != nil {
-		t.Fatalf("Send: %v", err)
-	}
-	if rBase.count() != 1 {
-		t.Errorf("empty resolver must fall back to the base URL; base got %d publishes", rBase.count())
-	}
-}
-
-func TestSend_fanout_resolverError_fallsBackToBaseURL(t *testing.T) {
-	base, rBase := newFanoutNode(t)
-	defer base.Close()
-
-	p := New(Config{
-		BaseURL:        base.URL,
-		FanoutResolver: func(context.Context) ([]string, error) { return nil, context.DeadlineExceeded },
-	}, nil)
-
-	if err := p.Send(context.Background(), push.PushMessage{DeviceToken: "t", Body: "x"}); err != nil {
-		t.Fatalf("resolver error must not fail the push (fall back to base URL); got %v", err)
-	}
-	if rBase.count() != 1 {
-		t.Errorf("resolver error must fall back to the base URL; base got %d publishes", rBase.count())
-	}
-}
--- a/core/pkg/rqlite/leadership.go
+++ b/core/pkg/rqlite/leadership.go
@ -10,39 +10,53 @@ import (
 	"go.uber.org/zap"
 )

-// GetRaftStatus queries a local rqlite node's /status endpoint.
-func GetRaftStatus(port int) (*RQLiteStatus, error) {
+// TransferLeadership attempts to transfer Raft leadership to another voter.
+// Used by both the RQLiteManager (on Stop) and the CLI (pre-upgrade).
+// Returns nil if this node is not the leader or if transfer succeeds.
+func TransferLeadership(port int, logger *zap.Logger) error {
 	client := &http.Client{Timeout: 5 * time.Second}
-	resp, err := client.Get(fmt.Sprintf("http://localhost:%d/status", port))
+
+	// 1. Check if we're the leader
+	statusURL := fmt.Sprintf("http://localhost:%d/status", port)
+	resp, err := client.Get(statusURL)
 	if err != nil {
-		return nil, fmt.Errorf("failed to query status: %w", err)
+		return fmt.Errorf("failed to query status: %w", err)
 	}
 	defer resp.Body.Close()
+
 	body, err := io.ReadAll(resp.Body)
 	if err != nil {
-		return nil, fmt.Errorf("failed to read status: %w", err)
+		return fmt.Errorf("failed to read status: %w", err)
 	}
+
 	var status RQLiteStatus
 	if err := json.Unmarshal(body, &status); err != nil {
-		return nil, fmt.Errorf("failed to parse status: %w", err)
+		return fmt.Errorf("failed to parse status: %w", err)
 	}
-	return &status, nil
-}

-// GetRaftNodes queries a local rqlite node's /nodes endpoint (voters +
-// non-voters, with reachability).
-func GetRaftNodes(port int) (RQLiteNodes, error) {
-	client := &http.Client{Timeout: 5 * time.Second}
-	resp, err := client.Get(fmt.Sprintf("http://localhost:%d/nodes?nonvoters&ver=2&timeout=5s", port))
-	if err != nil {
-		return nil, fmt.Errorf("failed to query nodes: %w", err)
+	if status.Store.Raft.State != "Leader" {
+		logger.Debug("Not the leader, skipping transfer", zap.Int("port", port))
+		return nil
 	}
-	defer resp.Body.Close()
-	nodesBody, err := io.ReadAll(resp.Body)
+
+	logger.Info("This node is the Raft leader, attempting leadership transfer",
+		zap.Int("port", port),
+		zap.String("leader_id", status.Store.Raft.LeaderID))
+
+	// 2. Find an eligible voter to transfer to
+	nodesURL := fmt.Sprintf("http://localhost:%d/nodes?nonvoters&ver=2&timeout=5s", port)
+	nodesResp, err := client.Get(nodesURL)
 	if err != nil {
-		return nil, fmt.Errorf("failed to read nodes: %w", err)
+		return fmt.Errorf("failed to query nodes: %w", err)
 	}
-	// Try ver=2 wrapped format, fall back to plain array.
+	defer nodesResp.Body.Close()
+
+	nodesBody, err := io.ReadAll(nodesResp.Body)
+	if err != nil {
+		return fmt.Errorf("failed to read nodes: %w", err)
+	}
+
+	// Try ver=2 wrapped format, fall back to plain array
 	var nodes RQLiteNodes
 	var wrapped struct {
 		Nodes RQLiteNodes `json:"nodes"`
@ -52,28 +66,8 @@ func GetRaftNodes(port int) (RQLiteNodes, error) {
 	} else {
 		_ = json.Unmarshal(nodesBody, &nodes)
 	}
-	return nodes, nil
-}

-// TransferLeadership attempts to transfer Raft leadership to another voter.
-// Used by both the RQLiteManager (on Stop) and the CLI (pre-upgrade).
-// Returns nil if this node is not the leader or if transfer succeeds.
-func TransferLeadership(port int, logger *zap.Logger) error {
-	status, err := GetRaftStatus(port)
-	if err != nil {
-		return err
-	}
-	if status.Store.Raft.State != "Leader" {
-		logger.Debug("Not the leader, skipping transfer", zap.Int("port", port))
-		return nil
-	}
-
-	nodes, err := GetRaftNodes(port)
-	if err != nil {
-		return err
-	}
-
-	// Find any reachable voter that is NOT us.
+	// Find a reachable voter that is NOT us
 	var targetID string
 	for _, n := range nodes {
 		if n.Voter && n.Reachable && n.ID != status.Store.Raft.LeaderID {
@ -81,55 +75,57 @@ func TransferLeadership(port int, logger *zap.Logger) error {
 			break
 		}
 	}
+
 	if targetID == "" {
 		logger.Warn("No eligible voter found for leadership transfer — will rely on SIGTERM graceful step-down",
 			zap.Int("port", port))
 		return nil
 	}
-	return TransferLeadershipTo(port, targetID, logger)
-}
-
-// TransferLeadershipTo transfers Raft leadership to a SPECIFIC target node ID
-// (its raft address). The caller is responsible for confirming this node is the
-// leader and that targetID is an eligible voter. Tolerant of a missing API
-// (404) and a non-OK status — it logs and returns nil so callers treat transfer
-// as best-effort.
-func TransferLeadershipTo(port int, targetID string, logger *zap.Logger) error {
-	client := &http.Client{Timeout: 5 * time.Second}
-
-	logger.Info("Attempting Raft leadership transfer",
-		zap.Int("port", port), zap.String("target", targetID))

+	// 3. Attempt transfer via rqlite v8+ API
+	// POST /nodes/<target>/transfer-leadership
+	// If the API doesn't exist (404), fall back to relying on SIGTERM.
 	transferURL := fmt.Sprintf("http://localhost:%d/nodes/%s/transfer-leadership", port, targetID)
 	transferResp, err := client.Post(transferURL, "application/json", nil)
 	if err != nil {
-		logger.Warn("Leadership transfer request failed", zap.Error(err))
+		logger.Warn("Leadership transfer request failed, relying on SIGTERM",
+			zap.Error(err))
 		return nil
 	}
 	transferResp.Body.Close()

-	switch {
-	case transferResp.StatusCode == http.StatusNotFound:
-		logger.Info("Leadership transfer API not available (rqlite version)")
+	if transferResp.StatusCode == http.StatusNotFound {
+		logger.Info("Leadership transfer API not available (rqlite version), relying on SIGTERM")
 		return nil
-	case transferResp.StatusCode != http.StatusOK:
+	}
+
+	if transferResp.StatusCode != http.StatusOK {
 		logger.Warn("Leadership transfer returned unexpected status",
 			zap.Int("status", transferResp.StatusCode))
 		return nil
 	}

-	// Verify.
+	// 4. Verify transfer
 	time.Sleep(2 * time.Second)
-	newStatus, err := GetRaftStatus(port)
+	verifyResp, err := client.Get(statusURL)
 	if err != nil {
 		logger.Info("Could not verify transfer (node may have already stepped down)")
 		return nil
 	}
-	if newStatus.Store.Raft.State != "Leader" {
-		logger.Info("Leadership transferred successfully",
-			zap.String("new_leader", newStatus.Store.Raft.LeaderID), zap.Int("port", port))
-	} else {
-		logger.Warn("Still leader after transfer attempt", zap.Int("port", port))
+	defer verifyResp.Body.Close()
+
+	verifyBody, _ := io.ReadAll(verifyResp.Body)
+	var newStatus RQLiteStatus
+	if err := json.Unmarshal(verifyBody, &newStatus); err == nil {
+		if newStatus.Store.Raft.State != "Leader" {
+			logger.Info("Leadership transferred successfully",
+				zap.String("new_leader", newStatus.Store.Raft.LeaderID),
+				zap.Int("port", port))
+		} else {
+			logger.Warn("Still leader after transfer attempt — will rely on SIGTERM",
+				zap.Int("port", port))
+		}
 	}
+
 	return nil
 }
--- a/core/pkg/serverless/invoke.go
+++ b/core/pkg/serverless/invoke.go
@ -118,16 +118,16 @@ func (i *Invoker) Invoke(ctx context.Context, req *InvokeRequest) (*InvokeRespon
 	// #264). The auth boundary for system triggers is at REGISTRATION
 	// time (HTTP `POST /v1/functions/{name}/triggers`, or deploy-time
 	// auto-register from function.yaml), not at firing time.
-	if !isSystemTrigger(req.TriggerType) && !canInvokeFn(fn, req.CallerWallet) {
-		// Authorization uses the function we already fetched above —
-		// CanInvoke would re-`registry.Get` it, a redundant leader-routed
-		// read on every op (bugboard #708).
-		return &InvokeResponse{
-			RequestID:  requestID,
-			Status:     InvocationStatusError,
-			Error:      "unauthorized",
-			DurationMS: time.Since(startTime).Milliseconds(),
-		}, ErrUnauthorized
+	if !isSystemTrigger(req.TriggerType) {
+		authorized, err := i.CanInvoke(ctx, req.Namespace, req.FunctionName, req.CallerWallet)
+		if err != nil || !authorized {
+			return &InvokeResponse{
+				RequestID:  requestID,
+				Status:     InvocationStatusError,
+				Error:      "unauthorized",
+				DurationMS: time.Since(startTime).Milliseconds(),
+			}, ErrUnauthorized
+		}
 	}

 	// Get environment variables
@ -493,7 +493,7 @@ func (i *Invoker) BatchInvoke(ctx context.Context, req *BatchInvokeRequest) (*Ba
 func isSystemTrigger(t TriggerType) bool {
 	switch t {
 	case TriggerTypeCron, TriggerTypePubSub, TriggerTypeDatabase,
-		TriggerTypeTimer, TriggerTypeJob, TriggerTypeInternal:
+		TriggerTypeTimer, TriggerTypeJob:
 		return true
 	}
 	return false
@ -504,19 +504,20 @@ func (i *Invoker) CanInvoke(ctx context.Context, namespace, functionName string,
 	if err != nil {
 		return false, err
 	}
-	return canInvokeFn(fn, callerWallet), nil
-}

-// canInvokeFn is the pure authorization decision for an already-fetched
-// function, so the hot Invoke path doesn't re-read the registry (bugboard
-// #708). Public functions are open; a private function only requires that the
-// caller has SOME identity — the auth middleware already verified namespace
-// membership before the function ran.
-func canInvokeFn(fn *Function, callerWallet string) bool {
+	// Public functions can be invoked by anyone (auth middleware allows
+	// the request through without credentials).
 	if fn.IsPublic {
-		return true
+		return true, nil
 	}
-	return strings.TrimSpace(callerWallet) != ""
+
+	// Private function: require an authenticated caller. The auth
+	// middleware has already verified the caller belongs to this
+	// namespace (either via JWT `namespace` claim or via API-key
+	// namespace lookup) before this function ever runs, so the only
+	// thing we need to confirm here is that the caller has SOME
+	// identity at all (i.e. the request wasn't anonymous).
+	return strings.TrimSpace(callerWallet) != "", nil
 }

 // GetFunctionInfo returns basic info about a function for invocation.
--- a/core/pkg/serverless/registry.go
+++ b/core/pkg/serverless/registry.go
@ -6,9 +6,7 @@ import (
 	"database/sql"
 	"fmt"
 	"io"
-	"strconv"
 	"strings"
-	"sync"
 	"time"

 	"github.com/DeBrosOfficial/network/pkg/ipfs"
@ -17,27 +15,6 @@ import (
 	"go.uber.org/zap"
 )

-// registryCacheTTL bounds how long function metadata + env vars are cached
-// in-process before re-reading rqlite. Bugboard #708: every function_invoke
-// previously did 3 uncached `weak` reads (Get, a redundant Get inside
-// CanInvoke, and GetEnvVars), each forwarded to the raft leader — ~820ms of
-// pure pre-flight tax per op when the leader is a distant node. With a short
-// TTL + explicit invalidation on deploy/enable/disable/delete, a burst of RPCs
-// (e.g. a call setup) reads metadata once instead of N times. The TTL is a
-// backstop; correctness comes from the explicit invalidation, so cross-node
-// propagation of a deploy/disable is bounded to this TTL.
-const registryCacheTTL = 5 * time.Second
-
-type fnCacheEntry struct {
-	fn *Function
-	at time.Time
-}
-
-type envCacheEntry struct {
-	env map[string]string
-	at  time.Time
-}
-
 // Ensure Registry implements FunctionRegistry and InvocationLogger interfaces.
 var _ FunctionRegistry = (*Registry)(nil)
 var _ InvocationLogger = (*Registry)(nil)
@ -50,12 +27,6 @@ type Registry struct {
 	ipfsAPIURL string
 	logger     *zap.Logger
 	tableName  string
-
-	// Metadata cache (bugboard #708) — see registryCacheTTL.
-	cacheTTL time.Duration
-	cacheMu  sync.RWMutex
-	fnCache  map[string]fnCacheEntry  // key: namespace\x00name\x00version
-	envCache map[string]envCacheEntry // key: functionID
 }

 // RegistryConfig holds configuration for the Registry.
@ -71,78 +42,9 @@ func NewRegistry(db rqlite.Client, ipfsClient ipfs.IPFSClient, cfg RegistryConfi
 		ipfsAPIURL: cfg.IPFSAPIURL,
 		logger:     logger,
 		tableName:  "functions",
-		cacheTTL:   registryCacheTTL,
-		fnCache:    make(map[string]fnCacheEntry),
-		envCache:   make(map[string]envCacheEntry),
 	}
 }

-// --- metadata cache (bugboard #708) ---
-//
-// The cached *Function and env map are SHARED with all callers and MUST be
-// treated as read-only — no consumer in pkg/serverless mutates them today, and
-// none may, or it would corrupt the cache for concurrent readers.
-
-func fnCacheKey(namespace, name string, version int) string {
-	return namespace + "\x00" + name + "\x00" + strconv.Itoa(version)
-}
-
-func (r *Registry) cachedFn(key string) (*Function, bool) {
-	r.cacheMu.RLock()
-	e, ok := r.fnCache[key]
-	r.cacheMu.RUnlock()
-	if !ok || time.Since(e.at) > r.cacheTTL {
-		return nil, false
-	}
-	return e.fn, true
-}
-
-func (r *Registry) storeFn(key string, fn *Function) {
-	r.cacheMu.Lock()
-	r.fnCache[key] = fnCacheEntry{fn: fn, at: time.Now()}
-	r.cacheMu.Unlock()
-}
-
-func (r *Registry) cachedEnv(functionID string) (map[string]string, bool) {
-	r.cacheMu.RLock()
-	e, ok := r.envCache[functionID]
-	r.cacheMu.RUnlock()
-	if !ok || time.Since(e.at) > r.cacheTTL {
-		return nil, false
-	}
-	return e.env, true
-}
-
-func (r *Registry) storeEnv(functionID string, env map[string]string) {
-	r.cacheMu.Lock()
-	r.envCache[functionID] = envCacheEntry{env: env, at: time.Now()}
-	r.cacheMu.Unlock()
-}
-
-// invalidateFn drops every cached version of (namespace, name). Called on
-// deploy/enable/disable/delete so a metadata change is never masked by the
-// cache beyond the write itself.
-func (r *Registry) invalidateFn(namespace, name string) {
-	prefix := strings.TrimSpace(namespace) + "\x00" + strings.TrimSpace(name) + "\x00"
-	r.cacheMu.Lock()
-	for k := range r.fnCache {
-		if strings.HasPrefix(k, prefix) {
-			delete(r.fnCache, k)
-		}
-	}
-	r.cacheMu.Unlock()
-}
-
-// invalidateEnv drops the cached env vars for a function ID. A redeploy REUSES
-// the existing function ID (Register: id = oldFn.ID) and rewrites env vars
-// under it, so without this an env-var change would be masked by the cache for
-// up to the TTL.
-func (r *Registry) invalidateEnv(functionID string) {
-	r.cacheMu.Lock()
-	delete(r.envCache, functionID)
-	r.cacheMu.Unlock()
-}
-
 // Register deploys a new function or updates an existing one.
 func (r *Registry) Register(ctx context.Context, fn *FunctionDefinition, wasmBytes []byte) (*Function, error) {
 	if fn == nil {
@ -226,9 +128,6 @@ func (r *Registry) Register(ctx context.Context, fn *FunctionDefinition, wasmByt
 		return nil, &DeployError{FunctionName: fn.Name, Cause: err}
 	}

-	r.invalidateFn(fn.Namespace, fn.Name)
-	r.invalidateEnv(id)
-
 	r.logger.Info("Function registered",
 		zap.String("id", id),
 		zap.String("name", fn.Name),
@ -247,12 +146,6 @@ func (r *Registry) Get(ctx context.Context, namespace, name string, version int)
 	namespace = strings.TrimSpace(namespace)
 	name = strings.TrimSpace(name)

-	// Cache hit (bugboard #708): skip the leader-routed weak read entirely.
-	cacheKey := fnCacheKey(namespace, name, version)
-	if fn, ok := r.cachedFn(cacheKey); ok {
-		return fn, nil
-	}
-
 	var query string
 	var args []interface{}

@ -291,17 +184,13 @@ func (r *Registry) Get(ctx context.Context, namespace, name string, version int)
 	}

 	if len(functions) == 0 {
-		// Do NOT cache misses — a just-deployed function must be visible
-		// immediately on the next call, not after the TTL.
 		if version == 0 {
 			return nil, ErrFunctionNotFound
 		}
 		return nil, ErrVersionNotFound
 	}

-	fn := r.rowToFunction(&functions[0])
-	r.storeFn(cacheKey, fn)
-	return fn, nil
+	return r.rowToFunction(&functions[0]), nil
 }

 // List returns all functions for a namespace.
@ -363,7 +252,6 @@ func (r *Registry) SetEnabled(ctx context.Context, namespace, name string, enabl
 	if rowsAffected == 0 {
 		return ErrFunctionNotFound
 	}
-	r.invalidateFn(namespace, name)
 	r.logger.Info("Function enabled-state updated",
 		zap.String("namespace", namespace),
 		zap.String("name", name),
@ -402,8 +290,6 @@ func (r *Registry) Delete(ctx context.Context, namespace, name string, version i
 		return ErrVersionNotFound
 	}

-	r.invalidateFn(namespace, name)
-
 	r.logger.Info("Function deleted",
 		zap.String("namespace", namespace),
 		zap.String("name", name),
@ -435,10 +321,6 @@ func (r *Registry) GetWASMBytes(ctx context.Context, wasmCID string) ([]byte, er

 // GetEnvVars retrieves environment variables for a function.
 func (r *Registry) GetEnvVars(ctx context.Context, functionID string) (map[string]string, error) {
-	if env, ok := r.cachedEnv(functionID); ok {
-		return env, nil
-	}
-
 	query := `SELECT key, value FROM function_env_vars WHERE function_id = ?`

 	var rows []envVarRow
@ -451,7 +333,6 @@ func (r *Registry) GetEnvVars(ctx context.Context, functionID string) (map[strin
 		envVars[row.Key] = row.Value
 	}

-	r.storeEnv(functionID, envVars)
 	return envVars, nil
 }

--- a/core/pkg/serverless/registry_cache_test.go
+++ b/core/pkg/serverless/registry_cache_test.go
@ -1,118 +0,0 @@
-package serverless
-
-import (
-	"testing"
-	"time"
-
-	"go.uber.org/zap"
-)
-
-// Bugboard #708 — function metadata + env vars are cached in-process so a burst
-// of invokes doesn't pay a leader-routed weak read per op. These pin the cache
-// hit/miss/TTL/invalidation behavior and the dedup'd authorization decision.
-
-func newTestRegistry() *Registry {
-	return NewRegistry(NewMockRQLite(), NewMockIPFSClient(), RegistryConfig{}, zap.NewNop())
-}
-
-func TestRegistryCache_hitAndInvalidate(t *testing.T) {
-	r := newTestRegistry()
-	key := fnCacheKey("ns", "fn", 0)
-	fn := &Function{ID: "id-1", Name: "fn", Namespace: "ns"}
-
-	if _, ok := r.cachedFn(key); ok {
-		t.Fatal("empty cache must miss")
-	}
-	r.storeFn(key, fn)
-	got, ok := r.cachedFn(key)
-	if !ok || got != fn {
-		t.Fatalf("expected cache hit returning the stored fn; ok=%v got=%v", ok, got)
-	}
-
-	// Deploy/enable/disable/delete must drop every cached version.
-	r.storeFn(fnCacheKey("ns", "fn", 3), &Function{ID: "id-3", Name: "fn", Namespace: "ns"})
-	r.invalidateFn("ns", "fn")
-	if _, ok := r.cachedFn(key); ok {
-		t.Error("invalidateFn must drop the version-0 entry")
-	}
-	if _, ok := r.cachedFn(fnCacheKey("ns", "fn", 3)); ok {
-		t.Error("invalidateFn must drop ALL versions of the function")
-	}
-}
-
-func TestRegistryCache_invalidateScopedToFunction(t *testing.T) {
-	r := newTestRegistry()
-	r.storeFn(fnCacheKey("ns", "keep", 0), &Function{ID: "k", Name: "keep", Namespace: "ns"})
-	r.storeFn(fnCacheKey("ns", "drop", 0), &Function{ID: "d", Name: "drop", Namespace: "ns"})
-
-	r.invalidateFn("ns", "drop")
-
-	if _, ok := r.cachedFn(fnCacheKey("ns", "drop", 0)); ok {
-		t.Error("target function must be invalidated")
-	}
-	if _, ok := r.cachedFn(fnCacheKey("ns", "keep", 0)); !ok {
-		t.Error("a DIFFERENT function must NOT be invalidated (prefix must include the null separator)")
-	}
-}
-
-func TestRegistryCache_ttlExpiry(t *testing.T) {
-	r := newTestRegistry()
-	key := fnCacheKey("ns", "fn", 0)
-	// Backdate the entry beyond the TTL.
-	r.fnCache[key] = fnCacheEntry{fn: &Function{ID: "x"}, at: time.Now().Add(-2 * r.cacheTTL)}
-	if _, ok := r.cachedFn(key); ok {
-		t.Error("an entry older than the TTL must be treated as a miss")
-	}
-}
-
-func TestRegistryCache_envHitAndTTL(t *testing.T) {
-	r := newTestRegistry()
-	if _, ok := r.cachedEnv("fid"); ok {
-		t.Fatal("empty env cache must miss")
-	}
-	r.storeEnv("fid", map[string]string{"K": "V"})
-	if env, ok := r.cachedEnv("fid"); !ok || env["K"] != "V" {
-		t.Fatalf("expected env cache hit; ok=%v env=%v", ok, env)
-	}
-	r.envCache["fid"] = envCacheEntry{env: map[string]string{"K": "V"}, at: time.Now().Add(-2 * r.cacheTTL)}
-	if _, ok := r.cachedEnv("fid"); ok {
-		t.Error("env entry older than the TTL must miss")
-	}
-}
-
-func TestRegistryCache_envInvalidatedOnRedeploy(t *testing.T) {
-	// A redeploy REUSES the function ID (Register: id = oldFn.ID) and rewrites
-	// env vars under it, so Register must drop the env cache for that ID — else
-	// a changed env var (e.g. a rotated endpoint) is masked for up to the TTL.
-	r := newTestRegistry()
-	r.storeEnv("fid", map[string]string{"K": "old"})
-	if env, ok := r.cachedEnv("fid"); !ok || env["K"] != "old" {
-		t.Fatal("precondition: env should be cached")
-	}
-	r.invalidateEnv("fid") // what Register now calls
-	if _, ok := r.cachedEnv("fid"); ok {
-		t.Error("env cache must be invalidated on redeploy (reused ID); a changed env var must not be served stale")
-	}
-}
-
-func TestRegistryCache_keyDistinctNoCollision(t *testing.T) {
-	// Guard the null-separated key: "a"+"bc" must not collide with "ab"+"c".
-	if fnCacheKey("a", "bc", 0) == fnCacheKey("ab", "c", 0) {
-		t.Error("cache keys must not collide across namespace/name boundaries")
-	}
-}
-
-func TestCanInvokeFn(t *testing.T) {
-	if !canInvokeFn(&Function{IsPublic: true}, "") {
-		t.Error("public function must be invokable by an anonymous caller")
-	}
-	if canInvokeFn(&Function{IsPublic: false}, "") {
-		t.Error("private function must reject an empty (anonymous) caller")
-	}
-	if canInvokeFn(&Function{IsPublic: false}, "   ") {
-		t.Error("private function must reject a whitespace-only caller")
-	}
-	if !canInvokeFn(&Function{IsPublic: false}, "wallet-abc") {
-		t.Error("private function must accept an identified caller")
-	}
-}
--- a/core/pkg/serverless/types.go
+++ b/core/pkg/serverless/types.go
@ -30,11 +30,6 @@ const (
 	TriggerTypePubSub    TriggerType = "pubsub"
 	TriggerTypeTimer     TriggerType = "timer"
 	TriggerTypeJob       TriggerType = "job"
-	// TriggerTypeInternal marks a gateway-initiated invocation with no end-user
-	// caller (e.g. the auth claims-provider hook at JWT mint time, bugboard
-	// #548). Treated as a system trigger so the per-caller authorization check
-	// is skipped — the gateway is the trusted invoker.
-	TriggerTypeInternal TriggerType = "internal"
 )

 // JobStatus represents the current state of a background job.
@ -239,8 +234,8 @@ type FunctionDefinition struct {
 	// When WSPersistent is true, the function exports ws_open/ws_frame/ws_close
 	// instead of using the default per-frame stateless model.
 	WSPersistent         bool `json:"ws_persistent,omitempty"`
-	WSIdleTimeoutSec     int  `json:"ws_idle_timeout_sec,omitempty"`      // 0 = no idle timeout
-	WSMaxFrameBytes      int  `json:"ws_max_frame_bytes,omitempty"`       // 0 = use default 256 KB
+	WSIdleTimeoutSec     int  `json:"ws_idle_timeout_sec,omitempty"`     // 0 = no idle timeout
+	WSMaxFrameBytes      int  `json:"ws_max_frame_bytes,omitempty"`      // 0 = use default 256 KB
 	WSMaxInflightPerConn int  `json:"ws_max_inflight_per_conn,omitempty"` // 0 = use default 64

 	// RawHTTPResponse enables raw-HTTP-response mode (bugboard #835): the
@ -289,11 +284,11 @@ type Function struct {

 // InvocationContext provides context for a function invocation.
 type InvocationContext struct {
-	RequestID    string `json:"request_id"`
-	FunctionID   string `json:"function_id"`
-	FunctionName string `json:"function_name"`
-	Namespace    string `json:"namespace"`
-	CallerWallet string `json:"caller_wallet,omitempty"`
+	RequestID    string            `json:"request_id"`
+	FunctionID   string            `json:"function_id"`
+	FunctionName string            `json:"function_name"`
+	Namespace    string            `json:"namespace"`
+	CallerWallet string            `json:"caller_wallet,omitempty"`
 	// CallerIP is the source IP of the request, populated by HTTP/WS handlers.
 	// Used by the multi-tier rate limiter as a fallback bucket for anonymous
 	// (no-wallet) callers.
--- a/sdk/package.json
+++ b/sdk/package.json
@ -1,6 +1,6 @@
 {
  "name": "@debros/orama",
-  "version": "0.122.55",
+  "version": "0.122.47",
  "description": "TypeScript SDK for Orama Network - Database, PubSub, Cache, Storage, Vault, and more",
  "type": "module",
  "main": "./dist/index.js",
--- a/sdk/src/core/http.ts
+++ b/sdk/src/core/http.ts
@ -167,41 +167,6 @@ export class HttpClient {
    return this.baseURL;
  }

-  /**
-   * Normalize any thrown error into a typed SDKError so callers can branch on
-   * `.code`/`.httpStatus` instead of string-matching a bare platform
-   * `TypeError: Network request failed` (bugboard #129).
-   *
-   * - SDKError (an HTTP error response) passes through unchanged.
-   * - An AbortError (our own per-request timeout firing) → code "TIMEOUT".
-   * - Anything else (fetch rejects with a TypeError on DNS failure, connection
-   *   refused, offline, or TLS error) → code "NETWORK_ERROR".
-   *
-   * In every network case httpStatus is 0 (no HTTP response was received), which
-   * is how the app distinguishes "couldn't reach the gateway" from a real 4xx/5xx.
-   */
-  private normalizeError(error: unknown, timeoutMs: number): SDKError {
-    if (error instanceof SDKError) {
-      return error;
-    }
-    const name = (error as { name?: string })?.name;
-    const message = error instanceof Error ? error.message : String(error);
-    if (name === "AbortError") {
-      return new SDKError(
-        `request timed out after ${timeoutMs}ms`,
-        0,
-        "TIMEOUT",
-        { cause: name }
-      );
-    }
-    return new SDKError(
-      message || "network request failed",
-      0,
-      "NETWORK_ERROR",
-      { cause: name }
-    );
-  }
-
  async request<T = any>(
    method: "GET" | "POST" | "PUT" | "DELETE",
    path: string,
@ -333,14 +298,18 @@ export class HttpClient {
        }
      }

-      // Normalize native errors (TypeError, AbortError) into a typed SDKError
-      // so the app gets a stable `.code`/`.httpStatus` instead of a bare
-      // platform "Network request failed" (bugboard #129).
-      const sdkError = this.normalizeError(error, requestTimeout);
-
-      // Call the network error callback if configured. This allows the app to
-      // trigger gateway failover.
+      // Call the network error callback if configured
+      // This allows the app to trigger gateway failover
      if (this.onNetworkError) {
+        // Convert native errors (TypeError, AbortError) to SDKError for the callback
+        const sdkError =
+          error instanceof SDKError
+            ? error
+            : new SDKError(
+                error instanceof Error ? error.message : String(error),
+                0, // httpStatus 0 indicates network-level failure
+                "NETWORK_ERROR"
+              );
        this.onNetworkError(sdkError, {
          method,
          path,
@ -349,7 +318,7 @@ export class HttpClient {
        });
      }

-      throw sdkError;
+      throw error;
    } finally {
      clearTimeout(timeoutId);
    }
--- a/sdk/tests/unit/http/network-errors-bug-129.test.ts
+++ b/sdk/tests/unit/http/network-errors-bug-129.test.ts
@ -1,88 +0,0 @@
-import { describe, it, expect, vi } from "vitest";
-import { HttpClient } from "../../../src/core/http";
-import { SDKError } from "../../../src/errors";
-
-/**
- * Bugboard #129 — typed network errors.
- *
- * Before this fix the HttpClient re-threw the raw platform error on a
- * network-level failure, so a caller (e.g. AnChat's JwtSession) could only
- * tell "couldn't reach the gateway" apart from a real HTTP error by
- * string-matching `TypeError: Network request failed`. These guards lock in
- * that every transport failure surfaces as a typed SDKError with httpStatus 0
- * and a stable `.code`, while genuine HTTP errors keep their status/code.
- */
-describe("Bug #129 — HttpClient surfaces typed network errors", () => {
-  function client(fetchImpl: typeof fetch, onNetworkError?: any) {
-    return new HttpClient({
-      baseURL: "https://gw.example",
-      maxRetries: 0,
-      timeout: 5000,
-      fetch: fetchImpl,
-      onNetworkError,
-    });
-  }
-
-  it("maps a fetch TypeError (connection failure) to SDKError NETWORK_ERROR / status 0", async () => {
-    const fetchSpy = vi.fn(async () => {
-      throw new TypeError("Network request failed");
-    });
-    const err = await client(fetchSpy as any)
-      .post("/v1/auth/refresh", { refresh_token: "x" })
-      .catch((e) => e);
-
-    expect(err).toBeInstanceOf(SDKError);
-    expect(err.code).toBe("NETWORK_ERROR");
-    expect(err.httpStatus).toBe(0);
-    // Original platform message is preserved for diagnostics.
-    expect(err.message).toContain("Network request failed");
-  });
-
-  it("maps an AbortError (timeout) to SDKError TIMEOUT / status 0", async () => {
-    const fetchSpy = vi.fn(async () => {
-      const e = new Error("aborted");
-      e.name = "AbortError";
-      throw e;
-    });
-    const err = await client(fetchSpy as any)
-      .get("/v1/auth/challenge")
-      .catch((e) => e);
-
-    expect(err).toBeInstanceOf(SDKError);
-    expect(err.code).toBe("TIMEOUT");
-    expect(err.httpStatus).toBe(0);
-    expect(err.message).toContain("5000ms");
-  });
-
-  it("passes a real HTTP error through unchanged (not masked as NETWORK_ERROR)", async () => {
-    const fetchSpy = vi.fn(
-      async () =>
-        new Response(JSON.stringify({ error: "nope", code: "BAD_TOKEN" }), {
-          status: 401,
-          headers: { "content-type": "application/json" },
-        })
-    );
-    const err = await client(fetchSpy as any)
-      .post("/v1/auth/refresh", { refresh_token: "x" })
-      .catch((e) => e);
-
-    expect(err).toBeInstanceOf(SDKError);
-    expect(err.httpStatus).toBe(401);
-    expect(err.code).toBe("BAD_TOKEN");
-  });
-
-  it("hands the typed error (not the raw TypeError) to the onNetworkError callback", async () => {
-    const seen: SDKError[] = [];
-    const fetchSpy = vi.fn(async () => {
-      throw new TypeError("Failed to fetch");
-    });
-    await client(fetchSpy as any, (e: SDKError) => seen.push(e))
-      .get("/v1/db/read")
-      .catch(() => {});
-
-    expect(seen).toHaveLength(1);
-    expect(seen[0]).toBeInstanceOf(SDKError);
-    expect(seen[0].code).toBe("NETWORK_ERROR");
-    expect(seen[0].httpStatus).toBe(0);
-  });
-});
 @ -1 +1 @@
 .122.55
 .122.47