mirror of
https://github.com/DeBrosOfficial/orama.git
synced 2026-06-16 22:54:12 +00:00
feat(auth): refresh-token custom claims hook (#548)
Custom JWT claims survive token refresh: migration 031 adds the custom-claims column to refresh tokens, the new gateway ClaimsProvider re-resolves claims on refresh, and the serverless invoke path carries them through. Includes refresh-rotation, WS-JWT middleware, and claims-provider test coverage.
This commit is contained in:
parent
8472861ed3
commit
d113b75497
15
core/migrations/031_refresh_token_custom_claims.sql
Normal file
15
core/migrations/031_refresh_token_custom_claims.sql
Normal file
@ -0,0 +1,15 @@
|
||||
-- =============================================================================
|
||||
-- 031_refresh_token_custom_claims.sql
|
||||
--
|
||||
-- Carry the additive JWT custom claims (e.g. the namespace's account_id from
|
||||
-- the auth-claims-provider hook, bugboard #548/#920) ALONGSIDE the refresh
|
||||
-- token, so a rotated access token keeps the same claims without re-invoking
|
||||
-- the namespace's claims-provider function on every 15-min refresh (the
|
||||
-- refresh path is the latency-critical VoIP-wake path, bugboard #125).
|
||||
--
|
||||
-- Resolved once at /v1/auth/verify mint time, stored here, and replayed +
|
||||
-- propagated across each rotation. NULL/absent = no custom claims (the default
|
||||
-- for every namespace without a claims-provider) → fully backward compatible.
|
||||
-- =============================================================================
|
||||
|
||||
ALTER TABLE refresh_tokens ADD COLUMN custom_claims TEXT;
|
||||
@ -43,9 +43,10 @@ type AuthService interface {
|
||||
// Verifies signature, expiration, and issuer.
|
||||
ParseAndVerifyJWT(token string) (*JWTClaims, error)
|
||||
|
||||
// GenerateJWT creates a new signed JWT with the specified claims and TTL.
|
||||
// GenerateJWT creates a new signed JWT with the specified subject, TTL, and
|
||||
// optional additive custom claims (nil = none; bugboard #548).
|
||||
// Returns: token, expirationUnix, error.
|
||||
GenerateJWT(namespace, subject string, ttl time.Duration) (string, int64, error)
|
||||
GenerateJWT(namespace, subject string, ttl time.Duration, custom map[string]string) (string, int64, error)
|
||||
|
||||
// RegisterApp registers a new client application with the gateway.
|
||||
// Returns an application ID that can be used for OAuth flows.
|
||||
|
||||
@ -4,6 +4,7 @@ import (
|
||||
"crypto/hmac"
|
||||
"crypto/sha256"
|
||||
"encoding/hex"
|
||||
"encoding/json"
|
||||
)
|
||||
|
||||
// sha256Hex returns the lowercase hex-encoded SHA-256 hash of the input string.
|
||||
@ -22,3 +23,34 @@ func HmacSHA256Hex(data, secret string) string {
|
||||
mac.Write([]byte(data))
|
||||
return hex.EncodeToString(mac.Sum(nil))
|
||||
}
|
||||
|
||||
// marshalClaims serializes additive JWT custom claims for storage alongside a
|
||||
// refresh token (bugboard #548). Empty/nil → "" so the column stays NULL-ish
|
||||
// and absent claims read back as nil.
|
||||
func marshalClaims(m map[string]string) string {
|
||||
if len(m) == 0 {
|
||||
return ""
|
||||
}
|
||||
b, err := json.Marshal(m)
|
||||
if err != nil {
|
||||
return ""
|
||||
}
|
||||
return string(b)
|
||||
}
|
||||
|
||||
// unmarshalClaims is the inverse of marshalClaims. An empty string or any
|
||||
// malformed value yields nil (fail-soft — a corrupt claims blob must never
|
||||
// break token rotation; the token simply rotates without custom claims).
|
||||
func unmarshalClaims(s string) map[string]string {
|
||||
if s == "" {
|
||||
return nil
|
||||
}
|
||||
var m map[string]string
|
||||
if err := json.Unmarshal([]byte(s), &m); err != nil {
|
||||
return nil
|
||||
}
|
||||
if len(m) == 0 {
|
||||
return nil
|
||||
}
|
||||
return m
|
||||
}
|
||||
|
||||
@ -182,15 +182,22 @@ func (s *Service) ParseAndVerifyJWT(token string) (*JWTClaims, error) {
|
||||
return &claims, nil
|
||||
}
|
||||
|
||||
func (s *Service) GenerateJWT(ns, subject string, ttl time.Duration) (string, int64, error) {
|
||||
// GenerateJWT mints a signed access token. `custom` carries additive
|
||||
// app-defined claims (e.g. the namespace's account_id from the claims-provider
|
||||
// hook, bugboard #548) under the top-level "custom" object — read back via
|
||||
// JWTClaims.Custom / oh.GetCallerClaim. Pass nil for none. Reserved claims
|
||||
// (sub/iss/aud/iat/nbf/exp/namespace) are always gateway-controlled and cannot
|
||||
// be overridden by `custom` (the caller is responsible for not putting
|
||||
// reserved keys here; the claims-provider path sanitizes them out upstream).
|
||||
func (s *Service) GenerateJWT(ns, subject string, ttl time.Duration, custom map[string]string) (string, int64, error) {
|
||||
// Prefer EdDSA when available
|
||||
if s.preferEdDSA && s.edSigningKey != nil {
|
||||
return s.generateEdDSAJWT(ns, subject, ttl)
|
||||
return s.generateEdDSAJWT(ns, subject, ttl, custom)
|
||||
}
|
||||
return s.generateRSAJWT(ns, subject, ttl)
|
||||
return s.generateRSAJWT(ns, subject, ttl, custom)
|
||||
}
|
||||
|
||||
func (s *Service) generateEdDSAJWT(ns, subject string, ttl time.Duration) (string, int64, error) {
|
||||
func (s *Service) generateEdDSAJWT(ns, subject string, ttl time.Duration, custom map[string]string) (string, int64, error) {
|
||||
if s.edSigningKey == nil {
|
||||
return "", 0, errors.New("EdDSA signing key unavailable")
|
||||
}
|
||||
@ -211,6 +218,9 @@ func (s *Service) generateEdDSAJWT(ns, subject string, ttl time.Duration) (strin
|
||||
"exp": exp.Unix(),
|
||||
"namespace": ns,
|
||||
}
|
||||
if len(custom) > 0 {
|
||||
payload["custom"] = custom
|
||||
}
|
||||
pb, _ := json.Marshal(payload)
|
||||
hb64 := base64.RawURLEncoding.EncodeToString(hb)
|
||||
pb64 := base64.RawURLEncoding.EncodeToString(pb)
|
||||
@ -220,7 +230,7 @@ func (s *Service) generateEdDSAJWT(ns, subject string, ttl time.Duration) (strin
|
||||
return signingInput + "." + sb64, exp.Unix(), nil
|
||||
}
|
||||
|
||||
func (s *Service) generateRSAJWT(ns, subject string, ttl time.Duration) (string, int64, error) {
|
||||
func (s *Service) generateRSAJWT(ns, subject string, ttl time.Duration, custom map[string]string) (string, int64, error) {
|
||||
if s.signingKey == nil {
|
||||
return "", 0, errors.New("signing key unavailable")
|
||||
}
|
||||
@ -241,6 +251,9 @@ func (s *Service) generateRSAJWT(ns, subject string, ttl time.Duration) (string,
|
||||
"exp": exp.Unix(),
|
||||
"namespace": ns,
|
||||
}
|
||||
if len(custom) > 0 {
|
||||
payload["custom"] = custom
|
||||
}
|
||||
pb, _ := json.Marshal(payload)
|
||||
hb64 := base64.RawURLEncoding.EncodeToString(hb)
|
||||
pb64 := base64.RawURLEncoding.EncodeToString(pb)
|
||||
|
||||
@ -31,8 +31,15 @@ type rotationMockORMDB struct {
|
||||
client.DatabaseClient
|
||||
mu sync.Mutex
|
||||
subjectByToken map[string]string // hashedToken -> subject (nil/missing = "invalid")
|
||||
claimsByToken map[string]string // hashedToken -> custom_claims JSON (bugboard #548)
|
||||
inserted int // count of INSERTs (new refresh-token rows)
|
||||
subjects map[string]string // subject -> last hashed token inserted
|
||||
// selectErrRemaining: number of upcoming "SELECT subject" calls that
|
||||
// should return selectErr (simulates a transient rqlite leader outage).
|
||||
// Decremented per matching call; 0 = serve normally (bugboard #125).
|
||||
selectErr error
|
||||
selectErrRemaining int
|
||||
selectAttemptsTaken int
|
||||
}
|
||||
|
||||
func (m *rotationMockORMDB) Query(_ context.Context, sql string, args ...interface{}) (*client.QueryResult, error) {
|
||||
@ -45,14 +52,23 @@ func (m *rotationMockORMDB) Query(_ context.Context, sql string, args ...interfa
|
||||
if containsCI(sql, "SELECT id FROM namespaces") {
|
||||
return &client.QueryResult{Count: 1, Rows: [][]interface{}{{int64(1)}}}, nil
|
||||
}
|
||||
// SELECT subject for the refresh-token lookup.
|
||||
if containsCI(sql, "SELECT subject FROM refresh_tokens") {
|
||||
// SELECT subject (+ custom_claims, bugboard #548) for the lookup.
|
||||
if containsCI(sql, "SELECT subject") && containsCI(sql, "FROM refresh_tokens") {
|
||||
m.selectAttemptsTaken++
|
||||
if m.selectErrRemaining > 0 {
|
||||
m.selectErrRemaining--
|
||||
return nil, m.selectErr
|
||||
}
|
||||
if len(args) < 2 {
|
||||
return &client.QueryResult{Count: 0}, nil
|
||||
}
|
||||
hashedTok, _ := args[1].(string)
|
||||
if subj, ok := m.subjectByToken[hashedTok]; ok && subj != "" {
|
||||
return &client.QueryResult{Count: 1, Rows: [][]interface{}{{subj}}}, nil
|
||||
claims := ""
|
||||
if m.claimsByToken != nil {
|
||||
claims = m.claimsByToken[hashedTok]
|
||||
}
|
||||
return &client.QueryResult{Count: 1, Rows: [][]interface{}{{subj, claims}}}, nil
|
||||
}
|
||||
return &client.QueryResult{Count: 0}, nil
|
||||
}
|
||||
@ -71,6 +87,14 @@ func (m *rotationMockORMDB) Query(_ context.Context, sql string, args ...interfa
|
||||
m.subjectByToken = map[string]string{}
|
||||
}
|
||||
m.subjectByToken[hashedTok] = subj
|
||||
// custom_claims is the LAST arg (bugboard #548) — capture it so
|
||||
// rotation-propagation tests can assert it carries forward.
|
||||
if m.claimsByToken == nil {
|
||||
m.claimsByToken = map[string]string{}
|
||||
}
|
||||
if cc, ok := args[len(args)-1].(string); ok {
|
||||
m.claimsByToken[hashedTok] = cc
|
||||
}
|
||||
}
|
||||
return &client.QueryResult{Count: 1}, nil
|
||||
}
|
||||
@ -369,3 +393,120 @@ func TestRefreshToken_RotatedTokenReplayFails(t *testing.T) {
|
||||
t.Fatal("expected error reusing rotated token, got nil")
|
||||
}
|
||||
}
|
||||
|
||||
// Bugboard #125: a TRANSIENT rqlite error on the lookup (leader briefly
|
||||
// unavailable during a rolling restart) must surface as ErrRefreshTransient
|
||||
// (→ 503, retryable) — NOT "invalid or expired" (→ 401, full SIWE re-auth,
|
||||
// impossible on a locked device answering a VoIP-woken call).
|
||||
func TestRefreshToken_transientSelectError_returnsTransient(t *testing.T) {
|
||||
s, ormDB, _ := newRotationTestService(t)
|
||||
const refresh = "valid-but-leader-down"
|
||||
ormDB.subjectByToken[sha256Hex(refresh)] = "0xWALLET"
|
||||
// Every lookup attempt across the whole retry window errors.
|
||||
ormDB.selectErr = errors.New("rqlite: leadership lost")
|
||||
ormDB.selectErrRemaining = 99
|
||||
|
||||
_, _, _, _, err := s.RefreshToken(context.Background(), refresh, "anchat-test")
|
||||
if !errors.Is(err, ErrRefreshTransient) {
|
||||
t.Fatalf("err = %v, want ErrRefreshTransient (a valid token must not 401 during a leader outage)", err)
|
||||
}
|
||||
}
|
||||
|
||||
// The lookup is retried, so a brief blip recovers transparently within one
|
||||
// refresh call (no client-visible failure at all).
|
||||
func TestRefreshToken_selectRecoversAfterRetry(t *testing.T) {
|
||||
s, ormDB, _ := newRotationTestService(t)
|
||||
const refresh = "valid-blips-then-ok"
|
||||
ormDB.subjectByToken[sha256Hex(refresh)] = "0xWALLET"
|
||||
ormDB.selectErr = errors.New("rqlite: leadership lost")
|
||||
ormDB.selectErrRemaining = refreshSelectRetries - 1 // fail all but the last attempt
|
||||
|
||||
access, newRefresh, subj, _, err := s.RefreshToken(context.Background(), refresh, "anchat-test")
|
||||
if err != nil {
|
||||
t.Fatalf("RefreshToken should recover after transient blips: %v", err)
|
||||
}
|
||||
if access == "" || newRefresh == "" || subj != "0xWALLET" {
|
||||
t.Errorf("recovered refresh incomplete: access=%q newRefresh=%q subj=%q", access, newRefresh, subj)
|
||||
}
|
||||
}
|
||||
|
||||
// A transient error on the CAS write (revoke) is also retryable, not a 401.
|
||||
func TestRefreshToken_transientUpdateError_returnsTransient(t *testing.T) {
|
||||
s, ormDB, rq := newRotationTestService(t)
|
||||
const refresh = "valid-cas-write-down"
|
||||
ormDB.subjectByToken[sha256Hex(refresh)] = "0xWALLET"
|
||||
rq.execErrNext = []error{errors.New("rqlite: write failed, no leader")}
|
||||
|
||||
_, _, _, _, err := s.RefreshToken(context.Background(), refresh, "anchat-test")
|
||||
if !errors.Is(err, ErrRefreshTransient) {
|
||||
t.Fatalf("err = %v, want ErrRefreshTransient on a transient CAS write error", err)
|
||||
}
|
||||
}
|
||||
|
||||
// A genuinely unknown token must remain a hard invalid (401), NOT be masked as
|
||||
// transient — the distinction is the whole point of the #125 fix.
|
||||
func TestRefreshToken_unknownToken_isNotTransient(t *testing.T) {
|
||||
s, _, _ := newRotationTestService(t)
|
||||
_, _, _, _, err := s.RefreshToken(context.Background(), "never-existed", "anchat-test")
|
||||
if err == nil {
|
||||
t.Fatal("expected error for unknown token")
|
||||
}
|
||||
if errors.Is(err, ErrRefreshTransient) {
|
||||
t.Errorf("unknown token must be a genuine invalid (401), not transient (503): %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// mockClaimsResolver is a fixed claims-provider stand-in for the mint tests.
|
||||
type mockClaimsResolver struct{ claims map[string]string }
|
||||
|
||||
func (m mockClaimsResolver) ResolveClaims(_ context.Context, _, _ string) map[string]string {
|
||||
return m.claims
|
||||
}
|
||||
|
||||
// Bugboard #548: claims resolved at IssueTokens (login) must be stored with the
|
||||
// refresh token AND replayed into the rotated access token — so account_id
|
||||
// survives the 15-min refresh without re-invoking the provider.
|
||||
func TestRefreshToken_propagatesCustomClaims(t *testing.T) {
|
||||
s, ormDB, _ := newRotationTestService(t)
|
||||
s.SetClaimsResolver(mockClaimsResolver{claims: map[string]string{"account_id": "u-999"}})
|
||||
|
||||
// Login mint — IssueTokens resolves + stores the claims with the refresh row.
|
||||
_, refresh, _, err := s.IssueTokens(context.Background(), "0xWALLET", "anchat-test")
|
||||
if err != nil {
|
||||
t.Fatalf("IssueTokens: %v", err)
|
||||
}
|
||||
if got := ormDB.claimsByToken[sha256Hex(refresh)]; got != `{"account_id":"u-999"}` {
|
||||
t.Fatalf("claims not stored with refresh token; got %q", got)
|
||||
}
|
||||
|
||||
// Refresh — the rotated access token must carry account_id, and the NEW
|
||||
// refresh row must propagate the stored claims.
|
||||
access, newRefresh, _, _, err := s.RefreshToken(context.Background(), refresh, "anchat-test")
|
||||
if err != nil {
|
||||
t.Fatalf("RefreshToken: %v", err)
|
||||
}
|
||||
claims, err := s.ParseAndVerifyJWT(access)
|
||||
if err != nil {
|
||||
t.Fatalf("ValidateJWT: %v", err)
|
||||
}
|
||||
if claims.Custom["account_id"] != "u-999" {
|
||||
t.Errorf("rotated access token lost account_id; custom=%v", claims.Custom)
|
||||
}
|
||||
if got := ormDB.claimsByToken[sha256Hex(newRefresh)]; got != `{"account_id":"u-999"}` {
|
||||
t.Errorf("rotation did not propagate claims to the new row; got %q", got)
|
||||
}
|
||||
|
||||
// Second rotation hop (N+1 → N+2): the claim must survive repeated
|
||||
// rotations, not just the first — the propagation is the whole point.
|
||||
access2, _, _, _, err := s.RefreshToken(context.Background(), newRefresh, "anchat-test")
|
||||
if err != nil {
|
||||
t.Fatalf("second RefreshToken: %v", err)
|
||||
}
|
||||
claims2, err := s.ParseAndVerifyJWT(access2)
|
||||
if err != nil {
|
||||
t.Fatalf("ParseAndVerifyJWT (2nd): %v", err)
|
||||
}
|
||||
if claims2.Custom["account_id"] != "u-999" {
|
||||
t.Errorf("account_id lost across the second rotation; custom=%v", claims2.Custom)
|
||||
}
|
||||
}
|
||||
|
||||
@ -35,7 +35,8 @@ type Service struct {
|
||||
edKeyID string
|
||||
preferEdDSA bool
|
||||
defaultNS string
|
||||
apiKeyHMACSecret string // HMAC secret for hashing API keys before storage
|
||||
apiKeyHMACSecret string // HMAC secret for hashing API keys before storage
|
||||
claimsResolver ClaimsResolver // namespace claims-provider hook (bugboard #548); nil = none
|
||||
}
|
||||
|
||||
func NewService(logger *logging.ColoredLogger, orm client.NetworkClient, signingKeyPEM string, defaultNS string) (*Service, error) {
|
||||
@ -84,6 +85,28 @@ func (s *Service) SetRqliteClient(db rqlite.Client) {
|
||||
s.db = db
|
||||
}
|
||||
|
||||
// ClaimsResolver resolves additive, namespace-defined JWT custom claims for an
|
||||
// authenticated wallet at token-mint time (bugboard #548/#920). The concrete
|
||||
// implementation invokes the namespace's reserved `auth-claims-provider`
|
||||
// serverless function; it MUST be fail-open (return nil, never error) so a
|
||||
// missing/slow/broken provider never breaks authentication. Injected via
|
||||
// SetClaimsResolver; nil = no custom claims (every namespace's default).
|
||||
type ClaimsResolver interface {
|
||||
ResolveClaims(ctx context.Context, wallet, namespace string) map[string]string
|
||||
}
|
||||
|
||||
// SetClaimsResolver wires the namespace claims-provider hook used at mint time.
|
||||
func (s *Service) SetClaimsResolver(r ClaimsResolver) { s.claimsResolver = r }
|
||||
|
||||
// resolveCustomClaims returns the namespace's additive claims for this wallet,
|
||||
// or nil. Fail-open by contract — the resolver never errors.
|
||||
func (s *Service) resolveCustomClaims(ctx context.Context, wallet, namespace string) map[string]string {
|
||||
if s.claimsResolver == nil {
|
||||
return nil
|
||||
}
|
||||
return s.claimsResolver.ResolveClaims(ctx, wallet, namespace)
|
||||
}
|
||||
|
||||
// ErrRotationNotConfigured is returned by RefreshToken when the service
|
||||
// wasn't given an rqlite client — refusing to rotate without atomicity
|
||||
// guarantees is safer than rotating non-atomically.
|
||||
@ -224,8 +247,13 @@ func (s *Service) IssueTokens(ctx context.Context, wallet, namespace string) (st
|
||||
return "", "", 0, fmt.Errorf("signing key unavailable")
|
||||
}
|
||||
|
||||
// Resolve namespace-defined additive claims (bugboard #548) ONCE at mint
|
||||
// time. Stored with the refresh token below and replayed across rotations
|
||||
// so the 15-min refresh path never re-invokes the provider.
|
||||
custom := s.resolveCustomClaims(ctx, wallet, namespace)
|
||||
|
||||
// Issue access token (15m)
|
||||
token, expUnix, err := s.GenerateJWT(namespace, wallet, 15*time.Minute)
|
||||
token, expUnix, err := s.GenerateJWT(namespace, wallet, 15*time.Minute, custom)
|
||||
if err != nil {
|
||||
return "", "", 0, fmt.Errorf("failed to generate JWT: %w", err)
|
||||
}
|
||||
@ -246,8 +274,8 @@ func (s *Service) IssueTokens(ctx context.Context, wallet, namespace string) (st
|
||||
db := s.orm.Database()
|
||||
hashedRefresh := sha256Hex(refresh)
|
||||
if _, err := db.Query(internalCtx,
|
||||
"INSERT INTO refresh_tokens(namespace_id, subject, token, audience, expires_at) VALUES (?, ?, ?, ?, datetime('now', '+30 days'))",
|
||||
nsID, wallet, hashedRefresh, "gateway",
|
||||
"INSERT INTO refresh_tokens(namespace_id, subject, token, audience, expires_at, custom_claims) VALUES (?, ?, ?, ?, datetime('now', '+30 days'), ?)",
|
||||
nsID, wallet, hashedRefresh, "gateway", marshalClaims(custom),
|
||||
); err != nil {
|
||||
return "", "", 0, fmt.Errorf("failed to store refresh token: %w", err)
|
||||
}
|
||||
@ -265,6 +293,27 @@ func (s *Service) IssueTokens(ctx context.Context, wallet, namespace string) (st
|
||||
// This is the tripwire promised by RFC 9700 §4.12 (refresh-token rotation).
|
||||
var ErrRefreshTokenReplay = fmt.Errorf("refresh token already rotated or invalid")
|
||||
|
||||
// ErrRefreshTransient is returned when refresh-token rotation fails for a
|
||||
// RETRYABLE reason — an rqlite-layer error rather than a genuine bad/expired
|
||||
// token. Bugboard #125: during a rolling gateway restart the rqlite leader is
|
||||
// briefly unavailable (re-election window), so the lookup/rotation errors;
|
||||
// collapsing that into "invalid token" forces a 401 → full SIWE re-auth, which
|
||||
// is impossible on a locked device answering a VoIP-woken call. Callers MUST
|
||||
// surface this as a retryable 503, NOT a 401, so the client retries within the
|
||||
// ring window instead of tearing down the session.
|
||||
var ErrRefreshTransient = fmt.Errorf("refresh token rotation temporarily unavailable")
|
||||
|
||||
const (
|
||||
// refreshSelectRetries bounds how many times the refresh lookup is retried
|
||||
// when the rqlite read errors (transient leader unavailability). The read
|
||||
// is idempotent and happens BEFORE any write, so retrying is safe.
|
||||
refreshSelectRetries = 3
|
||||
// refreshSelectRetryDelay is the backoff between lookup retries. Three
|
||||
// tries × 250ms rides out a brief leader re-election without adding
|
||||
// meaningful latency to the common (healthy-leader) path.
|
||||
refreshSelectRetryDelay = 250 * time.Millisecond
|
||||
)
|
||||
|
||||
// RefreshToken validates the supplied refresh token, atomically rotates it
|
||||
// (revokes the old, mints a new), and returns a fresh access token alongside
|
||||
// the rotated refresh token.
|
||||
@ -309,22 +358,61 @@ func (s *Service) RefreshToken(ctx context.Context, refreshToken, namespace stri
|
||||
|
||||
nsID, err := s.ResolveNamespaceID(ctx, namespace)
|
||||
if err != nil {
|
||||
return "", "", "", 0, err
|
||||
// Bugboard #125: namespace resolution runs an rqlite query BEFORE the
|
||||
// token lookup, so a leader re-election during a rolling restart fails
|
||||
// here too. Treat it as retryable (→ 503), not a bad token (→ 401) —
|
||||
// the refresh-path namespace comes from an already-authenticated
|
||||
// session, so a resolution failure is a transient DB error, never the
|
||||
// client's fault.
|
||||
s.logger.ComponentWarn(logging.ComponentGeneral,
|
||||
"refresh namespace resolution failed (transient, surfacing retryable)",
|
||||
zap.String("namespace", namespace),
|
||||
zap.Error(err))
|
||||
return "", "", "", 0, ErrRefreshTransient
|
||||
}
|
||||
|
||||
hashedRefresh := sha256Hex(refreshToken)
|
||||
|
||||
// Step 1: read the subject. Tells us who the token belongs to AND
|
||||
// validates that it's currently usable (not revoked, not expired).
|
||||
selectQ := `SELECT subject FROM refresh_tokens
|
||||
//
|
||||
// Bugboard #125: distinguish a TRANSIENT rqlite error (leader briefly
|
||||
// unavailable during a rolling restart) from a GENUINE token miss. The
|
||||
// read is idempotent and pre-write, so we retry it a few times; only after
|
||||
// exhausting retries do we surface ErrRefreshTransient (→ 503, client
|
||||
// retries). An actual empty result (Count == 0) is a real bad/expired
|
||||
// token → "invalid or expired" (→ 401). Collapsing the two used to 401 a
|
||||
// valid session during every restart, defeating the VoIP-wake refresh.
|
||||
selectQ := `SELECT subject, custom_claims FROM refresh_tokens
|
||||
WHERE namespace_id = ? AND token = ?
|
||||
AND revoked_at IS NULL
|
||||
AND (expires_at IS NULL OR expires_at > datetime('now'))
|
||||
LIMIT 1`
|
||||
res, err := ormDB.Query(internalCtx, selectQ, nsID, hashedRefresh)
|
||||
if err != nil || res == nil || res.Count == 0 {
|
||||
var res *client.QueryResult
|
||||
var selErr error
|
||||
for attempt := 0; attempt < refreshSelectRetries; attempt++ {
|
||||
res, selErr = ormDB.Query(internalCtx, selectQ, nsID, hashedRefresh)
|
||||
if selErr == nil && res != nil {
|
||||
break
|
||||
}
|
||||
if attempt < refreshSelectRetries-1 {
|
||||
time.Sleep(refreshSelectRetryDelay)
|
||||
}
|
||||
}
|
||||
if selErr != nil || res == nil {
|
||||
// rqlite error persisted across retries — leader likely mid-election.
|
||||
// Retryable, NOT an invalid token.
|
||||
s.logger.ComponentWarn(logging.ComponentGeneral,
|
||||
"refresh token lookup failed (transient rqlite error, surfacing retryable)",
|
||||
zap.String("namespace", namespace),
|
||||
zap.Error(selErr))
|
||||
return "", "", "", 0, ErrRefreshTransient
|
||||
}
|
||||
if res.Count == 0 {
|
||||
// Genuinely not found / revoked / expired — a real bad token.
|
||||
return "", "", "", 0, fmt.Errorf("invalid or expired refresh token")
|
||||
}
|
||||
var customClaimsJSON string
|
||||
if len(res.Rows) > 0 && len(res.Rows[0]) > 0 {
|
||||
if val, ok := res.Rows[0][0].(string); ok {
|
||||
subject = val
|
||||
@ -332,7 +420,15 @@ func (s *Service) RefreshToken(ctx context.Context, refreshToken, namespace stri
|
||||
b, _ := json.Marshal(res.Rows[0][0])
|
||||
_ = json.Unmarshal(b, &subject)
|
||||
}
|
||||
// custom_claims (bugboard #548) — resolved once at login, replayed on
|
||||
// every rotation so the refresh path never re-invokes the provider.
|
||||
if len(res.Rows[0]) > 1 {
|
||||
if cc, ok := res.Rows[0][1].(string); ok {
|
||||
customClaimsJSON = cc
|
||||
}
|
||||
}
|
||||
}
|
||||
custom := unmarshalClaims(customClaimsJSON)
|
||||
|
||||
// Step 2: atomic CAS — revoke the old row. RowsAffected is the lock.
|
||||
// Two concurrent calls with the same refresh token: exactly one wins
|
||||
@ -343,7 +439,13 @@ func (s *Service) RefreshToken(ctx context.Context, refreshToken, namespace stri
|
||||
WHERE namespace_id = ? AND token = ? AND revoked_at IS NULL`,
|
||||
nsID, hashedRefresh)
|
||||
if err != nil {
|
||||
return "", "", "", 0, fmt.Errorf("revoke old refresh token: %w", err)
|
||||
// rqlite write error (leader unavailable) — retryable, not a bad
|
||||
// token. No row was revoked, so a client retry is safe (bugboard #125).
|
||||
s.logger.ComponentWarn(logging.ComponentGeneral,
|
||||
"refresh token revoke failed (transient rqlite error, surfacing retryable)",
|
||||
zap.String("namespace", namespace),
|
||||
zap.Error(err))
|
||||
return "", "", "", 0, ErrRefreshTransient
|
||||
}
|
||||
affected, _ := updRes.RowsAffected()
|
||||
if affected == 0 {
|
||||
@ -359,8 +461,9 @@ func (s *Service) RefreshToken(ctx context.Context, refreshToken, namespace stri
|
||||
return "", "", "", 0, ErrRefreshTokenReplay
|
||||
}
|
||||
|
||||
// Step 3: mint the new access JWT.
|
||||
accessToken, expUnix, err = s.GenerateJWT(namespace, subject, 15*time.Minute)
|
||||
// Step 3: mint the new access JWT, carrying forward the stored custom
|
||||
// claims so a rotated token keeps the same account_id etc. (bugboard #548).
|
||||
accessToken, expUnix, err = s.GenerateJWT(namespace, subject, 15*time.Minute, custom)
|
||||
if err != nil {
|
||||
return "", "", "", 0, fmt.Errorf("generate access token: %w", err)
|
||||
}
|
||||
@ -376,10 +479,23 @@ func (s *Service) RefreshToken(ctx context.Context, refreshToken, namespace stri
|
||||
}
|
||||
newRefreshToken = base64.RawURLEncoding.EncodeToString(rbuf)
|
||||
hashedNew := sha256Hex(newRefreshToken)
|
||||
// Re-marshal from the parsed map (not the raw stored string) so the new
|
||||
// row and the freshly-minted access token are provably consistent and
|
||||
// self-healing — a malformed stored blob converges to "" on both sides
|
||||
// rather than being propagated forward verbatim. custom_claims is written
|
||||
// ONLY here and in IssueTokens, both from a sanitized map (bugboard #548).
|
||||
if _, err := ormDB.Query(internalCtx,
|
||||
"INSERT INTO refresh_tokens(namespace_id, subject, token, audience, expires_at) VALUES (?, ?, ?, ?, datetime('now', '+30 days'))",
|
||||
nsID, subject, hashedNew, "gateway"); err != nil {
|
||||
return "", "", "", 0, fmt.Errorf("store rotated refresh token: %w", err)
|
||||
"INSERT INTO refresh_tokens(namespace_id, subject, token, audience, expires_at, custom_claims) VALUES (?, ?, ?, ?, datetime('now', '+30 days'), ?)",
|
||||
nsID, subject, hashedNew, "gateway", marshalClaims(custom)); err != nil {
|
||||
// The old token is already revoked (step 2). A retryable error here
|
||||
// leaves the client to re-attempt — which will re-auth since the old
|
||||
// token is gone — but that's strictly better than masking a transient
|
||||
// failure as a permanent 401 (bugboard #125). Surface retryable.
|
||||
s.logger.ComponentWarn(logging.ComponentGeneral,
|
||||
"refresh token store failed after revoke (transient rqlite error)",
|
||||
zap.String("namespace", namespace),
|
||||
zap.Error(err))
|
||||
return "", "", "", 0, ErrRefreshTransient
|
||||
}
|
||||
|
||||
return accessToken, newRefreshToken, subject, expUnix, nil
|
||||
|
||||
@ -112,7 +112,7 @@ func TestJWTFlow(t *testing.T) {
|
||||
sub := "0x1234567890abcdef1234567890abcdef12345678"
|
||||
ttl := 15 * time.Minute
|
||||
|
||||
token, exp, err := s.GenerateJWT(ns, sub, ttl)
|
||||
token, exp, err := s.GenerateJWT(ns, sub, ttl, nil)
|
||||
if err != nil {
|
||||
t.Fatalf("GenerateJWT failed: %v", err)
|
||||
}
|
||||
@ -192,7 +192,7 @@ func TestEdDSAJWTFlow(t *testing.T) {
|
||||
ttl := 15 * time.Minute
|
||||
|
||||
// With EdDSA preferred, GenerateJWT should produce an EdDSA token
|
||||
token, exp, err := s.GenerateJWT(ns, sub, ttl)
|
||||
token, exp, err := s.GenerateJWT(ns, sub, ttl, nil)
|
||||
if err != nil {
|
||||
t.Fatalf("GenerateJWT (EdDSA) failed: %v", err)
|
||||
}
|
||||
@ -233,7 +233,7 @@ func TestRS256BackwardCompat(t *testing.T) {
|
||||
|
||||
// Generate an RS256 token directly (simulating a legacy token)
|
||||
s.preferEdDSA = false
|
||||
token, _, err := s.GenerateJWT("test-ns", "user1", 15*time.Minute)
|
||||
token, _, err := s.GenerateJWT("test-ns", "user1", 15*time.Minute, nil)
|
||||
if err != nil {
|
||||
t.Fatalf("GenerateJWT (RS256) failed: %v", err)
|
||||
}
|
||||
@ -447,7 +447,7 @@ func TestEdDSACrossServiceVerify(t *testing.T) {
|
||||
|
||||
const wantSub = "BNbN2RNQTsYrrywZCLnhV9j3hd38jwcRqfxBecZX7hDE"
|
||||
const wantNS = "anchat-test"
|
||||
token, _, err := signer.GenerateJWT(wantNS, wantSub, 15*time.Minute)
|
||||
token, _, err := signer.GenerateJWT(wantNS, wantSub, 15*time.Minute, nil)
|
||||
if err != nil {
|
||||
t.Fatalf("signer.GenerateJWT: %v", err)
|
||||
}
|
||||
@ -478,7 +478,7 @@ func TestEdDSACrossServiceVerify_differentKeysFail(t *testing.T) {
|
||||
_, verKey, _ := ed25519.GenerateKey(rand.Reader)
|
||||
verifier.SetEdDSAKey(verKey)
|
||||
|
||||
token, _, err := signer.GenerateJWT("ns", "sub", 15*time.Minute)
|
||||
token, _, err := signer.GenerateJWT("ns", "sub", 15*time.Minute, nil)
|
||||
if err != nil {
|
||||
t.Fatalf("GenerateJWT: %v", err)
|
||||
}
|
||||
|
||||
178
core/pkg/gateway/claims_provider.go
Normal file
178
core/pkg/gateway/claims_provider.go
Normal file
@ -0,0 +1,178 @@
|
||||
package gateway
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"sort"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/DeBrosOfficial/network/pkg/serverless"
|
||||
"github.com/DeBrosOfficial/network/pkg/serverless/registry"
|
||||
"go.uber.org/zap"
|
||||
)
|
||||
|
||||
// Claims-provider hook (bugboard #548/#920).
|
||||
//
|
||||
// A namespace opts into additive, signed JWT claims by deploying a serverless
|
||||
// function with the RESERVED name "auth-claims-provider". At /v1/auth/verify
|
||||
// mint time the gateway invokes it (in the namespace's own context, so it can
|
||||
// read the namespace's tables) with {"wallet","namespace"} and merges the
|
||||
// string→string object it returns into the JWT's custom claims — e.g.
|
||||
// {"account_id":"<users.user_id>"} so push devices key on the stable account
|
||||
// identity rather than the authenticating wallet.
|
||||
//
|
||||
// Hard guarantees:
|
||||
// - FAIL-OPEN: a missing / slow / erroring / malformed provider yields NO
|
||||
// claims; authentication never breaks because a claims function is down.
|
||||
// - Reserved claims (sub/iss/aud/iat/nbf/exp/namespace/custom) can never be
|
||||
// set by the provider — the gateway controls those.
|
||||
// - Bounded: timeout, max claim count, max total size.
|
||||
|
||||
const (
|
||||
// claimsProviderFnName is the reserved function name a namespace deploys to
|
||||
// inject additive JWT claims at mint time.
|
||||
claimsProviderFnName = "auth-claims-provider"
|
||||
// claimsProviderTimeout bounds the provider invocation so a slow/hung
|
||||
// function never stalls the auth path past this budget (fail-open after).
|
||||
claimsProviderTimeout = 2 * time.Second
|
||||
// maxCustomClaims / maxCustomClaimsBytes cap what a provider may inject —
|
||||
// JWTs ride in headers, and an unbounded claim blob is a DoS / cost vector.
|
||||
maxCustomClaims = 16
|
||||
maxCustomClaimsBytes = 4096
|
||||
// claimsProviderWarnInterval rate-limits the fail-open WARN so a broken
|
||||
// provider doesn't flood the log on every login.
|
||||
claimsProviderWarnInterval = 30 * time.Second
|
||||
)
|
||||
|
||||
// reservedClaimKeys can never be injected by a namespace claims provider; the
|
||||
// gateway owns these. A provider that returns any of them has them dropped.
|
||||
var reservedClaimKeys = map[string]struct{}{
|
||||
"sub": {}, "iss": {}, "aud": {}, "iat": {},
|
||||
"nbf": {}, "exp": {}, "namespace": {}, "custom": {},
|
||||
}
|
||||
|
||||
// jwtClaimsProvider implements auth.ClaimsResolver by invoking the namespace's
|
||||
// reserved auth-claims-provider function.
|
||||
type jwtClaimsProvider struct {
|
||||
invoker *serverless.Invoker
|
||||
logger *zap.Logger
|
||||
|
||||
mu sync.Mutex
|
||||
lastWarnUTC time.Time
|
||||
}
|
||||
|
||||
// newJWTClaimsProvider builds the resolver. A nil invoker disables the hook
|
||||
// (ResolveClaims returns nil).
|
||||
func newJWTClaimsProvider(invoker *serverless.Invoker, logger *zap.Logger) *jwtClaimsProvider {
|
||||
if logger == nil {
|
||||
logger = zap.NewNop()
|
||||
}
|
||||
return &jwtClaimsProvider{invoker: invoker, logger: logger.Named("claims-provider")}
|
||||
}
|
||||
|
||||
// ResolveClaims invokes the namespace's auth-claims-provider and returns the
|
||||
// sanitized additive claims, or nil. Never errors (fail-open contract).
|
||||
func (p *jwtClaimsProvider) ResolveClaims(ctx context.Context, wallet, namespace string) map[string]string {
|
||||
if p.invoker == nil || wallet == "" || namespace == "" {
|
||||
return nil
|
||||
}
|
||||
|
||||
input, err := json.Marshal(map[string]string{"wallet": wallet, "namespace": namespace})
|
||||
if err != nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
callCtx, cancel := context.WithTimeout(ctx, claimsProviderTimeout)
|
||||
defer cancel()
|
||||
|
||||
resp, err := p.invoker.Invoke(callCtx, &serverless.InvokeRequest{
|
||||
Namespace: namespace,
|
||||
FunctionName: claimsProviderFnName,
|
||||
Input: input,
|
||||
// Gateway-initiated, no end-user caller → system trigger skips the
|
||||
// per-caller authorization check.
|
||||
TriggerType: serverless.TriggerTypeInternal,
|
||||
})
|
||||
if err != nil || resp == nil {
|
||||
// The namespace simply hasn't deployed the function (registry miss) is
|
||||
// the normal no-claims case for most namespaces — stay silent. Any
|
||||
// other failure is a real problem worth a rate-limited WARN.
|
||||
if !errors.Is(err, registry.ErrFunctionNotFound) {
|
||||
p.warnRateLimited("claims provider invoke failed (minting without custom claims)",
|
||||
namespace, err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
if resp.Status != serverless.InvocationStatusSuccess {
|
||||
p.warnRateLimited("claims provider returned non-success (minting without custom claims)",
|
||||
namespace, nil)
|
||||
return nil
|
||||
}
|
||||
|
||||
return sanitizeProviderClaims(resp.Output)
|
||||
}
|
||||
|
||||
// sanitizeProviderClaims parses the provider's RAW stdout as a bare JSON object
|
||||
// of additive claims (NOT an {ok,result} Ack envelope — per the #976 contract)
|
||||
// and returns a safe string→string subset: string values only, reserved keys
|
||||
// dropped, bounded count and total size. Any parse failure → nil (fail-open).
|
||||
func sanitizeProviderClaims(raw []byte) map[string]string {
|
||||
if len(raw) == 0 || len(raw) > maxCustomClaimsBytes {
|
||||
return nil
|
||||
}
|
||||
var obj map[string]any
|
||||
if err := json.Unmarshal(raw, &obj); err != nil || len(obj) == 0 {
|
||||
return nil
|
||||
}
|
||||
// Iterate in sorted key order so an over-budget provider payload truncates
|
||||
// DETERMINISTICALLY (Go map iteration is randomized) — the same output must
|
||||
// always yield the same claims, never a per-login-varying subset.
|
||||
keys := make([]string, 0, len(obj))
|
||||
for k := range obj {
|
||||
keys = append(keys, k)
|
||||
}
|
||||
sort.Strings(keys)
|
||||
|
||||
out := make(map[string]string, len(obj))
|
||||
total := 0
|
||||
for _, k := range keys {
|
||||
if len(out) >= maxCustomClaims {
|
||||
break
|
||||
}
|
||||
if _, reserved := reservedClaimKeys[k]; reserved {
|
||||
continue
|
||||
}
|
||||
s, ok := obj[k].(string) // string→string contract; non-string values dropped
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
total += len(k) + len(s)
|
||||
if total > maxCustomClaimsBytes {
|
||||
break
|
||||
}
|
||||
out[k] = s
|
||||
}
|
||||
if len(out) == 0 {
|
||||
return nil
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
func (p *jwtClaimsProvider) warnRateLimited(msg, namespace string, err error) {
|
||||
p.mu.Lock()
|
||||
now := time.Now()
|
||||
if now.Sub(p.lastWarnUTC) < claimsProviderWarnInterval {
|
||||
p.mu.Unlock()
|
||||
return
|
||||
}
|
||||
p.lastWarnUTC = now
|
||||
p.mu.Unlock()
|
||||
|
||||
fields := []zap.Field{zap.String("namespace", namespace), zap.String("function", claimsProviderFnName)}
|
||||
if err != nil {
|
||||
fields = append(fields, zap.Error(err))
|
||||
}
|
||||
p.logger.Warn(msg, fields...)
|
||||
}
|
||||
98
core/pkg/gateway/claims_provider_test.go
Normal file
98
core/pkg/gateway/claims_provider_test.go
Normal file
@ -0,0 +1,98 @@
|
||||
package gateway
|
||||
|
||||
import (
|
||||
"testing"
|
||||
)
|
||||
|
||||
// Bugboard #548: the claims-provider sanitizer is the security boundary —
|
||||
// a namespace function must NOT be able to forge reserved claims, inject
|
||||
// non-string values, or blow the size budget.
|
||||
|
||||
func TestSanitizeProviderClaims_happyPath(t *testing.T) {
|
||||
out := sanitizeProviderClaims([]byte(`{"account_id":"u-123","tier":"pro"}`))
|
||||
if out["account_id"] != "u-123" || out["tier"] != "pro" {
|
||||
t.Fatalf("expected additive claims, got %v", out)
|
||||
}
|
||||
}
|
||||
|
||||
func TestSanitizeProviderClaims_dropsReservedKeys(t *testing.T) {
|
||||
// A malicious provider tries to override sub/exp/namespace — must be dropped.
|
||||
out := sanitizeProviderClaims([]byte(`{"sub":"0xATTACKER","exp":"9999999999","namespace":"evil","account_id":"u-1"}`))
|
||||
for _, k := range []string{"sub", "exp", "namespace"} {
|
||||
if _, present := out[k]; present {
|
||||
t.Errorf("reserved key %q must be dropped, got %v", k, out)
|
||||
}
|
||||
}
|
||||
if out["account_id"] != "u-1" {
|
||||
t.Errorf("legitimate claim dropped: %v", out)
|
||||
}
|
||||
}
|
||||
|
||||
func TestSanitizeProviderClaims_nonStringValuesDropped(t *testing.T) {
|
||||
out := sanitizeProviderClaims([]byte(`{"account_id":"u-1","num":5,"obj":{"a":1},"arr":[1],"ok":"yes"}`))
|
||||
if len(out) != 2 || out["account_id"] != "u-1" || out["ok"] != "yes" {
|
||||
t.Errorf("non-string values must be dropped; got %v", out)
|
||||
}
|
||||
}
|
||||
|
||||
func TestSanitizeProviderClaims_failOpenOnGarbage(t *testing.T) {
|
||||
for _, bad := range [][]byte{
|
||||
nil,
|
||||
[]byte(``),
|
||||
[]byte(`not json`),
|
||||
[]byte(`[1,2,3]`), // array, not object
|
||||
[]byte(`"just a string"`), // scalar
|
||||
[]byte(`{}`), // empty object
|
||||
[]byte(`{"ok":true,"result":{"account_id":"u"}}`), // Ack envelope (wrong shape) → no top-level string claims
|
||||
} {
|
||||
if got := sanitizeProviderClaims(bad); got != nil {
|
||||
t.Errorf("garbage %q must yield nil (fail-open), got %v", bad, got)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestSanitizeProviderClaims_countAndSizeCapped(t *testing.T) {
|
||||
// Way more than maxCustomClaims string entries.
|
||||
buf := []byte("{")
|
||||
for i := 0; i < maxCustomClaims+20; i++ {
|
||||
if i > 0 {
|
||||
buf = append(buf, ',')
|
||||
}
|
||||
buf = append(buf, []byte(`"k`)...)
|
||||
buf = append(buf, []byte(itoa(i))...)
|
||||
buf = append(buf, []byte(`":"v"`)...)
|
||||
}
|
||||
buf = append(buf, '}')
|
||||
out := sanitizeProviderClaims(buf)
|
||||
if len(out) > maxCustomClaims {
|
||||
t.Errorf("claim count not capped: got %d, max %d", len(out), maxCustomClaims)
|
||||
}
|
||||
|
||||
// Oversized total payload → rejected outright.
|
||||
big := make([]byte, maxCustomClaimsBytes+10)
|
||||
for i := range big {
|
||||
big[i] = 'a'
|
||||
}
|
||||
if got := sanitizeProviderClaims(big); got != nil {
|
||||
t.Errorf("oversized payload must be rejected, got %v", got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestResolveClaims_nilInvokerOrEmptyArgs(t *testing.T) {
|
||||
p := newJWTClaimsProvider(nil, nil) // nil invoker disables the hook
|
||||
if got := p.ResolveClaims(nil, "0xW", "ns"); got != nil {
|
||||
t.Errorf("nil invoker must yield nil claims, got %v", got)
|
||||
}
|
||||
}
|
||||
|
||||
func itoa(n int) string {
|
||||
if n == 0 {
|
||||
return "0"
|
||||
}
|
||||
var b []byte
|
||||
for n > 0 {
|
||||
b = append([]byte{byte('0' + n%10)}, b...)
|
||||
n /= 10
|
||||
}
|
||||
return string(b)
|
||||
}
|
||||
@ -648,6 +648,14 @@ func initializeServerless(logger *logging.ColoredLogger, cfg *Config, deps *Depe
|
||||
authService.SetRqliteClient(deps.ORMClient)
|
||||
}
|
||||
|
||||
// Wire the namespace claims-provider hook (bugboard #548): at JWT mint time
|
||||
// the auth service invokes the namespace's reserved `auth-claims-provider`
|
||||
// function (if deployed) and merges its additive claims (e.g. account_id)
|
||||
// into the token. Fail-open — a missing/slow provider never breaks auth.
|
||||
if deps.ServerlessInvoker != nil {
|
||||
authService.SetClaimsResolver(newJWTClaimsProvider(deps.ServerlessInvoker, logger.Logger))
|
||||
}
|
||||
|
||||
// Load or create EdDSA key for new JWT tokens. Bug #215 fix: when
|
||||
// cfg.ClusterSecret is set, the key is derived deterministically from
|
||||
// it via HKDF, so every gateway in the cluster shares the same Ed25519
|
||||
|
||||
@ -393,6 +393,32 @@ func TestRefreshHandler_NilAuthService(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
// Bugboard #125: a non-bad-token failure (here ErrRotationNotConfigured from a
|
||||
// service with no rqlite client) must surface as a RETRYABLE 503 with a
|
||||
// Retry-After header — NOT a 401 that would force a locked device into an
|
||||
// impossible SIWE re-auth mid-call-ring.
|
||||
func TestRefreshHandler_TransientError_returns503Retryable(t *testing.T) {
|
||||
svc, err := authsvc.NewService(testLogger(), nil, "", "default")
|
||||
if err != nil {
|
||||
t.Fatalf("failed to create auth service: %v", err)
|
||||
}
|
||||
h := NewHandlers(testLogger(), svc, nil, "default", noopInternalAuth)
|
||||
|
||||
body, _ := json.Marshal(RefreshRequest{RefreshToken: "some-valid-looking-token"})
|
||||
req := httptest.NewRequest(http.MethodPost, "/v1/auth/refresh", bytes.NewReader(body))
|
||||
req.Header.Set("Content-Type", "application/json")
|
||||
rec := httptest.NewRecorder()
|
||||
|
||||
h.RefreshHandler(rec, req)
|
||||
|
||||
if rec.Code != http.StatusServiceUnavailable {
|
||||
t.Fatalf("transient refresh failure must be 503, got %d", rec.Code)
|
||||
}
|
||||
if rec.Header().Get("Retry-After") == "" {
|
||||
t.Error("503 refresh response should carry a Retry-After header")
|
||||
}
|
||||
}
|
||||
|
||||
// --- APIKeyToJWTHandler tests ---------------------------------------------
|
||||
|
||||
func TestAPIKeyToJWTHandler_MissingKey(t *testing.T) {
|
||||
|
||||
@ -2,6 +2,7 @@ package auth
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"net/http"
|
||||
"strings"
|
||||
"time"
|
||||
@ -57,7 +58,7 @@ func (h *Handlers) APIKeyToJWTHandler(w http.ResponseWriter, r *http.Request) {
|
||||
return
|
||||
}
|
||||
|
||||
token, expUnix, err := h.authService.GenerateJWT(ns, key, 15*time.Minute)
|
||||
token, expUnix, err := h.authService.GenerateJWT(ns, key, 15*time.Minute, nil)
|
||||
if err != nil {
|
||||
writeError(w, http.StatusInternalServerError, err.Error())
|
||||
return
|
||||
@ -103,11 +104,20 @@ func (h *Handlers) RefreshHandler(w http.ResponseWriter, r *http.Request) {
|
||||
// the SDK persists it (bug #239 fix) and uses it on the next refresh.
|
||||
token, newRefreshToken, subject, expUnix, err := h.authService.RefreshToken(r.Context(), req.RefreshToken, req.Namespace)
|
||||
if err != nil {
|
||||
// The service emits a WARN log on replay (ErrRefreshTokenReplay)
|
||||
// so the operator can investigate. We surface a generic 401 here
|
||||
// regardless — leaking "your token was already used" to the
|
||||
// caller would help an attacker confirm a stolen token has been
|
||||
// rotated.
|
||||
// Bugboard #125: a TRANSIENT rotation failure (rqlite leader briefly
|
||||
// unavailable during a rolling restart) must surface as a retryable
|
||||
// 503 — NOT a 401 — so the client retries within the call-ring window
|
||||
// instead of tearing the session down to a full SIWE re-auth, which is
|
||||
// impossible on a locked device answering a VoIP-woken call.
|
||||
if errors.Is(err, authsvc.ErrRefreshTransient) || errors.Is(err, authsvc.ErrRotationNotConfigured) {
|
||||
w.Header().Set("Retry-After", "1")
|
||||
writeError(w, http.StatusServiceUnavailable, "refresh temporarily unavailable, retry")
|
||||
return
|
||||
}
|
||||
// Genuine bad/expired/replayed token. The service emits a WARN log on
|
||||
// replay (ErrRefreshTokenReplay) so the operator can investigate. We
|
||||
// surface a generic 401 regardless — leaking "your token was already
|
||||
// used" would help an attacker confirm a stolen token was rotated.
|
||||
writeError(w, http.StatusUnauthorized, "invalid or expired refresh token")
|
||||
return
|
||||
}
|
||||
|
||||
@ -23,7 +23,7 @@ func TestJWTGenerateAndParse(t *testing.T) {
|
||||
t.Fatalf("failed to create service: %v", err)
|
||||
}
|
||||
|
||||
tok, exp, err := svc.GenerateJWT("ns1", "subj", time.Minute)
|
||||
tok, exp, err := svc.GenerateJWT("ns1", "subj", time.Minute, nil)
|
||||
if err != nil || exp <= 0 {
|
||||
t.Fatalf("gen err=%v exp=%d", err, exp)
|
||||
}
|
||||
@ -50,7 +50,7 @@ func TestJWTExpired(t *testing.T) {
|
||||
}
|
||||
|
||||
// Use sufficiently negative TTL to bypass allowed clock skew
|
||||
tok, _, err := svc.GenerateJWT("ns1", "subj", -2*time.Minute)
|
||||
tok, _, err := svc.GenerateJWT("ns1", "subj", -2*time.Minute, nil)
|
||||
if err != nil {
|
||||
t.Fatalf("gen err=%v", err)
|
||||
}
|
||||
|
||||
@ -51,7 +51,7 @@ func newAuthServiceForTest(t *testing.T) *auth.Service {
|
||||
|
||||
func TestAuthMiddleware_WSJWTQuery_validToken(t *testing.T) {
|
||||
svc := newAuthServiceForTest(t)
|
||||
token, _, err := svc.GenerateJWT("anchat-test", "0xWALLET_SUBJECT", 15*time.Minute)
|
||||
token, _, err := svc.GenerateJWT("anchat-test", "0xWALLET_SUBJECT", 15*time.Minute, nil)
|
||||
if err != nil {
|
||||
t.Fatalf("GenerateJWT: %v", err)
|
||||
}
|
||||
@ -125,7 +125,7 @@ func TestAuthMiddleware_WSJWTQuery_ignoredOnNonWSRequest(t *testing.T) {
|
||||
// privacy issues of JWTs leaking via referrer headers, browser history,
|
||||
// and access logs.
|
||||
svc := newAuthServiceForTest(t)
|
||||
token, _, err := svc.GenerateJWT("ns", "sub", 15*time.Minute)
|
||||
token, _, err := svc.GenerateJWT("ns", "sub", 15*time.Minute, nil)
|
||||
if err != nil {
|
||||
t.Fatalf("GenerateJWT: %v", err)
|
||||
}
|
||||
@ -156,8 +156,8 @@ func TestAuthMiddleware_WSJWTQuery_headerWinsOverQuery(t *testing.T) {
|
||||
// Header path runs FIRST and wins. Verifies the query fallback is a
|
||||
// fallback, not an override.
|
||||
svc := newAuthServiceForTest(t)
|
||||
headerJWT, _, _ := svc.GenerateJWT("ns-header", "sub-header", 15*time.Minute)
|
||||
queryJWT, _, _ := svc.GenerateJWT("ns-query", "sub-query", 15*time.Minute)
|
||||
headerJWT, _, _ := svc.GenerateJWT("ns-header", "sub-header", 15*time.Minute, nil)
|
||||
queryJWT, _, _ := svc.GenerateJWT("ns-query", "sub-query", 15*time.Minute, nil)
|
||||
|
||||
g := &Gateway{authService: svc}
|
||||
|
||||
@ -242,7 +242,7 @@ func TestAuthMiddleware_WSJWTQuery_malformedJWTFallsThrough(t *testing.T) {
|
||||
|
||||
func TestValidateAuthForNamespaceProxy_WSJWTQuery(t *testing.T) {
|
||||
svc := newAuthServiceForTest(t)
|
||||
token, _, err := svc.GenerateJWT("anchat-test", "0xWALLET", 15*time.Minute)
|
||||
token, _, err := svc.GenerateJWT("anchat-test", "0xWALLET", 15*time.Minute, nil)
|
||||
if err != nil {
|
||||
t.Fatalf("GenerateJWT: %v", err)
|
||||
}
|
||||
@ -270,7 +270,7 @@ func TestValidateAuthForNamespaceProxy_WSJWTQuery(t *testing.T) {
|
||||
|
||||
func TestValidateAuthForNamespaceProxy_WSJWTQuery_ignoredOnNonWS(t *testing.T) {
|
||||
svc := newAuthServiceForTest(t)
|
||||
token, _, err := svc.GenerateJWT("anchat-test", "0xWALLET", 15*time.Minute)
|
||||
token, _, err := svc.GenerateJWT("anchat-test", "0xWALLET", 15*time.Minute, nil)
|
||||
if err != nil {
|
||||
t.Fatalf("GenerateJWT: %v", err)
|
||||
}
|
||||
@ -295,7 +295,7 @@ func TestValidateAuthForNamespaceProxy_WSJWTQuery_ignoredOnNonWS(t *testing.T) {
|
||||
// doesn't leak into proxy hops or downstream logs.
|
||||
func TestAuthMiddleware_WSJWTQuery_strippedAfterVerify(t *testing.T) {
|
||||
svc := newAuthServiceForTest(t)
|
||||
token, _, err := svc.GenerateJWT("anchat-test", "0xWALLET", 15*time.Minute)
|
||||
token, _, err := svc.GenerateJWT("anchat-test", "0xWALLET", 15*time.Minute, nil)
|
||||
if err != nil {
|
||||
t.Fatalf("GenerateJWT: %v", err)
|
||||
}
|
||||
|
||||
@ -493,7 +493,7 @@ func (i *Invoker) BatchInvoke(ctx context.Context, req *BatchInvokeRequest) (*Ba
|
||||
func isSystemTrigger(t TriggerType) bool {
|
||||
switch t {
|
||||
case TriggerTypeCron, TriggerTypePubSub, TriggerTypeDatabase,
|
||||
TriggerTypeTimer, TriggerTypeJob:
|
||||
TriggerTypeTimer, TriggerTypeJob, TriggerTypeInternal:
|
||||
return true
|
||||
}
|
||||
return false
|
||||
|
||||
@ -30,6 +30,11 @@ const (
|
||||
TriggerTypePubSub TriggerType = "pubsub"
|
||||
TriggerTypeTimer TriggerType = "timer"
|
||||
TriggerTypeJob TriggerType = "job"
|
||||
// TriggerTypeInternal marks a gateway-initiated invocation with no end-user
|
||||
// caller (e.g. the auth claims-provider hook at JWT mint time, bugboard
|
||||
// #548). Treated as a system trigger so the per-caller authorization check
|
||||
// is skipped — the gateway is the trusted invoker.
|
||||
TriggerTypeInternal TriggerType = "internal"
|
||||
)
|
||||
|
||||
// JobStatus represents the current state of a background job.
|
||||
@ -234,8 +239,8 @@ type FunctionDefinition struct {
|
||||
// When WSPersistent is true, the function exports ws_open/ws_frame/ws_close
|
||||
// instead of using the default per-frame stateless model.
|
||||
WSPersistent bool `json:"ws_persistent,omitempty"`
|
||||
WSIdleTimeoutSec int `json:"ws_idle_timeout_sec,omitempty"` // 0 = no idle timeout
|
||||
WSMaxFrameBytes int `json:"ws_max_frame_bytes,omitempty"` // 0 = use default 256 KB
|
||||
WSIdleTimeoutSec int `json:"ws_idle_timeout_sec,omitempty"` // 0 = no idle timeout
|
||||
WSMaxFrameBytes int `json:"ws_max_frame_bytes,omitempty"` // 0 = use default 256 KB
|
||||
WSMaxInflightPerConn int `json:"ws_max_inflight_per_conn,omitempty"` // 0 = use default 64
|
||||
|
||||
// RawHTTPResponse enables raw-HTTP-response mode (bugboard #835): the
|
||||
@ -284,11 +289,11 @@ type Function struct {
|
||||
|
||||
// InvocationContext provides context for a function invocation.
|
||||
type InvocationContext struct {
|
||||
RequestID string `json:"request_id"`
|
||||
FunctionID string `json:"function_id"`
|
||||
FunctionName string `json:"function_name"`
|
||||
Namespace string `json:"namespace"`
|
||||
CallerWallet string `json:"caller_wallet,omitempty"`
|
||||
RequestID string `json:"request_id"`
|
||||
FunctionID string `json:"function_id"`
|
||||
FunctionName string `json:"function_name"`
|
||||
Namespace string `json:"namespace"`
|
||||
CallerWallet string `json:"caller_wallet,omitempty"`
|
||||
// CallerIP is the source IP of the request, populated by HTTP/WS handlers.
|
||||
// Used by the multi-tier rate limiter as a fallback bucket for anonymous
|
||||
// (no-wallet) callers.
|
||||
|
||||
@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@debros/orama",
|
||||
"version": "0.122.47",
|
||||
"version": "0.122.49",
|
||||
"description": "TypeScript SDK for Orama Network - Database, PubSub, Cache, Storage, Vault, and more",
|
||||
"type": "module",
|
||||
"main": "./dist/index.js",
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user