orama/core/pkg/gateway/middleware.go
anonpenguin23 fda47533c3 feat: per-namespace rate-limit self-service + WS JWT auth + release 0.122.12
Per-namespace rate-limit config (feature #69)
- Migration 027: new `namespace_rate_limit_config` table
  (namespace PK, requests_per_minute, burst, audit metadata).
- pkg/ratelimit: Manager + RQLite ConfigStore + types. Same pattern
  as the push config in bug #220's follow-up — LRU cache, invalidate
  on PUT/DELETE, falls back to YAML defaults when no row exists.
- pkg/gateway/handlers/ratelimit: GET/PUT/DELETE /v1/namespace/rate-limit.
  PUT requests are rejected if they exceed the operator's configured
  ceiling (MaxRequestsPerMinute / MaxBurst) — tenants self-serve but
  cannot raise their quota past the cap.
- pkg/gateway/rate_limiter.go: per-namespace lookup, default fallback.
- pkg/gateway/middleware.go: WS JWT middleware (middleware_ws_jwt_test.go).
- pkg/gateway/auth/service.go: refresh-token rotation hardening with
  regression test in refresh_rotation_test.go.

AI agent instructions
- Add AGENTS.md, CLAUDE.md, .github/copilot-instructions.md (DeBros v0.2.0
  baseline).

DeBros rules bumped to v0.2.0 (sha bb6e6ef).

VERSION bumped to 0.122.12.
2026-05-13 15:41:36 +03:00

1905 lines
65 KiB
Go

package gateway
import (
"context"
"encoding/base64"
"encoding/json"
"errors"
"fmt"
"hash/fnv"
"io"
"net"
"net/http"
"net/url"
"sort"
"strconv"
"strings"
"time"
"github.com/DeBrosOfficial/network/pkg/client"
"github.com/DeBrosOfficial/network/pkg/deployments"
"github.com/DeBrosOfficial/network/pkg/gateway/auth"
"github.com/DeBrosOfficial/network/pkg/httputil"
"github.com/DeBrosOfficial/network/pkg/logging"
"go.uber.org/zap"
)
// isLongRunningProxyPath returns true for paths whose expected work bound
// exceeds the default 30s proxy timeout. New classes go here, alphabetically.
//
// - /v1/functions/.../invoke and /v1/invoke/... — up to 300s per fn
// - /v1/functions/.../ws — long-lived WS
// - /v1/storage/upload, /v1/storage/pin — IPFS add can be slow
//
// Adding a path here is preferable to bumping the global timeout: each
// path's bound is documented in one grep-able place.
func isLongRunningProxyPath(p string) bool {
switch {
case strings.HasPrefix(p, "/v1/storage/upload"),
strings.HasPrefix(p, "/v1/storage/pin"),
strings.HasPrefix(p, "/v1/invoke/"),
strings.HasPrefix(p, "/v1/functions/") && (strings.HasSuffix(p, "/invoke") || strings.HasSuffix(p, "/ws")):
return true
}
return false
}
// isProxyTimeoutErr returns true when an HTTP client error is a timeout —
// either the http.Client overall timeout or context.DeadlineExceeded
// propagating from a parent context.
func isProxyTimeoutErr(err error) bool {
if err == nil {
return false
}
if errors.Is(err, context.DeadlineExceeded) {
return true
}
var urlErr *url.Error
if errors.As(err, &urlErr) && urlErr.Timeout() {
return true
}
return false
}
// Note: context keys (ctxKeyAPIKey, ctxKeyJWT, CtxKeyNamespaceOverride) are now defined in context.go
// Internal auth headers for trusted inter-gateway communication.
// When the main gateway proxies to a namespace gateway, it validates auth first
// and passes the validated namespace via these headers. The namespace gateway
// trusts these headers when they come from internal IPs (WireGuard 10.0.0.x).
const (
// HeaderInternalAuthNamespace contains the validated namespace name
HeaderInternalAuthNamespace = "X-Internal-Auth-Namespace"
// HeaderInternalAuthValidated indicates the request was pre-authenticated by main gateway
HeaderInternalAuthValidated = "X-Internal-Auth-Validated"
// HeaderInternalAuthJWTSub carries the validated JWT `sub` claim across the
// internal proxy hop. Bug #215: without this, namespace gateways received
// only the namespace and host functions saw empty `caller_jwt_subject`.
// Trusted only when X-Internal-Auth-Validated=true AND the source IP is
// internal (WireGuard mesh / loopback) — same trust model as the
// namespace header.
HeaderInternalAuthJWTSub = "X-Internal-Auth-JWT-Sub"
// HeaderInternalAuthJWTCustom carries the validated JWT custom-claims map
// as a base64(JSON) blob. Optional; absent means the original token had
// no custom claims. Same trust gate as HeaderInternalAuthJWTSub.
HeaderInternalAuthJWTCustom = "X-Internal-Auth-JWT-Custom"
)
// setInternalAuthJWTHeaders writes the validated JWT subject and custom claims
// onto an outbound proxy request so the destination gateway can hydrate
// ctxKeyJWT. Called only after auth has been validated AND we're proxying over
// the WireGuard mesh (the destination still gates trust by source IP).
//
// No-op when claims is nil (caller authenticated with API key, not JWT).
func setInternalAuthJWTHeaders(h http.Header, claims *auth.JWTClaims) {
if claims == nil {
return
}
if sub := strings.TrimSpace(claims.Sub); sub != "" {
h.Set(HeaderInternalAuthJWTSub, sub)
}
if len(claims.Custom) > 0 {
buf, err := json.Marshal(claims.Custom)
if err == nil {
h.Set(HeaderInternalAuthJWTCustom, base64.StdEncoding.EncodeToString(buf))
}
}
}
// stripInboundInternalAuthHeaders deletes the X-Internal-Auth-* headers from
// the supplied header set. MUST be called on every outbound request the main
// gateway proxies to a namespace gateway BEFORE we (conditionally) re-set
// them with the validated values.
//
// SECURITY (bug #215 follow-up — critical): without this, an external client
// could send `X-Internal-Auth-Validated: true` + `X-Internal-Auth-JWT-Sub:
// <victim-wallet>` directly. The header-copy loop at the proxy hop would
// forward those headers verbatim. The namespace gateway, seeing the request
// arrive from the main gateway's WireGuard IP, would trust them and hydrate
// ctxKeyJWT with attacker-controlled subject — full cross-namespace identity
// spoofing on every public path.
//
// Stripping FIRST and conditionally setting AFTER closes that hole regardless
// of which auth path won (JWT, API key, or no creds on a public path).
func stripInboundInternalAuthHeaders(h http.Header) {
h.Del(HeaderInternalAuthValidated)
h.Del(HeaderInternalAuthNamespace)
h.Del(HeaderInternalAuthJWTSub)
h.Del(HeaderInternalAuthJWTCustom)
}
// maxQueryJWTLength caps the size of a JWT accepted via `?jwt=` query
// param. EdDSA + RS256 JWTs minted by this gateway are well under 2 KB;
// 4 KB is a generous ceiling that still cheaply rejects DoS attempts
// that try to feed multi-MB tokens through the verifier.
const maxQueryJWTLength = 4096
// stripJWTQueryParam removes the `jwt` key from the URL's query string
// (if present), mutating r in place. Called after a successful WS-upgrade
// JWT-via-query verification so the token doesn't propagate to:
// - the namespace-gateway proxy hop (`r.URL.RawQuery` is forwarded)
// - downstream handler logs that record `r.URL.RequestURI()`
// - any inner `r.URL.Query()` lookups in business logic
//
// Idempotent: safe to call on requests without a `jwt` param.
func stripJWTQueryParam(r *http.Request) {
q := r.URL.Query()
if !q.Has("jwt") {
return
}
q.Del("jwt")
r.URL.RawQuery = q.Encode()
}
// claimsFromInternalAuthHeaders rebuilds a *auth.JWTClaims from the trusted
// internal-auth headers. Returns nil if no JWT subject was forwarded (the
// caller used an API key, or the request didn't carry validated JWT data).
//
// SECURITY: this MUST only be called after the caller has confirmed
// HeaderInternalAuthValidated == "true" AND the source IP is internal AND
// the proxy hop has stripped any inbound copies of these headers (see
// stripInboundInternalAuthHeaders). Skipping any of those would let any
// client forge JWT identities.
func claimsFromInternalAuthHeaders(h http.Header, namespace string) *auth.JWTClaims {
sub := strings.TrimSpace(h.Get(HeaderInternalAuthJWTSub))
if sub == "" {
return nil
}
claims := &auth.JWTClaims{
Sub: sub,
Namespace: namespace,
}
if raw := strings.TrimSpace(h.Get(HeaderInternalAuthJWTCustom)); raw != "" {
if decoded, err := base64.StdEncoding.DecodeString(raw); err == nil {
var custom map[string]string
if json.Unmarshal(decoded, &custom) == nil && len(custom) > 0 {
claims.Custom = custom
}
}
}
return claims
}
// validateAuthForNamespaceProxy validates the request's auth credentials against the MAIN
// cluster RQLite and returns the namespace the credentials belong to plus the
// JWT claims when the caller authenticated with a Bearer JWT.
// This is used by handleNamespaceGatewayRequest to pre-authenticate before proxying to
// namespace gateways (which have isolated RQLites without API keys).
//
// Returns:
// - (namespace, claims, "") if auth is valid via JWT — claims may be used to
// populate internal-auth headers so the namespace gateway can hydrate
// `caller_jwt_subject` for serverless host functions (bug #215).
// - (namespace, nil, "") if auth is valid via API key.
// - ("", nil, errorMessage) if auth is invalid.
// - ("", nil, "") if no auth credentials provided (for public paths).
func (g *Gateway) validateAuthForNamespaceProxy(r *http.Request) (namespace string, claims *auth.JWTClaims, errMsg string) {
// 1) Try JWT Bearer first
if authHeader := r.Header.Get("Authorization"); authHeader != "" {
lower := strings.ToLower(authHeader)
if strings.HasPrefix(lower, "bearer ") {
tok := strings.TrimSpace(authHeader[len("Bearer "):])
if strings.Count(tok, ".") == 2 {
if c, err := g.authService.ParseAndVerifyJWT(tok); err == nil {
if ns := strings.TrimSpace(c.Namespace); ns != "" {
return ns, c, ""
}
}
// JWT verification failed - fall through to API key check
}
}
}
// 1b) WS upgrade fallback: JWT via `?jwt=` query. Same rationale as in
// authMiddleware — browser / React Native WS clients can't set custom
// headers reliably. Bug #240. Strip-after-verify is applied here too
// so the JWT doesn't propagate to the namespace gateway over the proxy
// hop (where it would otherwise live in the proxied request's RawQuery
// + the inner gateway's logs).
if isWebSocketUpgrade(r) {
tok := strings.TrimSpace(r.URL.Query().Get("jwt"))
if tok != "" && len(tok) <= maxQueryJWTLength && strings.Count(tok, ".") == 2 {
if c, err := g.authService.ParseAndVerifyJWT(tok); err == nil {
if ns := strings.TrimSpace(c.Namespace); ns != "" {
stripJWTQueryParam(r)
return ns, c, ""
}
}
}
}
// 2) Try API key
key := extractAPIKey(r)
if key == "" {
return "", nil, "" // No credentials provided
}
ns, err := g.lookupAPIKeyNamespace(r.Context(), key, g.client)
if err != nil {
return "", nil, "invalid API key"
}
return ns, nil, ""
}
// lookupAPIKeyNamespace resolves an API key to its namespace using cache and DB.
// dbClient controls which database is queried (global vs namespace-specific).
// Returns the namespace name or an error if the key is invalid.
//
// Dual lookup strategy for rolling upgrade: tries HMAC-hashed key first (new keys),
// then falls back to raw key lookup (existing unhashed keys during transition).
func (g *Gateway) lookupAPIKeyNamespace(ctx context.Context, key string, dbClient client.NetworkClient) (string, error) {
// Cache uses raw key as cache key (in-memory only, never persisted)
if g.mwCache != nil {
if cachedNS, ok := g.mwCache.GetAPIKeyNamespace(key); ok {
return cachedNS, nil
}
}
db := dbClient.Database()
internalCtx := client.WithInternalAuth(ctx)
q := "SELECT namespaces.name FROM api_keys JOIN namespaces ON api_keys.namespace_id = namespaces.id WHERE api_keys.key = ? LIMIT 1"
// Try HMAC-hashed lookup first (new keys stored as hashes)
hashedKey := g.authService.HashAPIKey(key)
res, err := db.Query(internalCtx, q, hashedKey)
if err == nil && res != nil && res.Count > 0 && len(res.Rows) > 0 && len(res.Rows[0]) > 0 {
if ns := getString(res.Rows[0][0]); ns != "" {
if g.mwCache != nil {
g.mwCache.SetAPIKeyNamespace(key, ns)
}
return ns, nil
}
}
// Fallback: try raw key lookup (existing unhashed keys during rolling upgrade)
if hashedKey != key {
res, err = db.Query(internalCtx, q, key)
if err == nil && res != nil && res.Count > 0 && len(res.Rows) > 0 && len(res.Rows[0]) > 0 {
if ns := getString(res.Rows[0][0]); ns != "" {
if g.mwCache != nil {
g.mwCache.SetAPIKeyNamespace(key, ns)
}
return ns, nil
}
}
}
return "", fmt.Errorf("invalid API key")
}
// isWebSocketUpgrade checks if the request is a WebSocket upgrade request
func isWebSocketUpgrade(r *http.Request) bool {
connection := strings.ToLower(r.Header.Get("Connection"))
upgrade := strings.ToLower(r.Header.Get("Upgrade"))
return strings.Contains(connection, "upgrade") && upgrade == "websocket"
}
// proxyWebSocket proxies a WebSocket connection by hijacking the client connection
// and tunneling bidirectionally to the backend
func (g *Gateway) proxyWebSocket(w http.ResponseWriter, r *http.Request, targetHost string) bool {
hijacker, ok := w.(http.Hijacker)
if !ok {
http.Error(w, "WebSocket proxy not supported", http.StatusInternalServerError)
return false
}
// Connect to backend
backendConn, err := net.DialTimeout("tcp", targetHost, 10*time.Second)
if err != nil {
g.logger.ComponentError(logging.ComponentGeneral, "WebSocket backend dial failed",
zap.String("target", targetHost),
zap.Error(err),
)
http.Error(w, "Backend unavailable", http.StatusServiceUnavailable)
return false
}
// Write the original request to backend (this initiates the WebSocket handshake)
if err := r.Write(backendConn); err != nil {
backendConn.Close()
g.logger.ComponentError(logging.ComponentGeneral, "WebSocket handshake write failed",
zap.Error(err),
)
http.Error(w, "Failed to initiate WebSocket", http.StatusBadGateway)
return false
}
// Hijack client connection
clientConn, clientBuf, err := hijacker.Hijack()
if err != nil {
backendConn.Close()
g.logger.ComponentError(logging.ComponentGeneral, "WebSocket hijack failed",
zap.Error(err),
)
return false
}
// Flush any buffered data from the client
if clientBuf.Reader.Buffered() > 0 {
buffered := make([]byte, clientBuf.Reader.Buffered())
clientBuf.Read(buffered)
backendConn.Write(buffered)
}
// Bidirectional copy between client and backend
done := make(chan struct{}, 2)
go func() {
defer func() { done <- struct{}{} }()
io.Copy(clientConn, backendConn)
clientConn.Close()
}()
go func() {
defer func() { done <- struct{}{} }()
io.Copy(backendConn, clientConn)
backendConn.Close()
}()
// Wait for one side to close
<-done
clientConn.Close()
backendConn.Close()
<-done
return true
}
// withMiddleware adds CORS, security headers, rate limiting, and logging middleware
func (g *Gateway) withMiddleware(next http.Handler) http.Handler {
// Order: logging -> security headers -> rate limit -> CORS -> domain routing -> auth -> authorization -> namespace rate limit -> handler
return g.loggingMiddleware(
g.securityHeadersMiddleware(
g.rateLimitMiddleware(
g.corsMiddleware(
g.domainRoutingMiddleware(
g.authMiddleware(
g.authorizationMiddleware(
g.namespaceRateLimitMiddleware(next))))))))
}
// securityHeadersMiddleware adds standard security headers to all responses
func (g *Gateway) securityHeadersMiddleware(next http.Handler) http.Handler {
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
w.Header().Set("X-Content-Type-Options", "nosniff")
w.Header().Set("X-Frame-Options", "DENY")
w.Header().Set("X-XSS-Protection", "0")
w.Header().Set("Referrer-Policy", "strict-origin-when-cross-origin")
w.Header().Set("Permissions-Policy", "camera=(self), microphone=(self), geolocation=()")
// HSTS only when behind TLS (Caddy)
if r.TLS != nil || r.Header.Get("X-Forwarded-Proto") == "https" {
w.Header().Set("Strict-Transport-Security", "max-age=31536000; includeSubDomains")
}
next.ServeHTTP(w, r)
})
}
// loggingMiddleware logs basic request info and duration
func (g *Gateway) loggingMiddleware(next http.Handler) http.Handler {
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
start := time.Now()
srw := &statusResponseWriter{ResponseWriter: w, status: http.StatusOK}
next.ServeHTTP(srw, r)
dur := time.Since(start)
g.logger.ComponentInfo(logging.ComponentGeneral, "request",
zap.String("method", r.Method),
zap.String("path", r.URL.Path),
zap.Int("status", srw.status),
zap.Int("bytes", srw.bytes),
zap.String("duration", dur.String()),
)
// Enqueue log entry for batched persistence (replaces per-request DB writes)
if g.logBatcher != nil {
apiKey := ""
if v := r.Context().Value(ctxKeyAPIKey); v != nil {
if s, ok := v.(string); ok {
apiKey = s
}
}
g.logBatcher.Add(requestLogEntry{
method: r.Method,
path: r.URL.Path,
statusCode: srw.status,
bytesOut: srw.bytes,
durationMs: dur.Milliseconds(),
ip: getClientIP(r),
apiKey: apiKey,
})
}
})
}
// authMiddleware enforces auth when enabled via config.
// Accepts:
// - Authorization: Bearer <JWT> (RS256 / EdDSA issued by this gateway)
// - Authorization: Bearer <API key> or ApiKey <API key>
// - X-API-Key: <API key>
// - ?api_key=<key> or ?token=<key> query string (WebSocket upgrade only)
// - ?jwt=<token> query string (WebSocket upgrade only — bug #240; needed
// because browser/RN WS clients can't reliably set custom headers)
// - X-Internal-Auth-Validated: true (from internal IPs only - pre-authenticated by main gateway)
func (g *Gateway) authMiddleware(next http.Handler) http.Handler {
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
// Allow preflight without auth
if r.Method == http.MethodOptions {
next.ServeHTTP(w, r)
return
}
isPublic := isPublicPath(r.URL.Path)
// 0) Trust internal auth headers from internal IPs (WireGuard network or localhost)
// This allows the main gateway to pre-authenticate requests before proxying to namespace gateways
// IMPORTANT: Use r.RemoteAddr (actual TCP peer), NOT getClientIP() which reads
// X-Forwarded-For and would return the original client IP instead of the proxy's IP.
if r.Header.Get(HeaderInternalAuthValidated) == "true" {
clientIP := remoteAddrIP(r)
if isInternalIP(clientIP) {
ns := strings.TrimSpace(r.Header.Get(HeaderInternalAuthNamespace))
if ns != "" {
// Pre-authenticated by main gateway - trust the namespace
reqCtx := context.WithValue(r.Context(), CtxKeyNamespaceOverride, ns)
// Bug #215: also rebuild ctxKeyJWT from the trusted
// internal-auth headers (subject + custom claims) so
// serverless host functions see a non-empty
// caller_jwt_subject. We deliberately do NOT re-verify
// the JWT here — namespace gateways may not share the
// signing key with the main gateway, and the main gateway
// already verified before forwarding. Trust gate is the
// source-IP check above.
if claims := claimsFromInternalAuthHeaders(r.Header, ns); claims != nil {
reqCtx = context.WithValue(reqCtx, ctxKeyJWT, claims)
}
next.ServeHTTP(w, r.WithContext(reqCtx))
return
}
}
// If internal auth header is present but invalid (wrong IP or missing namespace),
// fall through to normal auth flow
}
// 1) Try JWT Bearer first if Authorization looks like one
if auth := r.Header.Get("Authorization"); auth != "" {
lower := strings.ToLower(auth)
if strings.HasPrefix(lower, "bearer ") {
tok := strings.TrimSpace(auth[len("Bearer "):])
if strings.Count(tok, ".") == 2 {
if claims, err := g.authService.ParseAndVerifyJWT(tok); err == nil {
// Attach JWT claims and namespace to context
ctx := context.WithValue(r.Context(), ctxKeyJWT, claims)
if ns := strings.TrimSpace(claims.Namespace); ns != "" {
ctx = context.WithValue(ctx, CtxKeyNamespaceOverride, ns)
}
next.ServeHTTP(w, r.WithContext(ctx))
return
}
// If it looked like a JWT but failed verification, fall through to API key check
}
}
}
// 1b) WebSocket-only fallback: JWT in the `?jwt=` query parameter.
//
// Browser and React Native WebSocket clients can't reliably set custom
// headers on the upgrade request — the WebSocket constructor either
// ignores the headers argument (browsers) or silently strips
// Authorization (RN iOS). Without a fallback, every authenticated WS
// endpoint is unreachable from those platforms. Bug #240.
//
// We gate this ONLY on WS upgrade requests to keep JWTs out of normal
// HTTP URLs (where they end up in access logs, referrer headers, and
// browser history). For WS, the upgrade URL is only emitted on
// connection establishment — much smaller exposure surface — and TLS
// (wss://) keeps it off the wire in transit.
//
// After a successful verify, we STRIP the `jwt` query param from the
// request before passing downstream (`stripJWTQueryParam`). This
// shrinks the replay window: the token doesn't propagate through the
// proxy hop to the namespace gateway, doesn't reach the backend
// handler's logs, and doesn't show up in any downstream `r.URL`
// inspection. Belt-and-suspenders given the trust we've already
// established by verifying the signature.
if isWebSocketUpgrade(r) {
tok := strings.TrimSpace(r.URL.Query().Get("jwt"))
// Cheap length sanity-check before invoking the verifier. Real
// EdDSA / RS256 JWTs issued by this gateway are well under 4 KB.
// Anything larger is either malformed or a DoS attempt.
if tok != "" && len(tok) <= maxQueryJWTLength && strings.Count(tok, ".") == 2 {
if claims, err := g.authService.ParseAndVerifyJWT(tok); err == nil {
stripJWTQueryParam(r)
ctx := context.WithValue(r.Context(), ctxKeyJWT, claims)
if ns := strings.TrimSpace(claims.Namespace); ns != "" {
ctx = context.WithValue(ctx, CtxKeyNamespaceOverride, ns)
}
next.ServeHTTP(w, r.WithContext(ctx))
return
}
// Invalid JWT in query — fall through to API key check
// rather than 401-ing here, in case the caller also supplied
// a valid api_key as belt-and-suspenders.
}
}
// 2) Fallback to API key (validate against DB)
key := extractAPIKey(r)
if key == "" {
if isPublic {
next.ServeHTTP(w, r)
return
}
w.Header().Set("WWW-Authenticate", "Bearer realm=\"gateway\", charset=\"UTF-8\"")
writeError(w, http.StatusUnauthorized, "missing API key")
return
}
// Look up API key → namespace (uses cache + DB)
dbClient := g.client
if g.authClient != nil {
dbClient = g.authClient
}
ns, err := g.lookupAPIKeyNamespace(r.Context(), key, dbClient)
if err != nil {
if isPublic {
next.ServeHTTP(w, r)
return
}
w.Header().Set("WWW-Authenticate", "Bearer error=\"invalid_token\"")
writeError(w, http.StatusUnauthorized, "invalid API key")
return
}
// Attach auth metadata to context for downstream use
reqCtx := context.WithValue(r.Context(), ctxKeyAPIKey, key)
reqCtx = context.WithValue(reqCtx, CtxKeyNamespaceOverride, ns)
next.ServeHTTP(w, r.WithContext(reqCtx))
})
}
// extractAPIKey extracts API key from Authorization, X-API-Key header, or query parameters
// Note: Bearer tokens that look like JWTs (have 2 dots) are skipped (they're JWTs, handled separately)
// X-API-Key header is preferred when both Authorization and X-API-Key are present
func extractAPIKey(r *http.Request) string {
// Prefer X-API-Key header (most explicit) - check this first
if v := strings.TrimSpace(r.Header.Get("X-API-Key")); v != "" {
return v
}
// Check Authorization header for ApiKey scheme or non-JWT Bearer tokens
auth := r.Header.Get("Authorization")
if auth != "" {
lower := strings.ToLower(auth)
if strings.HasPrefix(lower, "bearer ") {
tok := strings.TrimSpace(auth[len("Bearer "):])
// Skip Bearer tokens that look like JWTs (have 2 dots) - they're JWTs
// But allow Bearer tokens that don't look like JWTs (for backward compatibility)
if strings.Count(tok, ".") == 2 {
// This is a JWT, skip it
} else {
// This doesn't look like a JWT, treat as API key (backward compatibility)
return tok
}
} else if strings.HasPrefix(lower, "apikey ") {
return strings.TrimSpace(auth[len("ApiKey "):])
} else if !strings.Contains(auth, " ") {
// If header has no scheme, treat the whole value as token (lenient for dev)
// But skip if it looks like a JWT (has 2 dots)
tok := strings.TrimSpace(auth)
if strings.Count(tok, ".") != 2 {
return tok
}
}
}
// Fallback to query parameter ONLY for WebSocket upgrade requests.
// WebSocket clients cannot set custom headers, so query params are the
// only way to authenticate. For regular HTTP requests, require headers.
if isWebSocketUpgrade(r) {
if v := strings.TrimSpace(r.URL.Query().Get("api_key")); v != "" {
return v
}
if v := strings.TrimSpace(r.URL.Query().Get("token")); v != "" {
return v
}
}
return ""
}
// isPublicPath returns true for routes that should be accessible without API key auth
func isPublicPath(p string) bool {
// Allow ACME challenges for Let's Encrypt certificate provisioning
if strings.HasPrefix(p, "/.well-known/acme-challenge/") {
return true
}
// Serverless invocation is public (authorization is handled within the invoker)
if strings.HasPrefix(p, "/v1/invoke/") || (strings.HasPrefix(p, "/v1/functions/") && strings.HasSuffix(p, "/invoke")) {
return true
}
// Internal replica coordination endpoints (auth handled by replica handler)
if strings.HasPrefix(p, "/v1/internal/deployments/replica/") {
return true
}
// WireGuard peer exchange (auth handled by cluster secret in handler)
if strings.HasPrefix(p, "/v1/internal/wg/") {
return true
}
// Node join endpoint (auth handled by invite token in handler)
if p == "/v1/internal/join" {
return true
}
// Namespace spawn endpoint (auth handled by internal auth header)
if p == "/v1/internal/namespace/spawn" {
return true
}
// Namespace cluster repair endpoint (auth handled by internal auth header)
if p == "/v1/internal/namespace/repair" {
return true
}
// Vault proxy endpoints (no auth — rate-limited per identity hash within handler)
if strings.HasPrefix(p, "/v1/vault/") {
return true
}
// Phantom auth endpoints are public (session creation, status polling, completion)
if strings.HasPrefix(p, "/v1/auth/phantom/") {
return true
}
switch p {
case "/health", "/v1/health", "/status", "/v1/status", "/v1/auth/jwks", "/.well-known/jwks.json", "/v1/version", "/v1/auth/challenge", "/v1/auth/verify", "/v1/auth/register", "/v1/auth/refresh", "/v1/auth/logout", "/v1/auth/api-key", "/v1/network/status", "/v1/network/peers", "/v1/internal/tls/check", "/v1/internal/acme/present", "/v1/internal/acme/cleanup", "/v1/internal/ping":
return true
default:
// Also exempt namespace status polling endpoint
if strings.HasPrefix(p, "/v1/namespace/status") {
return true
}
return false
}
}
// authorizationMiddleware enforces that the authenticated actor owns the namespace
// for certain protected paths (e.g., apps CRUD and storage APIs).
// Also enforces cross-namespace access control:
// - "default" namespace: accessible by any valid API key
// - Other namespaces: API key must belong to that specific namespace
func (g *Gateway) authorizationMiddleware(next http.Handler) http.Handler {
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
// Skip for public/OPTIONS paths only
if r.Method == http.MethodOptions || isPublicPath(r.URL.Path) {
next.ServeHTTP(w, r)
return
}
// Exempt whoami from ownership enforcement so users can inspect their session
if r.URL.Path == "/v1/auth/whoami" {
next.ServeHTTP(w, r)
return
}
// Exempt namespace status endpoint
if strings.HasPrefix(r.URL.Path, "/v1/namespace/status") {
next.ServeHTTP(w, r)
return
}
// Skip ownership checks for requests pre-authenticated by the main gateway.
// The main gateway already validated the API key and resolved the namespace
// before proxying, so re-checking ownership against the namespace RQLite is
// redundant and adds ~300ms of unnecessary latency (3 DB round-trips).
if r.Header.Get(HeaderInternalAuthValidated) == "true" {
clientIP := remoteAddrIP(r)
if isInternalIP(clientIP) {
next.ServeHTTP(w, r)
return
}
}
// Cross-namespace access control for namespace gateways
// The gateway's ClientNamespace determines which namespace this gateway serves
gatewayNamespace := "default"
if g.cfg != nil && g.cfg.ClientNamespace != "" {
gatewayNamespace = strings.TrimSpace(g.cfg.ClientNamespace)
}
// Get user's namespace from context (derived from API key/JWT)
userNamespace := ""
if v := r.Context().Value(CtxKeyNamespaceOverride); v != nil {
if s, ok := v.(string); ok {
userNamespace = strings.TrimSpace(s)
}
}
// For non-default namespace gateways, the API key must belong to this namespace
// This enforces physical isolation: alice's gateway only accepts alice's API keys
if gatewayNamespace != "default" && userNamespace != "" && userNamespace != gatewayNamespace {
g.logger.ComponentWarn(logging.ComponentGeneral, "cross-namespace access denied",
zap.String("user_namespace", userNamespace),
zap.String("gateway_namespace", gatewayNamespace),
zap.String("path", r.URL.Path),
)
writeError(w, http.StatusForbidden, "API key does not belong to this namespace")
return
}
// Only enforce ownership for specific resource paths
if !requiresNamespaceOwnership(r.URL.Path) {
next.ServeHTTP(w, r)
return
}
// Determine namespace from context
ctx := r.Context()
ns := ""
if v := ctx.Value(CtxKeyNamespaceOverride); v != nil {
if s, ok := v.(string); ok {
ns = strings.TrimSpace(s)
}
}
if ns == "" && g.cfg != nil {
ns = strings.TrimSpace(g.cfg.ClientNamespace)
}
if ns == "" {
writeError(w, http.StatusForbidden, "namespace not resolved")
return
}
// Identify actor from context
ownerType := ""
ownerID := ""
apiKeyFallback := ""
if v := ctx.Value(ctxKeyJWT); v != nil {
if claims, ok := v.(*auth.JWTClaims); ok && claims != nil && strings.TrimSpace(claims.Sub) != "" {
// Determine subject type.
// If subject looks like an API key (e.g., ak_<random>:<namespace>),
// treat it as an API key owner; otherwise assume a wallet subject.
subj := strings.TrimSpace(claims.Sub)
lowerSubj := strings.ToLower(subj)
if strings.HasPrefix(lowerSubj, "ak_") || strings.Contains(subj, ":") {
ownerType = "api_key"
ownerID = subj
} else {
ownerType = "wallet"
ownerID = subj
}
}
}
if ownerType == "" && ownerID == "" {
if v := ctx.Value(ctxKeyAPIKey); v != nil {
if s, ok := v.(string); ok && strings.TrimSpace(s) != "" {
ownerType = "api_key"
ownerID = strings.TrimSpace(s)
}
}
} else if ownerType == "wallet" {
// If we have a JWT wallet, also capture the API key as fallback
if v := ctx.Value(ctxKeyAPIKey); v != nil {
if s, ok := v.(string); ok && strings.TrimSpace(s) != "" {
apiKeyFallback = strings.TrimSpace(s)
}
}
}
if ownerType == "" || ownerID == "" {
writeError(w, http.StatusForbidden, "missing identity")
return
}
g.logger.ComponentInfo("gateway", "namespace auth check",
zap.String("namespace", ns),
zap.String("owner_type", ownerType),
zap.String("owner_id", ownerID),
)
// Check ownership in DB using internal auth context
db := g.client.Database()
internalCtx := client.WithInternalAuth(ctx)
// Ensure namespace exists and get id
if _, err := db.Query(internalCtx, "INSERT OR IGNORE INTO namespaces(name) VALUES (?)", ns); err != nil {
writeError(w, http.StatusInternalServerError, err.Error())
return
}
nres, err := db.Query(internalCtx, "SELECT id FROM namespaces WHERE name = ? LIMIT 1", ns)
if err != nil || nres == nil || nres.Count == 0 || len(nres.Rows) == 0 || len(nres.Rows[0]) == 0 {
writeError(w, http.StatusForbidden, "namespace not found")
return
}
nsID := nres.Rows[0][0]
q := "SELECT 1 FROM namespace_ownership WHERE namespace_id = ? AND owner_type = ? AND owner_id = ? LIMIT 1"
res, err := db.Query(internalCtx, q, nsID, ownerType, ownerID)
// If primary owner check fails and we have a JWT wallet with API key fallback, try the API key
if (err != nil || res == nil || res.Count == 0) && ownerType == "wallet" && apiKeyFallback != "" {
res, err = db.Query(internalCtx, q, nsID, "api_key", apiKeyFallback)
}
if err != nil || res == nil || res.Count == 0 {
writeError(w, http.StatusForbidden, "forbidden: not an owner of namespace")
return
}
next.ServeHTTP(w, r)
})
}
// requiresNamespaceOwnership returns true if the path should be guarded by
// namespace ownership checks.
func requiresNamespaceOwnership(p string) bool {
if p == "/rqlite" || p == "/v1/rqlite" || strings.HasPrefix(p, "/v1/rqlite/") {
return true
}
if strings.HasPrefix(p, "/v1/pubsub") {
return true
}
if strings.HasPrefix(p, "/v1/rqlite/") {
return true
}
if strings.HasPrefix(p, "/v1/proxy/") {
return true
}
if strings.HasPrefix(p, "/v1/functions") {
return true
}
if strings.HasPrefix(p, "/v1/webrtc/") {
return true
}
if strings.HasPrefix(p, "/v1/push/") {
return true
}
if strings.HasPrefix(p, "/v1/serverless/") {
return true
}
return false
}
// corsMiddleware applies CORS headers. Allows requests from the configured base
// domain and its subdomains. Falls back to permissive "*" only if no base domain
// is configured.
func (g *Gateway) corsMiddleware(next http.Handler) http.Handler {
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
origin := r.Header.Get("Origin")
allowedOrigin := g.getAllowedOrigin(origin)
w.Header().Set("Access-Control-Allow-Origin", allowedOrigin)
w.Header().Set("Access-Control-Allow-Methods", "GET, PUT, POST, DELETE, OPTIONS")
w.Header().Set("Access-Control-Allow-Headers", "Content-Type, Authorization, X-API-Key")
w.Header().Set("Access-Control-Max-Age", strconv.Itoa(600))
if allowedOrigin != "*" {
w.Header().Set("Vary", "Origin")
}
if r.Method == http.MethodOptions {
w.WriteHeader(http.StatusNoContent)
return
}
next.ServeHTTP(w, r)
})
}
// getAllowedOrigin returns the allowed origin for CORS based on the request origin.
// If no base domain is configured, allows all origins (*).
// Otherwise, allows the base domain and any subdomain of it.
func (g *Gateway) getAllowedOrigin(origin string) string {
if g.cfg.BaseDomain == "" {
return "*"
}
if origin == "" {
return "https://" + g.cfg.BaseDomain
}
// Extract hostname from origin (e.g., "https://app.dbrs.space" -> "app.dbrs.space")
host := origin
if idx := strings.Index(host, "://"); idx != -1 {
host = host[idx+3:]
}
// Strip port if present
if idx := strings.Index(host, ":"); idx != -1 {
host = host[:idx]
}
// Allow exact match or subdomain match
if host == g.cfg.BaseDomain || strings.HasSuffix(host, "."+g.cfg.BaseDomain) {
return origin
}
// Also allow common development origins
if host == "localhost" || host == "127.0.0.1" {
return origin
}
return "https://" + g.cfg.BaseDomain
}
// persistRequestLog writes request metadata to the database (best-effort)
func (g *Gateway) persistRequestLog(r *http.Request, srw *statusResponseWriter, dur time.Duration) {
if g.client == nil {
return
}
// Use a short timeout to avoid blocking shutdowns
ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
defer cancel()
db := g.client.Database()
// Resolve API key ID if available
var apiKeyID interface{} = nil
if v := r.Context().Value(ctxKeyAPIKey); v != nil {
if key, ok := v.(string); ok && key != "" {
if res, err := db.Query(ctx, "SELECT id FROM api_keys WHERE key = ? LIMIT 1", key); err == nil {
if res != nil && res.Count > 0 && len(res.Rows) > 0 && len(res.Rows[0]) > 0 {
switch idv := res.Rows[0][0].(type) {
case int64:
apiKeyID = idv
case float64:
apiKeyID = int64(idv)
case int:
apiKeyID = int64(idv)
case string:
// best effort parse
if n, err := strconv.Atoi(idv); err == nil {
apiKeyID = int64(n)
}
}
}
}
}
}
ip := getClientIP(r)
// Insert the log row
_, _ = db.Query(ctx,
"INSERT INTO request_logs (method, path, status_code, bytes_out, duration_ms, ip, api_key_id) VALUES (?, ?, ?, ?, ?, ?, ?)",
r.Method,
r.URL.Path,
srw.status,
srw.bytes,
dur.Milliseconds(),
ip,
apiKeyID,
)
// Update last_used_at for the API key if present
if apiKeyID != nil {
_, _ = db.Query(ctx, "UPDATE api_keys SET last_used_at = CURRENT_TIMESTAMP WHERE id = ?", apiKeyID)
}
}
// remoteAddrIP extracts the actual TCP peer IP from r.RemoteAddr, ignoring
// X-Forwarded-For and other proxy headers. Use this for security-sensitive
// checks like internal auth validation where we need to verify the direct
// connection source (e.g. WireGuard proxy IP), not the original client.
func remoteAddrIP(r *http.Request) string {
host, _, err := net.SplitHostPort(r.RemoteAddr)
if err != nil {
return r.RemoteAddr
}
return host
}
// getClientIP extracts the client IP from headers or RemoteAddr
func getClientIP(r *http.Request) string {
// X-Forwarded-For may contain a list of IPs, take the first
if xff := strings.TrimSpace(r.Header.Get("X-Forwarded-For")); xff != "" {
parts := strings.Split(xff, ",")
if len(parts) > 0 {
return strings.TrimSpace(parts[0])
}
}
if xr := strings.TrimSpace(r.Header.Get("X-Real-IP")); xr != "" {
return xr
}
host, _, err := net.SplitHostPort(r.RemoteAddr)
if err != nil {
return r.RemoteAddr
}
return host
}
// domainRoutingMiddleware handles requests to deployment domains and namespace gateways
// This must come BEFORE auth middleware so deployment domains work without API keys
//
// Domain routing patterns:
// - ns-{namespace}.{baseDomain} -> Namespace gateway (proxy to namespace cluster)
// - {name}-{random}.{baseDomain} -> Deployment domain
// - {name}.{baseDomain} -> Deployment domain (legacy)
// - {name}.node-xxx.{baseDomain} -> Legacy format (deprecated, returns 404 for new deployments)
func (g *Gateway) domainRoutingMiddleware(next http.Handler) http.Handler {
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
host := strings.Split(r.Host, ":")[0] // Strip port
// Get base domain from config (default to dbrs.space)
baseDomain := "dbrs.space"
if g.cfg != nil && g.cfg.BaseDomain != "" {
baseDomain = g.cfg.BaseDomain
}
// Only process base domain and its subdomains
if !strings.HasSuffix(host, "."+baseDomain) && host != baseDomain {
next.ServeHTTP(w, r)
return
}
// Check for namespace gateway domain FIRST (before API path skip)
// Namespace subdomains (ns-{name}.{baseDomain}) must be proxied to namespace gateways
// regardless of path — including /v1/ paths
suffix := "." + baseDomain
if strings.HasSuffix(host, suffix) {
subdomain := strings.TrimSuffix(host, suffix)
if strings.HasPrefix(subdomain, "ns-") {
namespaceName := strings.TrimPrefix(subdomain, "ns-")
g.handleNamespaceGatewayRequest(w, r, namespaceName)
return
}
}
// Skip API paths (they should use JWT/API key auth on the main gateway)
if strings.HasPrefix(r.URL.Path, "/v1/") || strings.HasPrefix(r.URL.Path, "/.well-known/") {
next.ServeHTTP(w, r)
return
}
// Check if deployment handlers are available
if g.deploymentService == nil || g.staticHandler == nil {
next.ServeHTTP(w, r)
return
}
// Try to find deployment by domain
deployment, err := g.getDeploymentByDomain(r.Context(), host)
if err != nil {
http.Error(w, "Internal server error", http.StatusInternalServerError)
return
}
if deployment == nil {
// Domain matches .{baseDomain} but no deployment found
http.NotFound(w, r)
return
}
// Inject deployment context
ctx := context.WithValue(r.Context(), CtxKeyNamespaceOverride, deployment.Namespace)
ctx = context.WithValue(ctx, "deployment", deployment)
// Route based on deployment type
if deployment.Port == 0 {
// Static deployment - serve from IPFS
g.staticHandler.HandleServe(w, r.WithContext(ctx), deployment)
} else {
// Dynamic deployment - proxy to local port
g.proxyToDynamicDeployment(w, r.WithContext(ctx), deployment)
}
})
}
// handleNamespaceGatewayRequest proxies requests to a namespace's dedicated gateway cluster
// This enables physical isolation where each namespace has its own RQLite, Olric, and Gateway
//
// IMPORTANT: This function validates auth against the MAIN cluster RQLite before proxying.
// The validated namespace is passed to the namespace gateway via X-Internal-Auth-* headers.
// This is necessary because namespace gateways have their own isolated RQLite that doesn't
// contain API keys (API keys are stored in the main cluster RQLite only).
func (g *Gateway) handleNamespaceGatewayRequest(w http.ResponseWriter, r *http.Request, namespaceName string) {
// Validate auth against main cluster RQLite BEFORE proxying
// This ensures API keys work even though they're not in the namespace's RQLite
validatedNamespace, validatedClaims, authErr := g.validateAuthForNamespaceProxy(r)
if authErr != "" && !isPublicPath(r.URL.Path) {
w.Header().Set("WWW-Authenticate", "Bearer error=\"invalid_token\"")
writeError(w, http.StatusUnauthorized, authErr)
return
}
// If auth succeeded, ensure the API key belongs to the target namespace
if validatedNamespace != "" && validatedNamespace != namespaceName {
writeError(w, http.StatusForbidden, "API key does not belong to this namespace")
return
}
// Check middleware cache for namespace gateway targets
type namespaceGatewayTarget struct {
ip string
port int
}
var targets []namespaceGatewayTarget
if g.mwCache != nil {
if cached, ok := g.mwCache.GetNamespaceTargets(namespaceName); ok {
for _, t := range cached {
targets = append(targets, namespaceGatewayTarget{ip: t.ip, port: t.port})
}
}
}
// Cache miss — look up namespace cluster gateway from DB
if len(targets) == 0 {
db := g.client.Database()
internalCtx := client.WithInternalAuth(r.Context())
// Query all ready namespace gateways and choose a stable target.
// Random selection causes WS subscribe and publish calls to hit different
// nodes, which makes pubsub delivery flaky for short-lived subscriptions.
query := `
SELECT COALESCE(dn.internal_ip, dn.ip_address), npa.gateway_http_port
FROM namespace_port_allocations npa
JOIN namespace_clusters nc ON npa.namespace_cluster_id = nc.id
JOIN dns_nodes dn ON npa.node_id = dn.id
WHERE nc.namespace_name = ? AND nc.status = 'ready'
`
result, err := db.Query(internalCtx, query, namespaceName)
if err != nil || result == nil || len(result.Rows) == 0 {
g.logger.ComponentWarn(logging.ComponentGeneral, "namespace gateway not found",
zap.String("namespace", namespaceName),
zap.Error(err),
zap.Bool("result_nil", result == nil),
zap.Int("row_count", func() int { if result != nil { return len(result.Rows) }; return -1 }()),
)
http.Error(w, "Namespace gateway not found", http.StatusNotFound)
return
}
for _, row := range result.Rows {
if len(row) == 0 {
continue
}
ip := getString(row[0])
if ip == "" {
continue
}
port := 10004
if len(row) > 1 {
if p := getInt(row[1]); p > 0 {
port = p
}
}
targets = append(targets, namespaceGatewayTarget{ip: ip, port: port})
}
// Cache the result for subsequent requests
if g.mwCache != nil && len(targets) > 0 {
cacheTargets := make([]gatewayTarget, len(targets))
for i, t := range targets {
cacheTargets[i] = gatewayTarget{ip: t.ip, port: t.port}
}
g.mwCache.SetNamespaceTargets(namespaceName, cacheTargets)
}
}
if len(targets) == 0 {
http.Error(w, "Namespace gateway not available", http.StatusServiceUnavailable)
return
}
// Keep ordering deterministic before hashing, otherwise DB row order can vary.
sort.Slice(targets, func(i, j int) bool {
if targets[i].ip == targets[j].ip {
return targets[i].port < targets[j].port
}
return targets[i].ip < targets[j].ip
})
// Build ordered target list: local gateway first, then hash-selected, then remaining.
// This ordering is used by the circuit breaker fallback loop below.
orderedTargets := make([]namespaceGatewayTarget, 0, len(targets))
localIdx := -1
if g.localWireGuardIP != "" {
for i, t := range targets {
if t.ip == g.localWireGuardIP {
orderedTargets = append(orderedTargets, t)
localIdx = i
break
}
}
}
// Consistent hashing for affinity (keeps WS subscribe/publish on same node)
affinityKey := namespaceName + "|" + validatedNamespace
if apiKey := extractAPIKey(r); apiKey != "" {
affinityKey = namespaceName + "|" + apiKey
} else if authz := strings.TrimSpace(r.Header.Get("Authorization")); authz != "" {
affinityKey = namespaceName + "|" + authz
} else {
affinityKey = namespaceName + "|" + getClientIP(r)
}
hasher := fnv.New32a()
_, _ = hasher.Write([]byte(affinityKey))
hashIdx := int(hasher.Sum32()) % len(targets)
if hashIdx != localIdx {
orderedTargets = append(orderedTargets, targets[hashIdx])
}
for i, t := range targets {
if i != localIdx && i != hashIdx {
orderedTargets = append(orderedTargets, t)
}
}
// Select the first target whose circuit breaker allows a request through.
// This provides automatic failover when a namespace gateway node is down.
var selected namespaceGatewayTarget
var cb *CircuitBreaker
for _, candidate := range orderedTargets {
cbKey := "ns:" + candidate.ip
candidateCB := g.circuitBreakers.Get(cbKey)
if candidateCB.Allow() {
selected = candidate
cb = candidateCB
break
}
}
if selected.ip == "" {
// Bug #219: emit canonical envelope with retryable=true (transient).
httputil.WriteRPCError(w, http.StatusServiceUnavailable,
httputil.ErrCodeServiceUnavailable,
"namespace gateway unavailable: all upstream circuits are open. "+
"Wait a few seconds and retry, or check `orama monitor report` for unhealthy nodes.",
httputil.WithRetryable())
return
}
gatewayIP := selected.ip
gatewayPort := selected.port
targetHost := gatewayIP + ":" + strconv.Itoa(gatewayPort)
// Handle WebSocket upgrade requests specially (http.Client can't handle 101 Switching Protocols)
if isWebSocketUpgrade(r) {
// Set forwarding headers on the original request
r.Header.Set("X-Forwarded-For", getClientIP(r))
r.Header.Set("X-Forwarded-Proto", "https")
r.Header.Set("X-Forwarded-Host", r.Host)
// SECURITY (bug #215 follow-up): drop any X-Internal-Auth-* headers
// the external client may have set BEFORE applying the trusted
// values from this gateway. Prevents identity spoofing on the
// namespace gateway, which gates on source IP only.
stripInboundInternalAuthHeaders(r.Header)
// Set internal auth headers if auth was validated
if validatedNamespace != "" {
r.Header.Set(HeaderInternalAuthValidated, "true")
r.Header.Set(HeaderInternalAuthNamespace, validatedNamespace)
// Bug #215: forward validated JWT subject + custom claims so the
// namespace gateway's auth middleware can hydrate ctxKeyJWT and
// host functions see a non-empty caller_jwt_subject.
setInternalAuthJWTHeaders(r.Header, validatedClaims)
}
r.URL.Scheme = "http"
r.URL.Host = targetHost
r.Host = targetHost
if g.proxyWebSocket(w, r, targetHost) {
return
}
// If WebSocket proxy failed and already wrote error, return
return
}
// Proxy regular HTTP request to the namespace gateway
targetURL := "http://" + targetHost + r.URL.Path
if r.URL.RawQuery != "" {
targetURL += "?" + r.URL.RawQuery
}
proxyReq, err := http.NewRequest(r.Method, targetURL, r.Body)
if err != nil {
g.logger.ComponentError(logging.ComponentGeneral, "failed to create namespace gateway proxy request",
zap.String("namespace", namespaceName),
zap.Error(err),
)
http.Error(w, "Internal server error", http.StatusInternalServerError)
return
}
// Copy headers
for key, values := range r.Header {
for _, value := range values {
proxyReq.Header.Add(key, value)
}
}
proxyReq.Header.Set("X-Forwarded-For", getClientIP(r))
proxyReq.Header.Set("X-Forwarded-Proto", "https")
proxyReq.Header.Set("X-Forwarded-Host", r.Host)
proxyReq.Header.Set("X-Original-Host", r.Host)
// SECURITY (bug #215 follow-up): drop any X-Internal-Auth-* headers
// the header-copy loop above may have inherited from the inbound
// request BEFORE writing the trusted values. The namespace gateway
// gates on source IP only; without this strip, an external attacker
// could forge X-Internal-Auth-JWT-Sub and impersonate any wallet.
stripInboundInternalAuthHeaders(proxyReq.Header)
// Set internal auth headers if auth was validated by main gateway
// This allows the namespace gateway to trust the authentication
if validatedNamespace != "" {
proxyReq.Header.Set(HeaderInternalAuthValidated, "true")
proxyReq.Header.Set(HeaderInternalAuthNamespace, validatedNamespace)
// Bug #215: forward validated JWT subject + custom claims so the
// namespace gateway's auth middleware can hydrate ctxKeyJWT and
// host functions see a non-empty caller_jwt_subject.
setInternalAuthJWTHeaders(proxyReq.Header, validatedClaims)
}
// Pick the proxy timeout based on the path's expected work bound.
// Defaults to 30s for fast paths; bumps to 300s for known long-running
// classes (uploads, function invocations) so we don't truncate before
// the namespace gateway's own per-function timeout fires.
//
// Bug #219: function invokes were truncated at 30s and surfaced as
// "Namespace gateway unavailable" — misleading because the gateway IS
// up, the FUNCTION just exceeded its budget. Aligning the proxy
// timeout with the function-level cap ensures the namespace gateway's
// proper TIMEOUT envelope reaches the client first.
proxyTimeout := 30 * time.Second
if isLongRunningProxyPath(r.URL.Path) {
proxyTimeout = 300 * time.Second
}
// Execute proxy request using shared transport for connection pooling
httpClient := &http.Client{Timeout: proxyTimeout, Transport: g.proxyTransport}
resp, err := httpClient.Do(proxyReq)
if err != nil {
cb.RecordFailure()
g.logger.ComponentError(logging.ComponentGeneral, "namespace gateway proxy request failed",
zap.String("namespace", namespaceName),
zap.String("target", gatewayIP),
zap.Error(err),
)
// Distinguish timeout from connection failure so clients get an
// actionable code (#219). "Namespace gateway unavailable" was
// emitted indiscriminately and sent operators down the wrong
// debug path.
if isProxyTimeoutErr(err) {
httputil.WriteRPCError(w, http.StatusGatewayTimeout,
httputil.ErrCodeTimeout,
"function or upstream call exceeded the proxy budget ("+
proxyTimeout.String()+
"). Increase the function's timeout: in function.yaml (max 300s) or split the work into smaller invocations.",
)
return
}
httputil.WriteRPCError(w, http.StatusServiceUnavailable,
httputil.ErrCodeServiceUnavailable,
"namespace gateway unavailable: "+err.Error())
return
}
defer resp.Body.Close()
if IsResponseFailure(resp.StatusCode) {
cb.RecordFailure()
} else {
cb.RecordSuccess()
}
// Copy response headers
for key, values := range resp.Header {
for _, value := range values {
w.Header().Add(key, value)
}
}
// Write status code and body
w.WriteHeader(resp.StatusCode)
io.Copy(w, resp.Body)
}
// getDeploymentByDomain looks up a deployment by its domain
// Supports formats like:
// - {name}-{random}.{baseDomain} (e.g., myapp-f3o4if.dbrs.space) - new format with random suffix
// - {name}.{baseDomain} (e.g., myapp.dbrs.space) - legacy format (backwards compatibility)
// - {name}.node-{shortID}.{baseDomain} (legacy format for backwards compatibility)
// - custom domains via deployment_domains table
func (g *Gateway) getDeploymentByDomain(ctx context.Context, domain string) (*deployments.Deployment, error) {
if g.deploymentService == nil {
return nil, nil
}
// Strip trailing dot if present
domain = strings.TrimSuffix(domain, ".")
// Get base domain from config (default to dbrs.space)
baseDomain := "dbrs.space"
if g.cfg != nil && g.cfg.BaseDomain != "" {
baseDomain = g.cfg.BaseDomain
}
db := g.client.Database()
internalCtx := client.WithInternalAuth(ctx)
// Parse domain to extract deployment subdomain/name
suffix := "." + baseDomain
if strings.HasSuffix(domain, suffix) {
subdomain := strings.TrimSuffix(domain, suffix)
parts := strings.Split(subdomain, ".")
// Primary format: {subdomain}.{baseDomain} (e.g., myapp-f3o4if.dbrs.space)
// The subdomain can be either:
// - {name}-{random} (new format)
// - {name} (legacy format)
if len(parts) == 1 {
subdomainOrName := parts[0]
// First, try to find by subdomain (new format: name-random)
query := `
SELECT id, namespace, name, type, port, content_cid, status, home_node_id, subdomain
FROM deployments
WHERE subdomain = ?
AND status IN ('active', 'degraded')
LIMIT 1
`
result, err := db.Query(internalCtx, query, subdomainOrName)
if err == nil && len(result.Rows) > 0 {
row := result.Rows[0]
return &deployments.Deployment{
ID: getString(row[0]),
Namespace: getString(row[1]),
Name: getString(row[2]),
Type: deployments.DeploymentType(getString(row[3])),
Port: getInt(row[4]),
ContentCID: getString(row[5]),
Status: deployments.DeploymentStatus(getString(row[6])),
HomeNodeID: getString(row[7]),
Subdomain: getString(row[8]),
}, nil
}
// Fallback: try by name for legacy deployments (without random suffix)
query = `
SELECT id, namespace, name, type, port, content_cid, status, home_node_id, subdomain
FROM deployments
WHERE name = ?
AND status IN ('active', 'degraded')
LIMIT 1
`
result, err = db.Query(internalCtx, query, subdomainOrName)
if err == nil && len(result.Rows) > 0 {
row := result.Rows[0]
return &deployments.Deployment{
ID: getString(row[0]),
Namespace: getString(row[1]),
Name: getString(row[2]),
Type: deployments.DeploymentType(getString(row[3])),
Port: getInt(row[4]),
ContentCID: getString(row[5]),
Status: deployments.DeploymentStatus(getString(row[6])),
HomeNodeID: getString(row[7]),
Subdomain: getString(row[8]),
}, nil
}
}
}
// Try custom domain from deployment_domains table
query := `
SELECT d.id, d.namespace, d.name, d.type, d.port, d.content_cid, d.status, d.home_node_id
FROM deployments d
JOIN deployment_domains dd ON d.id = dd.deployment_id
WHERE dd.domain = ? AND dd.verified_at IS NOT NULL
AND d.status IN ('active', 'degraded')
LIMIT 1
`
result, err := db.Query(internalCtx, query, domain)
if err == nil && len(result.Rows) > 0 {
row := result.Rows[0]
return &deployments.Deployment{
ID: getString(row[0]),
Namespace: getString(row[1]),
Name: getString(row[2]),
Type: deployments.DeploymentType(getString(row[3])),
Port: getInt(row[4]),
ContentCID: getString(row[5]),
Status: deployments.DeploymentStatus(getString(row[6])),
HomeNodeID: getString(row[7]),
}, nil
}
return nil, nil
}
// proxyToDynamicDeployment proxies requests to a dynamic deployment's local port
// If the deployment is on a different node, it forwards the request to that node.
// With replica support, it first checks if the current node is a replica and can
// serve the request locally using the replica's port.
func (g *Gateway) proxyToDynamicDeployment(w http.ResponseWriter, r *http.Request, deployment *deployments.Deployment) {
if deployment.Port == 0 {
http.Error(w, "Deployment has no assigned port", http.StatusServiceUnavailable)
return
}
// Check if request was already forwarded by another node (loop prevention)
proxyNode := r.Header.Get("X-Orama-Proxy-Node")
// Check if this deployment is on the current node (primary)
if g.nodePeerID != "" && deployment.HomeNodeID != "" &&
deployment.HomeNodeID != g.nodePeerID && proxyNode == "" {
// Check if this node is a replica and can serve locally
if g.replicaManager != nil {
replicaPort, err := g.replicaManager.GetReplicaPort(r.Context(), deployment.ID, g.nodePeerID)
if err == nil && replicaPort > 0 {
// This node is a replica — serve locally using the replica's port
g.logger.Debug("Serving from local replica",
zap.String("deployment", deployment.Name),
zap.Int("replica_port", replicaPort),
)
deployment.Port = replicaPort
// Fall through to local proxy below
goto serveLocal
}
}
// Not a replica on this node — proxy to a healthy replica node
if g.proxyCrossNodeWithReplicas(w, r, deployment) {
return
}
// Fall through if cross-node proxy failed - try local anyway
g.logger.Warn("Cross-node proxy failed, attempting local fallback",
zap.String("deployment", deployment.Name),
zap.String("home_node", deployment.HomeNodeID),
)
}
serveLocal:
// Create a simple reverse proxy to localhost
targetHost := "localhost:" + strconv.Itoa(deployment.Port)
target := "http://" + targetHost
// SECURITY (bug #215 follow-up): drop X-Internal-Auth-* headers before
// forwarding to a customer-deployed app on localhost. The app has no
// reason to see Orama's internal-auth metadata, and stripping prevents
// any accidental leakage if the customer ever adds header-based trust.
stripInboundInternalAuthHeaders(r.Header)
// Set proxy headers
r.Header.Set("X-Forwarded-For", getClientIP(r))
r.Header.Set("X-Forwarded-Proto", "https")
r.Header.Set("X-Forwarded-Host", r.Host)
// Handle WebSocket upgrade requests specially
if isWebSocketUpgrade(r) {
r.URL.Scheme = "http"
r.URL.Host = targetHost
r.Host = targetHost
if g.proxyWebSocket(w, r, targetHost) {
return
}
// WebSocket proxy failed - try cross-node replicas as fallback
if g.replicaManager != nil {
if g.proxyCrossNodeWithReplicas(w, r, deployment) {
return
}
}
http.Error(w, "WebSocket connection failed", http.StatusServiceUnavailable)
return
}
// Create a new request to the backend
backendURL := target + r.URL.Path
if r.URL.RawQuery != "" {
backendURL += "?" + r.URL.RawQuery
}
proxyReq, err := http.NewRequest(r.Method, backendURL, r.Body)
if err != nil {
http.Error(w, "Failed to create proxy request", http.StatusInternalServerError)
return
}
// Copy headers
for key, values := range r.Header {
for _, value := range values {
proxyReq.Header.Add(key, value)
}
}
// Execute proxy request using shared transport
httpClient := &http.Client{Timeout: 30 * time.Second, Transport: g.proxyTransport}
resp, err := httpClient.Do(proxyReq)
if err != nil {
g.logger.ComponentError(logging.ComponentGeneral, "local proxy request failed",
zap.String("target", target),
zap.Error(err),
)
// Local process is down — try other replica nodes before giving up
if g.replicaManager != nil {
if g.proxyCrossNodeWithReplicas(w, r, deployment) {
return
}
}
http.Error(w, "Service unavailable", http.StatusServiceUnavailable)
return
}
defer resp.Body.Close()
// Copy response headers
for key, values := range resp.Header {
for _, value := range values {
w.Header().Add(key, value)
}
}
// Write status code and body
w.WriteHeader(resp.StatusCode)
if _, err := w.(io.Writer).Write([]byte{}); err == nil {
io.Copy(w, resp.Body)
}
}
// proxyCrossNode forwards a request to the home node of a deployment
// Returns true if the request was successfully forwarded, false otherwise
func (g *Gateway) proxyCrossNode(w http.ResponseWriter, r *http.Request, deployment *deployments.Deployment) bool {
// Get home node IP from dns_nodes table
db := g.client.Database()
internalCtx := client.WithInternalAuth(r.Context())
query := "SELECT COALESCE(internal_ip, ip_address) FROM dns_nodes WHERE id = ? LIMIT 1"
result, err := db.Query(internalCtx, query, deployment.HomeNodeID)
if err != nil || result == nil || len(result.Rows) == 0 {
g.logger.Warn("Failed to get home node IP",
zap.String("home_node_id", deployment.HomeNodeID),
zap.Error(err))
return false
}
homeIP := getString(result.Rows[0][0])
if homeIP == "" {
g.logger.Warn("Home node IP is empty", zap.String("home_node_id", deployment.HomeNodeID))
return false
}
g.logger.Info("Proxying request to home node",
zap.String("deployment", deployment.Name),
zap.String("home_node_id", deployment.HomeNodeID),
zap.String("home_ip", homeIP),
zap.String("current_node", g.nodePeerID),
)
// Proxy to home node via internal HTTP port (6001)
// This is node-to-node internal communication - no TLS needed
targetHost := homeIP + ":6001"
// Handle WebSocket upgrade requests specially
if isWebSocketUpgrade(r) {
// SECURITY (bug #215 follow-up): drop X-Internal-Auth-* headers from
// the inbound request before forwarding to another node. The target
// node's authMiddleware gates internal-auth trust on source IP only;
// without this strip, an external attacker could forge JWT-Sub here
// and have the home node trust it after the cross-node hop.
stripInboundInternalAuthHeaders(r.Header)
r.Header.Set("X-Forwarded-For", getClientIP(r))
r.Header.Set("X-Orama-Proxy-Node", g.nodePeerID)
r.URL.Scheme = "http"
r.URL.Host = targetHost
// Keep original Host header for domain routing
return g.proxyWebSocket(w, r, targetHost)
}
targetURL := "http://" + targetHost + r.URL.Path
if r.URL.RawQuery != "" {
targetURL += "?" + r.URL.RawQuery
}
proxyReq, err := http.NewRequest(r.Method, targetURL, r.Body)
if err != nil {
g.logger.Error("Failed to create cross-node proxy request", zap.Error(err))
return false
}
// Copy headers and set Host header to original domain for routing
for key, values := range r.Header {
for _, value := range values {
proxyReq.Header.Add(key, value)
}
}
// SECURITY (bug #215 follow-up): drop any X-Internal-Auth-* headers the
// inbound request may have carried. See WS branch above.
stripInboundInternalAuthHeaders(proxyReq.Header)
proxyReq.Host = r.Host // Keep original host for domain routing on target node
proxyReq.Header.Set("X-Forwarded-For", getClientIP(r))
proxyReq.Header.Set("X-Orama-Proxy-Node", g.nodePeerID) // Prevent loops
// Circuit breaker: check if target node is healthy
cbKey := "node:" + homeIP
cb := g.circuitBreakers.Get(cbKey)
if !cb.Allow() {
g.logger.Warn("Cross-node proxy skipped (circuit open)", zap.String("target_ip", homeIP))
return false
}
// Internal node-to-node communication using shared transport
httpClient := &http.Client{Timeout: 120 * time.Second, Transport: g.proxyTransport}
resp, err := httpClient.Do(proxyReq)
if err != nil {
cb.RecordFailure()
g.logger.Error("Cross-node proxy request failed",
zap.String("target_ip", homeIP),
zap.String("host", r.Host),
zap.Error(err))
return false
}
defer resp.Body.Close()
if IsResponseFailure(resp.StatusCode) {
cb.RecordFailure()
} else {
cb.RecordSuccess()
}
// Copy response headers
for key, values := range resp.Header {
for _, value := range values {
w.Header().Add(key, value)
}
}
// Write status code and body
w.WriteHeader(resp.StatusCode)
io.Copy(w, resp.Body)
return true
}
// proxyCrossNodeWithReplicas tries to proxy a request to any healthy replica node.
// It first tries the primary (home node), then falls back to other replicas.
// Returns true if the request was successfully proxied.
func (g *Gateway) proxyCrossNodeWithReplicas(w http.ResponseWriter, r *http.Request, deployment *deployments.Deployment) bool {
if g.replicaManager == nil {
// No replica manager — fall back to original single-node proxy
return g.proxyCrossNode(w, r, deployment)
}
// Get all active replica nodes
replicaNodes, err := g.replicaManager.GetActiveReplicaNodes(r.Context(), deployment.ID)
if err != nil || len(replicaNodes) == 0 {
// Fall back to original home node proxy
return g.proxyCrossNode(w, r, deployment)
}
// Try each replica node (primary first if present)
for _, nodeID := range replicaNodes {
if nodeID == g.nodePeerID {
continue // Skip self
}
nodeIP, err := g.replicaManager.GetNodeIP(r.Context(), nodeID)
if err != nil {
g.logger.Warn("Failed to get replica node IP",
zap.String("node_id", nodeID),
zap.Error(err),
)
continue
}
// Proxy using the same logic as proxyCrossNode
proxyDeployment := *deployment
proxyDeployment.HomeNodeID = nodeID
if g.proxyCrossNodeToIP(w, r, &proxyDeployment, nodeIP) {
return true
}
}
return false
}
// proxyCrossNodeToIP forwards a request to a specific node IP.
// This is a variant of proxyCrossNode that takes a resolved IP directly.
func (g *Gateway) proxyCrossNodeToIP(w http.ResponseWriter, r *http.Request, deployment *deployments.Deployment, nodeIP string) bool {
g.logger.Info("Proxying request to replica node",
zap.String("deployment", deployment.Name),
zap.String("node_id", deployment.HomeNodeID),
zap.String("node_ip", nodeIP),
)
targetHost := nodeIP + ":6001"
// Handle WebSocket upgrade requests specially
if isWebSocketUpgrade(r) {
// SECURITY (bug #215 follow-up): see proxyCrossNode for rationale.
stripInboundInternalAuthHeaders(r.Header)
r.Header.Set("X-Forwarded-For", getClientIP(r))
r.Header.Set("X-Orama-Proxy-Node", g.nodePeerID)
r.URL.Scheme = "http"
r.URL.Host = targetHost
return g.proxyWebSocket(w, r, targetHost)
}
targetURL := "http://" + targetHost + r.URL.Path
if r.URL.RawQuery != "" {
targetURL += "?" + r.URL.RawQuery
}
proxyReq, err := http.NewRequest(r.Method, targetURL, r.Body)
if err != nil {
g.logger.Error("Failed to create cross-node proxy request", zap.Error(err))
return false
}
for key, values := range r.Header {
for _, value := range values {
proxyReq.Header.Add(key, value)
}
}
// SECURITY (bug #215 follow-up): see proxyCrossNode for rationale.
stripInboundInternalAuthHeaders(proxyReq.Header)
proxyReq.Host = r.Host
proxyReq.Header.Set("X-Forwarded-For", getClientIP(r))
proxyReq.Header.Set("X-Orama-Proxy-Node", g.nodePeerID)
// Circuit breaker: skip this replica if it's been failing
cbKey := "node:" + nodeIP
cb := g.circuitBreakers.Get(cbKey)
if !cb.Allow() {
g.logger.Warn("Replica proxy skipped (circuit open)", zap.String("target_ip", nodeIP))
return false
}
httpClient := &http.Client{Timeout: 5 * time.Second, Transport: g.proxyTransport}
resp, err := httpClient.Do(proxyReq)
if err != nil {
cb.RecordFailure()
g.logger.Warn("Replica proxy request failed",
zap.String("target_ip", nodeIP),
zap.Error(err),
)
return false
}
defer resp.Body.Close()
// If the remote node returned a gateway error, try the next replica
if IsResponseFailure(resp.StatusCode) {
cb.RecordFailure()
g.logger.Warn("Replica returned gateway error, trying next",
zap.String("target_ip", nodeIP),
zap.Int("status", resp.StatusCode),
)
return false
}
cb.RecordSuccess()
for key, values := range resp.Header {
for _, value := range values {
w.Header().Add(key, value)
}
}
w.WriteHeader(resp.StatusCode)
io.Copy(w, resp.Body)
return true
}
// Helper functions for type conversion
func getString(v interface{}) string {
if s, ok := v.(string); ok {
return s
}
return ""
}
func getInt(v interface{}) int {
if i, ok := v.(int); ok {
return i
}
if i, ok := v.(int64); ok {
return int(i)
}
if f, ok := v.(float64); ok {
return int(f)
}
return 0
}