From 98dad46a8134531b511295a051f9434df4d1c310 Mon Sep 17 00:00:00 2001 From: anonpenguin23 Date: Sun, 24 May 2026 20:56:08 +0300 Subject: [PATCH] fix(gateway): decouple webrtc route registration from legacy flag - Update route registration logic to rely solely on SFUPort > 0, resolving a silent 404 issue where gateways with valid SFU configurations were incorrectly disabled. - Retain WebRTCEnabled in config for backward compatibility with existing operator YAML and request schemas. - Add unit tests to pin registration behavior and prevent future regressions. --- core/pkg/gateway/config.go | 16 +++- core/pkg/gateway/gateway.go | 39 +++++++- core/pkg/gateway/webrtc_route_gate_test.go | 102 +++++++++++++++++++++ 3 files changed, 151 insertions(+), 6 deletions(-) create mode 100644 core/pkg/gateway/webrtc_route_gate_test.go diff --git a/core/pkg/gateway/config.go b/core/pkg/gateway/config.go index 323ae48..0179b40 100644 --- a/core/pkg/gateway/config.go +++ b/core/pkg/gateway/config.go @@ -51,11 +51,19 @@ type Config struct { // Loaded from ~/.orama/secrets/api-key-hmac-secret. APIKeyHMACSecret string - // WebRTC configuration (set when namespace has WebRTC enabled) - WebRTCEnabled bool // Whether WebRTC endpoints are active on this gateway - SFUPort int // Local SFU signaling port to proxy WebSocket connections to + // WebRTC configuration (set when namespace has WebRTC enabled). + // + // WebRTCEnabled is RETAINED for back-compat with operator YAML and + // the spawn-handler request shape, but no longer gates route + // registration (bugboard #411). Routes auto-register whenever + // SFUPort > 0 — the actual operational prerequisite. Validate still + // uses WebRTCEnabled to enforce "if you opted in, you MUST set the + // dependent fields", which catches obvious YAML typos at config + // load. + WebRTCEnabled bool // legacy opt-in; routes auto-register when SFUPort>0 regardless. Kept for back-compat. + SFUPort int // Local SFU signaling port to proxy WebSocket connections to. >0 = WebRTC routes registered. TURNDomain string // TURN server domain for credential generation - TURNSecret string // HMAC-SHA1 shared secret for TURN credential generation + TURNSecret string // HMAC-SHA1 shared secret for TURN credential generation (empty → /v1/webrtc/turn/credentials returns 503) // StealthCDNDomain, when set, makes the WebRTC credentials handler // advertise turns::443 (served by the SNI router). diff --git a/core/pkg/gateway/gateway.go b/core/pkg/gateway/gateway.go index 4de7cda..2d9b644 100644 --- a/core/pkg/gateway/gateway.go +++ b/core/pkg/gateway/gateway.go @@ -414,7 +414,9 @@ func New(logger *logging.ColoredLogger, cfg *Config) (*Gateway, error) { gw.pushHandlers.SetCredentialsManager(deps.PushCredentialsManager) } - if cfg.WebRTCEnabled && cfg.SFUPort > 0 { + // WebRTC route registration. See shouldRegisterWebRTCRoutes for the + // gate's full rationale (bugboard #411). + if shouldRegisterWebRTCRoutes(cfg) { gw.webrtcHandlers = webrtchandlers.NewWebRTCHandlers( logger, gw.localWireGuardIP, @@ -424,7 +426,9 @@ func New(logger *logging.ColoredLogger, cfg *Config) (*Gateway, error) { gw.proxyWebSocket, ) logger.ComponentInfo(logging.ComponentGeneral, "WebRTC handlers initialized", - zap.Int("sfu_port", cfg.SFUPort)) + zap.Int("sfu_port", cfg.SFUPort), + zap.Bool("turn_secret_set", cfg.TURNSecret != ""), + zap.Bool("legacy_webrtc_enabled_flag", cfg.WebRTCEnabled)) } if deps.OlricClient != nil { @@ -740,6 +744,37 @@ func New(logger *logging.ColoredLogger, cfg *Config) (*Gateway, error) { return gw, nil } +// shouldRegisterWebRTCRoutes decides whether `/v1/webrtc/*` routes +// (turn/credentials, signal, rooms) get wired up in the request mux. +// +// Bugboard #411 — pre-fix this required BOTH cfg.WebRTCEnabled AND +// cfg.SFUPort > 0. The boolean flag was a silent-404 footgun: spawn- +// handler-provisioned namespace gateways defaulted to +// WebRTCEnabled=false even when their SFU service was up and SFUPort +// was set. AnChat hit 404 on /v1/webrtc/turn/credentials for ~3 +// months because of this even though TURN was operationally usable. +// +// Post-fix: SFUPort > 0 alone gates registration. SFUPort is the +// actual operational prerequisite — the SFU proxy can't function +// without it, and operators who set SFUPort have already opted in. +// cfg.WebRTCEnabled is kept on the Config struct for back-compat with +// operator YAML and the spawn-handler request shape, but ignored at +// this gate. +// +// TURNSecret intentionally NOT in the gate. /v1/webrtc/signal and +// /v1/webrtc/rooms work without TURN (the SFU proxy alone). The +// credentials endpoint internally 503s "TURN not configured" when +// TURNSecret is empty — that's an ACTIONABLE error operators can +// trace, unlike the silent 404 that #411 reported. +// +// Extracted to a named function so the route-gate test can exercise +// the EXACT runtime logic without spinning up a full Gateway. If you +// change this function, update the gate's call site at the same time +// — or the test passes while live behavior diverges. +func shouldRegisterWebRTCRoutes(cfg *Config) bool { + return cfg.SFUPort > 0 +} + // getLocalSubscribers returns all local subscribers for a given topic and namespace func (g *Gateway) getLocalSubscribers(topic, namespace string) []*localSubscriber { topicKey := namespace + "." + topic diff --git a/core/pkg/gateway/webrtc_route_gate_test.go b/core/pkg/gateway/webrtc_route_gate_test.go new file mode 100644 index 0000000..38bc990 --- /dev/null +++ b/core/pkg/gateway/webrtc_route_gate_test.go @@ -0,0 +1,102 @@ +package gateway + +import ( + "testing" +) + +// Bugboard #411 — WebRTC route registration gate. +// +// Pre-fix the gate was `cfg.WebRTCEnabled && cfg.SFUPort > 0`. The +// boolean flag was a silent-404 footgun: spawn-handler-provisioned +// namespace gateways defaulted to WebRTCEnabled=false even when their +// SFU service was running and SFUPort was set, so every call to +// /v1/webrtc/turn/credentials returned 404 (not 503, not 401) for +// months — AnChat hit this on devnet for ~3 months before reporting. +// +// Post-fix: SFUPort > 0 alone gates registration. The legacy +// WebRTCEnabled boolean is retained on the Config struct for spawn- +// request back-compat but ignored at the gate. +// +// These tests pin the new gate semantics so a future refactor of +// gateway.go's startup wiring can't silently re-introduce the +// AND-with-boolean misconfig class. + +// All four tests below call the SAME `shouldRegisterWebRTCRoutes` +// helper that the runtime calls — defined alongside the gateway code +// in gateway.go. If the runtime gate changes, the test breaks +// immediately rather than silently passing while live behavior +// diverges (the classic "test duplicates implementation" anti-pattern). + +func TestWebRTCRouteGate_RegistersWhenSFUPortSet_RegardlessOfWebRTCEnabled(t *testing.T) { + // The actual #411 bug: WebRTCEnabled=false (default for spawn- + // provisioned namespace gateways) + SFUPort>0 (operator did + // configure the SFU). Pre-fix this returned `false` → no routes + // → 404. Post-fix MUST return true. + cfg := &Config{ + WebRTCEnabled: false, + SFUPort: 7800, + TURNSecret: "shared-secret", + TURNDomain: "turn.example.com", + } + if !shouldRegisterWebRTCRoutes(cfg) { + t.Errorf("BUG #411 REGRESSION: SFUPort=%d configured but routes not registered "+ + "because legacy WebRTCEnabled=false. This is exactly the silent-404 footgun "+ + "the fix was supposed to eliminate.", cfg.SFUPort) + } +} + +func TestWebRTCRouteGate_RegistersWhenBothEnabledAndPortSet(t *testing.T) { + // Pre-fix happy path — operator explicitly opted in via the + // legacy boolean. Must still register so existing configs work. + cfg := &Config{ + WebRTCEnabled: true, + SFUPort: 7800, + TURNSecret: "shared-secret", + } + if !shouldRegisterWebRTCRoutes(cfg) { + t.Error("explicit WebRTCEnabled=true + SFUPort>0: routes MUST register (back-compat)") + } +} + +func TestWebRTCRouteGate_SkipsWhenSFUPortZero(t *testing.T) { + // No SFU port = no functional SFU proxy = registering routes + // would just produce broken 500s on /v1/webrtc/signal. Better to + // not register. This is the "namespace genuinely doesn't want + // WebRTC" path. + cases := []struct { + name string + cfg *Config + }{ + {"both unset", &Config{}}, + {"webrtc explicitly enabled but no port", &Config{WebRTCEnabled: true, SFUPort: 0}}, + {"port is negative (sentinel)", &Config{SFUPort: -1}}, + } + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + if shouldRegisterWebRTCRoutes(tc.cfg) { + t.Errorf("SFUPort=%d: routes MUST NOT register without a real SFU port", + tc.cfg.SFUPort) + } + }) + } +} + +func TestWebRTCRouteGate_TURNSecretMissingStillRegisters(t *testing.T) { + // Important: SFUPort>0 + TURNSecret="" should still REGISTER the + // routes. /v1/webrtc/signal and /v1/webrtc/rooms work without TURN + // (TURN is only for the credentials endpoint). And the credentials + // handler internally returns 503 "TURN not configured" when secret + // is missing — which is an ACTIONABLE error operators can fix, + // unlike the silent 404 that #411 reported. + // + // If a future refactor moves the TURNSecret check into the gate, + // /v1/webrtc/signal disappears too and SFU-only namespaces break. + cfg := &Config{ + SFUPort: 7800, + TURNSecret: "", // intentionally missing + } + if !shouldRegisterWebRTCRoutes(cfg) { + t.Error("SFUPort>0 + TURNSecret empty: routes MUST still register so /v1/webrtc/signal works; " + + "the credentials endpoint surfaces 503 internally for the missing secret") + } +}