mirror of
https://github.com/DeBrosOfficial/orama.git
synced 2026-06-17 01:34:13 +00:00
feat(gateway): fix WebRTC config persistence and endpoint access
- Add internal WebRTC management endpoints to public path exemption list
- Implement DB fallback for WebRTC configuration during cluster restore
- Add unit tests to verify WebRTC config precedence and state self-healing
This commit is contained in:
parent
9bace7bbf4
commit
4fc975216f
@ -660,6 +660,18 @@ func isPublicPath(p string) bool {
|
|||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Namespace WebRTC management endpoints (enable/disable/status). Auth is
|
||||||
|
// handled INSIDE the handlers by the X-Orama-Internal-Auth header +
|
||||||
|
// WireGuard-peer source check (same as spawn/repair above). Without this
|
||||||
|
// exemption the API-key middleware rejects them with "missing API key"
|
||||||
|
// before the handler's internal-auth check runs, making the internal
|
||||||
|
// endpoints unreachable — so `orama namespace enable webrtc` had no
|
||||||
|
// working path (the public endpoint hits a gateway without the WebRTC
|
||||||
|
// manager wired). Bugboard: internal webrtc mgmt endpoints unreachable.
|
||||||
|
if strings.HasPrefix(p, "/v1/internal/namespace/webrtc/") {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
|
||||||
// Vault proxy endpoints (no auth — rate-limited per identity hash within handler)
|
// Vault proxy endpoints (no auth — rate-limited per identity hash within handler)
|
||||||
if strings.HasPrefix(p, "/v1/vault/") {
|
if strings.HasPrefix(p, "/v1/vault/") {
|
||||||
return true
|
return true
|
||||||
|
|||||||
@ -171,6 +171,15 @@ func TestIsPublicPath(t *testing.T) {
|
|||||||
{"internal join", "/v1/internal/join", true},
|
{"internal join", "/v1/internal/join", true},
|
||||||
{"internal namespace spawn", "/v1/internal/namespace/spawn", true},
|
{"internal namespace spawn", "/v1/internal/namespace/spawn", true},
|
||||||
{"internal namespace repair", "/v1/internal/namespace/repair", true},
|
{"internal namespace repair", "/v1/internal/namespace/repair", true},
|
||||||
|
// Internal WebRTC mgmt endpoints — exempt from API-key middleware
|
||||||
|
// (handler enforces internal-auth header + WireGuard peer). Without
|
||||||
|
// these, `orama namespace enable webrtc` had no working path.
|
||||||
|
{"internal webrtc enable", "/v1/internal/namespace/webrtc/enable", true},
|
||||||
|
{"internal webrtc disable", "/v1/internal/namespace/webrtc/disable", true},
|
||||||
|
{"internal webrtc status", "/v1/internal/namespace/webrtc/status", true},
|
||||||
|
// Guard: the PUBLIC webrtc mgmt path must STILL require auth (only
|
||||||
|
// the /internal/ variant is exempt).
|
||||||
|
{"public webrtc enable still requires auth", "/v1/namespace/webrtc/enable", false},
|
||||||
{"phantom session", "/v1/auth/phantom/session", true},
|
{"phantom session", "/v1/auth/phantom/session", true},
|
||||||
{"phantom complete", "/v1/auth/phantom/complete", true},
|
{"phantom complete", "/v1/auth/phantom/complete", true},
|
||||||
|
|
||||||
|
|||||||
@ -1815,6 +1815,52 @@ func (cm *ClusterManager) RestoreLocalClustersFromDisk(ctx context.Context) (int
|
|||||||
return restored, nil
|
return restored, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// restoreWebRTC holds the WebRTC settings resolved for a namespace gateway
// that is being restored. Its zero value is the "disabled" result.
type restoreWebRTC struct {
	// enabled reports whether a usable WebRTC block was resolved.
	enabled bool
	// sfuPort is the SFU signaling port to hand to the gateway.
	sfuPort int
	// turnDomain and turnSecret are the TURN domain and shared secret to
	// hand to the gateway.
	turnDomain, turnSecret string
}
|
||||||
|
|
||||||
|
// chooseRestoreWebRTC decides the WebRTC fields for a restored namespace
|
||||||
|
// gateway. The local state file wins when it carries a complete WebRTC
|
||||||
|
// block; otherwise the DB (consulted lazily via dbFetch — only when the
|
||||||
|
// state file is incomplete) is the source of truth. Returns a disabled
|
||||||
|
// result when neither source has a usable block.
|
||||||
|
//
|
||||||
|
// Bugboard #25: namespaces that had WebRTC enabled AFTER their state file
|
||||||
|
// was written carry no SFU/TURN fields in state. Without the DB fallback,
|
||||||
|
// the from-disk restore regenerates the gateway config without the webrtc
|
||||||
|
// block on every restart — SFU/TURN keep running but the gateway loses
|
||||||
|
// turn_secret + sfu_port (credentials configured:false, routes 404).
|
||||||
|
//
|
||||||
|
// Extracted as a pure function so the precedence is unit-testable without
|
||||||
|
// standing up the full restore path (systemd spawner + DB + port store).
|
||||||
|
func chooseRestoreWebRTC(
|
||||||
|
stateHasSFU bool, stateSFUPort int, stateTURNDomain, stateTURNSecret string,
|
||||||
|
dbFetch func() (enabled bool, sfuPort int, turnDomain, turnSecret string),
|
||||||
|
) restoreWebRTC {
|
||||||
|
if stateHasSFU && stateSFUPort > 0 && stateTURNSecret != "" {
|
||||||
|
return restoreWebRTC{
|
||||||
|
enabled: true,
|
||||||
|
sfuPort: stateSFUPort,
|
||||||
|
turnDomain: stateTURNDomain,
|
||||||
|
turnSecret: stateTURNSecret,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if enabled, sfuPort, turnDomain, turnSecret := dbFetch(); enabled && sfuPort > 0 && turnSecret != "" {
|
||||||
|
return restoreWebRTC{
|
||||||
|
enabled: true,
|
||||||
|
sfuPort: sfuPort,
|
||||||
|
turnDomain: turnDomain,
|
||||||
|
turnSecret: turnSecret,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return restoreWebRTC{}
|
||||||
|
}
|
||||||
|
|
||||||
// restoreClusterFromState restores all processes for a cluster using local state (no DB queries).
|
// restoreClusterFromState restores all processes for a cluster using local state (no DB queries).
|
||||||
func (cm *ClusterManager) restoreClusterFromState(ctx context.Context, state *ClusterLocalState) error {
|
func (cm *ClusterManager) restoreClusterFromState(ctx context.Context, state *ClusterLocalState) error {
|
||||||
cm.logger.Info("Restoring namespace cluster from local state",
|
cm.logger.Info("Restoring namespace cluster from local state",
|
||||||
@ -1961,12 +2007,44 @@ func (cm *ClusterManager) restoreClusterFromState(ctx context.Context, state *Cl
|
|||||||
IPFSReplicationFactor: cm.ipfsReplicationFactor,
|
IPFSReplicationFactor: cm.ipfsReplicationFactor,
|
||||||
}
|
}
|
||||||
|
|
||||||
// Add WebRTC config from persisted local state
|
// Resolve WebRTC config for the restored gateway. Prefer the
|
||||||
if state.HasSFU && state.SFUSignalingPort > 0 && state.TURNSharedSecret != "" {
|
// local state file; fall back to the DB (source of truth) to
|
||||||
|
// self-heal stale state. Bugboard #25 — the state file is NOT
|
||||||
|
// updated by EnableWebRTC, so a namespace enabled AFTER its state
|
||||||
|
// file was written carries no SFU/TURN fields here. Because this
|
||||||
|
// from-disk restore runs BEFORE the DB-backed restore and
|
||||||
|
// succeeds, the gateway config would otherwise be regenerated
|
||||||
|
// WITHOUT the webrtc block on every restart — SFU/TURN services
|
||||||
|
// keep running but the gateway has empty turn_secret + sfu_port=0
|
||||||
|
// (credentials return configured:false / 404, routes don't
|
||||||
|
// register). The lazy dbFetch only hits the DB when the state
|
||||||
|
// file is incomplete.
|
||||||
|
wr := chooseRestoreWebRTC(
|
||||||
|
state.HasSFU, state.SFUSignalingPort, state.TURNDomain, state.TURNSharedSecret,
|
||||||
|
func() (bool, int, string, string) {
|
||||||
|
webrtcCfg, err := cm.GetWebRTCConfig(ctx, state.NamespaceName)
|
||||||
|
if err != nil || webrtcCfg == nil {
|
||||||
|
return false, 0, "", ""
|
||||||
|
}
|
||||||
|
sfuBlock, err := cm.webrtcPortAllocator.GetSFUPorts(ctx, state.ClusterID, cm.localNodeID)
|
||||||
|
if err != nil || sfuBlock == nil {
|
||||||
|
return false, 0, "", ""
|
||||||
|
}
|
||||||
|
return true, sfuBlock.SFUSignalingPort,
|
||||||
|
fmt.Sprintf("turn.ns-%s.%s", state.NamespaceName, cm.baseDomain),
|
||||||
|
webrtcCfg.TURNSharedSecret
|
||||||
|
},
|
||||||
|
)
|
||||||
|
if wr.enabled {
|
||||||
gwCfg.WebRTCEnabled = true
|
gwCfg.WebRTCEnabled = true
|
||||||
gwCfg.SFUPort = state.SFUSignalingPort
|
gwCfg.SFUPort = wr.sfuPort
|
||||||
gwCfg.TURNDomain = state.TURNDomain
|
gwCfg.TURNDomain = wr.turnDomain
|
||||||
gwCfg.TURNSecret = state.TURNSharedSecret
|
gwCfg.TURNSecret = wr.turnSecret
|
||||||
|
if !state.HasSFU {
|
||||||
|
cm.logger.Info("Re-materialized WebRTC gateway config from DB (state file was stale)",
|
||||||
|
zap.String("namespace", state.NamespaceName),
|
||||||
|
zap.Int("sfu_port", wr.sfuPort))
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if err := cm.spawnGatewayWithSystemd(ctx, gwCfg); err != nil {
|
if err := cm.spawnGatewayWithSystemd(ctx, gwCfg); err != nil {
|
||||||
|
|||||||
109
core/pkg/namespace/restore_webrtc_test.go
Normal file
109
core/pkg/namespace/restore_webrtc_test.go
Normal file
@ -0,0 +1,109 @@
|
|||||||
|
package namespace
|
||||||
|
|
||||||
|
import "testing"
|
||||||
|
|
||||||
|
// Bugboard #25 — WebRTC config drift on restart.
|
||||||
|
//
|
||||||
|
// chooseRestoreWebRTC decides the gateway's WebRTC fields when a node
|
||||||
|
// restores namespace clusters from its local state file. The local state
|
||||||
|
// file is NOT updated by EnableWebRTC, so a namespace enabled after its
|
||||||
|
// state file was written has no SFU/TURN fields there — and because the
|
||||||
|
// from-disk restore runs first and succeeds, the DB-backed restore (which
|
||||||
|
// DOES read WebRTC) never runs. Result: the gateway config loses its
|
||||||
|
// webrtc block on every restart (SFU/TURN services keep running but the
|
||||||
|
// gateway reports configured:false and /v1/webrtc/turn/credentials 404s).
|
||||||
|
//
|
||||||
|
// These tests pin the precedence: state file when complete, DB fallback
|
||||||
|
// otherwise. The bug was the missing DB fallback.
|
||||||
|
|
||||||
|
// dbDisabled is a dbFetch stub for a namespace whose DB reports no WebRTC:
// not enabled, port 0, empty domain and secret.
func dbDisabled() (enabled bool, sfuPort int, turnDomain, turnSecret string) {
	return false, 0, "", ""
}
|
||||||
|
|
||||||
|
// dbEnabled builds a dbFetch stub that reports WebRTC as enabled and echoes
// back the given port, domain and secret.
func dbEnabled(port int, domain, secret string) func() (bool, int, string, string) {
	fetch := func() (bool, int, string, string) {
		return true, port, domain, secret
	}
	return fetch
}
|
||||||
|
|
||||||
|
func TestChooseRestoreWebRTC_stateFileCompleteWins(t *testing.T) {
|
||||||
|
// State file has a full block → use it, and NEVER consult the DB
|
||||||
|
// (the lazy dbFetch must not be called — saves a query on the hot
|
||||||
|
// restart path).
|
||||||
|
dbCalled := false
|
||||||
|
got := chooseRestoreWebRTC(true, 7800, "turn.ns-x.dbrs.space", "state-secret",
|
||||||
|
func() (bool, int, string, string) { dbCalled = true; return dbDisabled() })
|
||||||
|
|
||||||
|
if dbCalled {
|
||||||
|
t.Error("DB fetch was called even though the state file was complete (should short-circuit)")
|
||||||
|
}
|
||||||
|
if !got.enabled || got.sfuPort != 7800 || got.turnSecret != "state-secret" {
|
||||||
|
t.Errorf("want state-file values; got %+v", got)
|
||||||
|
}
|
||||||
|
if got.turnDomain != "turn.ns-x.dbrs.space" {
|
||||||
|
t.Errorf("turnDomain = %q; want state-file value", got.turnDomain)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestChooseRestoreWebRTC_staleStateFallsBackToDB(t *testing.T) {
|
||||||
|
// The actual bug-25 case: state file has NO webrtc (stale — written
|
||||||
|
// before enable), but the DB says enabled. MUST fall back to the DB
|
||||||
|
// so the block re-materializes instead of being silently dropped.
|
||||||
|
got := chooseRestoreWebRTC(false, 0, "", "",
|
||||||
|
dbEnabled(7801, "turn.ns-anchat-test.dbrs.space", "db-secret"))
|
||||||
|
|
||||||
|
if !got.enabled {
|
||||||
|
t.Fatal("BUG #25 REGRESSION: stale state file + DB-enabled WebRTC must fall back to DB; got disabled")
|
||||||
|
}
|
||||||
|
if got.sfuPort != 7801 {
|
||||||
|
t.Errorf("sfuPort = %d; want 7801 (from DB)", got.sfuPort)
|
||||||
|
}
|
||||||
|
if got.turnSecret != "db-secret" {
|
||||||
|
t.Errorf("turnSecret = %q; want db-secret (from DB)", got.turnSecret)
|
||||||
|
}
|
||||||
|
if got.turnDomain != "turn.ns-anchat-test.dbrs.space" {
|
||||||
|
t.Errorf("turnDomain = %q; want DB-derived value", got.turnDomain)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestChooseRestoreWebRTC_bothEmptyDisabled(t *testing.T) {
|
||||||
|
// Namespace genuinely without WebRTC: state file empty, DB disabled.
|
||||||
|
// Must return disabled so we don't register broken webrtc routes.
|
||||||
|
got := chooseRestoreWebRTC(false, 0, "", "", dbDisabled)
|
||||||
|
if got.enabled {
|
||||||
|
t.Errorf("want disabled when neither source has WebRTC; got %+v", got)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestChooseRestoreWebRTC_incompleteStateFileFallsToDB(t *testing.T) {
|
||||||
|
// State file partially populated (HasSFU but missing secret, or
|
||||||
|
// port 0) must NOT be treated as complete — fall through to DB.
|
||||||
|
// Catches a regression where a half-written state file shadows the
|
||||||
|
// DB and yields a broken (secret-less) gateway config.
|
||||||
|
cases := []struct {
|
||||||
|
name string
|
||||||
|
hasSFU bool
|
||||||
|
sfuPort int
|
||||||
|
turnSec string
|
||||||
|
}{
|
||||||
|
{"hasSFU but port 0", true, 0, "s"},
|
||||||
|
{"hasSFU but empty secret", true, 7800, ""},
|
||||||
|
{"no hasSFU flag", false, 7800, "s"},
|
||||||
|
}
|
||||||
|
for _, tc := range cases {
|
||||||
|
t.Run(tc.name, func(t *testing.T) {
|
||||||
|
got := chooseRestoreWebRTC(tc.hasSFU, tc.sfuPort, "d", tc.turnSec,
|
||||||
|
dbEnabled(9000, "turn.db", "db-secret"))
|
||||||
|
if !got.enabled || got.sfuPort != 9000 || got.turnSecret != "db-secret" {
|
||||||
|
t.Errorf("incomplete state file should fall back to DB; got %+v", got)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestChooseRestoreWebRTC_dbIncompleteStaysDisabled(t *testing.T) {
|
||||||
|
// Defensive: if the DB row exists but is itself incomplete (no port
|
||||||
|
// or no secret — e.g. a half-provisioned enable), do NOT enable with
|
||||||
|
// a broken block. Better disabled than registering routes that 500.
|
||||||
|
got := chooseRestoreWebRTC(false, 0, "", "",
|
||||||
|
func() (bool, int, string, string) { return true, 0, "turn.db", "" })
|
||||||
|
if got.enabled {
|
||||||
|
t.Errorf("DB row incomplete (port 0, no secret): want disabled; got %+v", got)
|
||||||
|
}
|
||||||
|
}
|
||||||
Loading…
x
Reference in New Issue
Block a user