feat(gateway): implement stealth TURN discovery and configuration

- Add `turn_stealth_domain` to gateway config for stealth TURN support
- Introduce `turn_discovery` in `sni-router` to auto-discover per-namespace routes
- Add database migration to enable stealth TURN per namespace
- Document ephemeral state API in `SERVERLESS.md`
This commit is contained in:
anonpenguin23 2026-06-11 07:04:50 +03:00
parent f192cd0b84
commit b9d5f542e1
48 changed files with 3054 additions and 256 deletions

View File

@ -74,6 +74,10 @@ func parseGatewayConfig(logger *logging.ColoredLogger) *gateway.Config {
SFUPort int `yaml:"sfu_port"`
TURNDomain string `yaml:"turn_domain"`
TURNSecret string `yaml:"turn_secret"`
// TURNStealthDomain is the neutral stealth TURNS:443 host (feat-124).
// Maps to cfg.StealthCDNDomain so turn.credentials advertises the
// stealth rung of the URI ladder.
TURNStealthDomain string `yaml:"turn_stealth_domain"`
}
type yamlCfg struct {
@ -256,6 +260,9 @@ func parseGatewayConfig(logger *logging.ColoredLogger) *gateway.Config {
if v := strings.TrimSpace(y.WebRTC.TURNSecret); v != "" {
cfg.TURNSecret = v
}
if v := strings.TrimSpace(y.WebRTC.TURNStealthDomain); v != "" {
cfg.StealthCDNDomain = v
}
// Validate configuration
if errs := cfg.ValidateConfig(); len(errs) > 0 {

View File

@ -32,6 +32,18 @@
// backend:
// name: gateway
// addr: "127.0.0.1:8443"
// turn_discovery:
// namespaces_dir: /opt/orama/.orama/data/namespaces
// base_domain: orama-devnet.network
// rescan_interval: 30s
//
// When turn_discovery.namespaces_dir is set, the router additionally scans
// <namespaces_dir>/*/configs/turn-*.yaml every rescan_interval. For each
// namespace that has a TURNS listener it derives two routes — the bland stealth
// host and a "turn.ns-<namespace>.<base_domain>" alias — both forwarding to that
// namespace's local TURNS port. Discovered routes are merged with the static
// routes above (static wins on conflict); a transient scan error keeps the
// previously-installed routes.
package main
import (
@ -69,14 +81,29 @@ type yamlRoute struct {
Backend yamlBackend `yaml:"backend"`
}
// yamlTURNDiscovery mirrors sniproxy.TURNDiscoveryConfig for YAML decoding.
// When present and namespaces_dir is set, the router auto-discovers per-
// namespace stealth-TURN routes by scanning <namespaces_dir>/*/configs/turn-*.yaml.
// An empty namespaces_dir leaves discovery disabled (see discoveryEnabled).
type yamlTURNDiscovery struct {
// NamespacesDir is the root directory whose per-namespace configs are scanned.
NamespacesDir string `yaml:"namespaces_dir"`
// BaseDomain is the domain suffix used when deriving the discovered SNI
// hostnames (e.g. the "turn.ns-<namespace>.<base_domain>" alias).
BaseDomain string `yaml:"base_domain"`
// RescanInterval is how often the namespaces directory is rescanned.
RescanInterval time.Duration `yaml:"rescan_interval"`
}
// yamlConfig is the on-disk configuration shape.
type yamlConfig struct {
Listen string `yaml:"listen"`
ClientHelloTimeout time.Duration `yaml:"client_hello_timeout"`
BackendDialTimeout time.Duration `yaml:"backend_dial_timeout"`
MaxConcurrentConns int `yaml:"max_concurrent_conns"`
Fallback yamlBackend `yaml:"fallback"`
Routes []yamlRoute `yaml:"routes"`
Listen string `yaml:"listen"`
ClientHelloTimeout time.Duration `yaml:"client_hello_timeout"`
BackendDialTimeout time.Duration `yaml:"backend_dial_timeout"`
MaxConcurrentConns int `yaml:"max_concurrent_conns"`
Fallback yamlBackend `yaml:"fallback"`
Routes []yamlRoute `yaml:"routes"`
TURNDiscovery yamlTURNDiscovery `yaml:"turn_discovery"`
}
// discoveryEnabled tells the caller whether TURN route auto-discovery should
// run: it is switched on solely by a non-empty turn_discovery.namespaces_dir.
func (y *yamlConfig) discoveryEnabled() bool {
	scanRoot := y.TURNDiscovery.NamespacesDir
	return len(scanRoot) > 0
}
func main() {
@ -94,25 +121,49 @@ func main() {
router := sniproxy.NewRouter(toBackend(cfg.Fallback))
// Hot-reload the route table from the config file so a namespace's
// cdn/turn SNI routes can be added or removed without restarting the
// router (Router.Replace swaps atomically under in-flight connections).
reloader := sniproxy.NewFileRouteReloader(configPath,
func() ([]sniproxy.Route, sniproxy.Backend, error) {
y, err := loadConfig(configPath)
if err != nil {
return nil, sniproxy.Backend{}, err
}
return toRoutes(y.Routes), toBackend(y.Fallback), nil
}, router, logger.Logger)
if err := reloader.Apply(); err != nil {
logger.ComponentError(logging.ComponentSNI, "Failed to install initial routes",
zap.Error(err))
os.Exit(1)
// The static routes (and fallback) always come from the config file; this
// closure is re-evaluated on every reload/rescan so a hand-edit to the
// config is picked up without a restart.
staticSource := func() ([]sniproxy.Route, sniproxy.Backend, error) {
y, err := loadConfig(configPath)
if err != nil {
return nil, sniproxy.Backend{}, err
}
return toRoutes(y.Routes), toBackend(y.Fallback), nil
}
routeStop := make(chan struct{})
defer close(routeStop)
go reloader.Watch(sniproxy.DefaultRouteReloadInterval, routeStop)
if cfg.discoveryEnabled() {
// Auto-discover per-namespace stealth-TURN routes by scanning the
// namespaces directory, merged with the static config routes (static
// wins on conflict), re-installed atomically every rescan_interval. A
// transient scan error keeps the previously-installed routes.
discoverer := sniproxy.NewTURNRouteDiscoverer(
sniproxy.TURNDiscoveryConfig{
NamespacesDir: cfg.TURNDiscovery.NamespacesDir,
BaseDomain: cfg.TURNDiscovery.BaseDomain,
RescanInterval: cfg.TURNDiscovery.RescanInterval,
}, staticSource, router, logger.Logger)
if err := discoverer.Apply(); err != nil {
logger.ComponentError(logging.ComponentSNI, "Failed to install initial routes",
zap.Error(err))
os.Exit(1)
}
go discoverer.Run(routeStop)
} else {
// No discovery configured: hot-reload the static route table from the
// config file so cdn/turn SNI routes can be added or removed without
// restarting (Router.Replace swaps atomically under in-flight conns).
reloader := sniproxy.NewFileRouteReloader(configPath, staticSource, router, logger.Logger)
if err := reloader.Apply(); err != nil {
logger.ComponentError(logging.ComponentSNI, "Failed to install initial routes",
zap.Error(err))
os.Exit(1)
}
go reloader.Watch(sniproxy.DefaultRouteReloadInterval, routeStop)
}
srv := sniproxy.NewServer(router, sniproxy.Config{
ClientHelloTimeout: cfg.ClientHelloTimeout,
@ -235,6 +286,16 @@ func validateConfig(y *yamlConfig) []string {
errs = append(errs, fmt.Sprintf("routes[%d].backend.addr: required", i))
}
}
// turn_discovery is optional, but when partially set (namespaces_dir XOR
// base_domain) it is almost certainly a misconfiguration, so validate the
// pair together via the library's own Validate.
if y.discoveryEnabled() || y.TURNDiscovery.BaseDomain != "" {
dc := sniproxy.TURNDiscoveryConfig{
NamespacesDir: y.TURNDiscovery.NamespacesDir,
BaseDomain: y.TURNDiscovery.BaseDomain,
}
errs = append(errs, dc.Validate()...)
}
return errs
}

View File

@ -187,6 +187,69 @@ The legacy `db_execute` is kept indefinitely so existing functions don't break.
|----------|-------------|
| `pubsub_publish(topic, dataJSON)` → bool | Publish message to a PubSub topic. Returns true on success. |
### Ephemeral State (WS-subscribe-tracked)
Short-lived per-subscriber state (typing indicators, presence, call ringing,
live cursors) that the gateway **auto-clears the moment the owning WebSocket
client disconnects** — no heartbeats, no prune crons. State also expires on a
TTL backstop (default 60 s, max 30 min). The owning client ID and namespace
come from the server-trusted invocation context; functions cannot spoof them.
| Function | Description |
|----------|-------------|
| `ephemeral_state_set(topic, key, payload, ttlMs)` → u32 | Record state owned by the CURRENT invocation's WS client and publish an `ephemeral.set` event on the topic. 1 = ok, 0 = failure (no WS client, empty topic/key, payload > 16 KiB, > 256 keys/client). |
| `ephemeral_state_clear(topic, key)` → u32 | Clear state this client owns; publishes `ephemeral.clear` (reason `explicit`). Idempotent — clearing a missing/non-owned key returns 1. |
| `ephemeral_state_list(topic)` → u64 | Reconnect catch-up read: packed `ptr<<32\|len` of a JSON envelope with the live entries on the topic. Works without a WS client (read-only). 0 on failure. |
Raw import signatures (pointer/length ABI — note `ttlMs` is **i64**):
```go
//go:wasmimport env ephemeral_state_set
func ephemeralStateSet(topicPtr *byte, topicLen uint32, keyPtr *byte, keyLen uint32,
payloadPtr *byte, payloadLen uint32, ttlMs int64) uint32
//go:wasmimport env ephemeral_state_clear
func ephemeralStateClear(topicPtr *byte, topicLen uint32, keyPtr *byte, keyLen uint32) uint32
//go:wasmimport env ephemeral_state_list
func ephemeralStateList(topicPtr *byte, topicLen uint32) uint64 // ptr<<32|len of JSON
```
Synthetic events are published **on the same topic** the state lives on, with
the `_orama` control-frame discriminator (same dispatch pattern as the
`auth.refresh` frame). Subscribers update their local view from the stream:
```json
{"_orama":"ephemeral.set", "topic":"typing:room1", "key":"user-7", "client_id":"ws-abc", "payload":"<base64>"}
{"_orama":"ephemeral.clear","topic":"typing:room1", "key":"user-7", "client_id":"ws-abc", "reason":"disconnect"}
```
`reason` is `explicit` (function called clear), `disconnect` (owning WS client
went away — the zero-lag path), or `expired` (TTL backstop). `payload` is
base64 (Go `[]byte` JSON encoding); among events it is present only on
`ephemeral.set`, never on `ephemeral.clear`.
`ephemeral_state_list` returns:
```json
{"entries":[{"key":"user-7","client_id":"ws-abc","payload":"<base64>","expires_in_ms":48211}]}
```
Typing-indicator shape (called from a `ws_persistent` rpc-router function):
```go
// Client sends {"op":"typing.start","room":"room1","user":"user-7"} → handler:
ephemeralStateSet(ptr("typing:"+room), len32("typing:"+room),
ptr(userID), len32(userID), nil, 0, 30_000) // 30s TTL backstop
// Client sends typing.stop → handler:
ephemeralStateClear(ptr("typing:"+room), len32("typing:"+room), ptr(userID), len32(userID))
// No typing.stop needed on app kill / network drop: the WS disconnect publishes
// {"_orama":"ephemeral.clear",...,"reason":"disconnect"} to every subscriber
// immediately. On (re)connect, call ephemeral_state_list("typing:"+room) once
// to seed local state, then track the event stream.
```
### Logging
| Function | Description |

View File

@ -0,0 +1,16 @@
-- =============================================================================
-- 030_webrtc_stealth.sql
--
-- Stealth TURNS-over-443 per namespace — feat-124 (censorship-resistant
-- calling). When stealth_enabled is true the namespace's TURN servers carry a
-- second TLS certificate for the neutral stealth hostname
-- (cdn-<hash>.<base-domain>, derived via turn.StealthHostForNamespace), the
-- SNI router forwards :443 ClientHellos for that hostname to the TURN TLS
-- listener, and turn.credentials advertises `turns:<stealth-host>:443` as the
-- final rung of the ICE URI ladder.
--
-- Default false → backward compatible: existing WebRTC namespaces keep the
-- baseline udp:3478 / tcp:3478 / turns:5349 URIs unchanged.
-- =============================================================================
-- NOT NULL keeps the flag strictly two-state: existing rows are backfilled with
-- the FALSE default, and a later write can never store NULL in a column that
-- readers treat as a plain boolean.
ALTER TABLE namespace_webrtc_config ADD COLUMN stealth_enabled BOOLEAN NOT NULL DEFAULT FALSE;

View File

@ -79,6 +79,8 @@ func showNamespaceHelp() {
fmt.Printf(" repair <namespace> - Repair an under-provisioned namespace cluster\n")
fmt.Printf(" enable webrtc --namespace NS - Enable WebRTC (SFU + TURN) for a namespace\n")
fmt.Printf(" disable webrtc --namespace NS - Disable WebRTC for a namespace\n")
fmt.Printf(" enable webrtc-stealth --namespace NS - Enable stealth TURNS over :443 (feat-124)\n")
fmt.Printf(" disable webrtc-stealth --namespace NS - Disable stealth TURNS\n")
fmt.Printf(" webrtc-status --namespace NS - Show WebRTC service status\n")
fmt.Printf(" help - Show this help message\n\n")
fmt.Printf("Flags:\n")
@ -226,8 +228,12 @@ func handleNamespaceDelete(force bool) {
func handleNamespaceEnable(args []string) {
feature := args[0]
if feature == "webrtc-stealth" {
handleNamespaceStealthToggle(args[1:], true)
return
}
if feature != "webrtc" {
fmt.Fprintf(os.Stderr, "Unknown feature: %s\nSupported features: webrtc\n", feature)
fmt.Fprintf(os.Stderr, "Unknown feature: %s\nSupported features: webrtc, webrtc-stealth\n", feature)
os.Exit(1)
}
@ -283,10 +289,82 @@ func handleNamespaceEnable(args []string) {
fmt.Printf(" TURN instances: 2 nodes (relay on public IPs)\n")
}
// handleNamespaceStealthToggle drives /v1/namespace/webrtc/stealth/{enable|disable}
// (feat-124 — censorship-resistant TURNS over :443).
//
// args are the CLI arguments that follow the feature name and must carry
// --namespace <name>; enable selects the endpoint verb. The function prints
// progress to stdout and terminates the process with exit status 1 when the
// namespace is missing, the request cannot be built or sent, or the gateway
// answers with anything other than 200 OK.
func handleNamespaceStealthToggle(args []string, enable bool) {
	verb := "disable"
	if enable {
		verb = "enable"
	}

	var ns string
	fs := flag.NewFlagSet("namespace "+verb+" webrtc-stealth", flag.ExitOnError)
	fs.StringVar(&ns, "namespace", "", "Namespace name")
	// With flag.ExitOnError, Parse exits the process itself on a bad flag, so
	// the returned error carries nothing left to handle.
	_ = fs.Parse(args)

	if ns == "" {
		fmt.Fprintf(os.Stderr, "Usage: orama namespace %s webrtc-stealth --namespace <name>\n", verb)
		os.Exit(1)
	}

	gatewayURL, apiKey := loadAuthForNamespace(ns)

	if enable {
		fmt.Printf("Enabling WebRTC stealth (TURNS over :443) for namespace '%s'...\n", ns)
		fmt.Printf("This provisions a Let's Encrypt cert for the neutral stealth host and may take up to ~2 minutes.\n")
	} else {
		fmt.Printf("Disabling WebRTC stealth for namespace '%s'...\n", ns)
	}

	url := fmt.Sprintf("%s/v1/namespace/webrtc/stealth/%s", gatewayURL, verb)
	req, err := http.NewRequest(http.MethodPost, url, nil)
	if err != nil {
		fmt.Fprintf(os.Stderr, "Failed to create request: %v\n", err)
		os.Exit(1)
	}
	req.Header.Set("Authorization", "Bearer "+apiKey)

	// NOTE(review): InsecureSkipVerify disables certificate verification, so the
	// bearer API key above is sent without authenticating the gateway.
	// Presumably this mirrors the other namespace commands — confirm whether it
	// is still required before relying on it outside trusted networks.
	// NOTE(review): the client has no timeout; the enable call is announced as
	// taking up to ~2 minutes, so any bound added later must exceed that.
	client := &http.Client{
		Transport: &http.Transport{
			TLSClientConfig: &tls.Config{InsecureSkipVerify: true},
		},
	}
	resp, err := client.Do(req)
	if err != nil {
		fmt.Fprintf(os.Stderr, "Failed to connect to gateway: %v\n", err)
		os.Exit(1)
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		// The error body is decoded best-effort: a proxy or a crashed gateway
		// may answer with non-JSON, in which case the HTTP status line is the
		// only diagnostic we can offer.
		errMsg := fmt.Sprintf("unknown error (HTTP %s)", resp.Status)
		var result map[string]interface{}
		if decodeErr := json.NewDecoder(resp.Body).Decode(&result); decodeErr == nil {
			if e, ok := result["error"].(string); ok && e != "" {
				errMsg = e
			}
		}
		fmt.Fprintf(os.Stderr, "Failed to %s WebRTC stealth: %s\n", verb, errMsg)
		os.Exit(1)
	}

	if enable {
		fmt.Printf("WebRTC stealth enabled for namespace '%s'.\n", ns)
		fmt.Printf(" turn.credentials now advertises the full URI ladder including turns:<stealth-host>:443.\n")
		fmt.Printf(" Make sure the SNI router is enabled on the TURN nodes (node.yaml sni_router.enabled).\n")
	} else {
		fmt.Printf("WebRTC stealth disabled for namespace '%s'.\n", ns)
	}
}
func handleNamespaceDisable(args []string) {
feature := args[0]
if feature == "webrtc-stealth" {
handleNamespaceStealthToggle(args[1:], false)
return
}
if feature != "webrtc" {
fmt.Fprintf(os.Stderr, "Unknown feature: %s\nSupported features: webrtc\n", feature)
fmt.Fprintf(os.Stderr, "Unknown feature: %s\nSupported features: webrtc, webrtc-stealth\n", feature)
os.Exit(1)
}

View File

@ -230,9 +230,54 @@ func (cg *ConfigGenerator) GenerateNodeConfig(peerAddresses []string, vpsIP stri
return "", fmt.Errorf("failed to populate webrtc config: %w", err)
}
// Stealth TURN SNI router (feat-124). Like the webrtc block, sni_router is
// an operator opt-in that only exists in the previous node.yaml, so carry
// it forward across regeneration. Without this, a Phase4 regen would reset
// sni_router.enabled to false, stop the :443 router and break stealth TURN
// for every region that relies on it (the same regen-wipe class of outage
// as bugboard #259/#846).
cg.populateSNIRouterConfig(&data)
return templates.RenderNodeConfig(data)
}
// populateSNIRouterConfig copies the sni_router.enabled flag found in the
// node.yaml currently on disk into the template data, so regenerating the
// config does not silently turn the stealth TURN-over-443 router off. When the
// file or the block is missing the flag stays false.
func (cg *ConfigGenerator) populateSNIRouterConfig(data *templates.NodeConfigData) {
	carriedForward := cg.readExistingSNIRouterEnabled()
	data.SNIRouterEnabled = carriedForward
}
// SNIRouterEnabled exposes the sni_router.enabled opt-in read from the node's
// on-disk node.yaml. The orchestrator consults it AFTER Phase4 has written
// node.yaml to decide whether Caddy moves to :8443 and the router unit is
// started. A missing config or block yields false.
func (cg *ConfigGenerator) SNIRouterEnabled() bool {
	optedIn := cg.readExistingSNIRouterEnabled()
	return optedIn
}
// readExistingSNIRouterEnabled extracts only the top-level sni_router.enabled
// flag from <oramaDir>/configs/node.yaml. Every failure mode — file missing
// (fresh install), unparsable YAML (old or malformed config), or no sni_router
// block — deliberately collapses to false so a regeneration never fails here.
func (cg *ConfigGenerator) readExistingSNIRouterEnabled() bool {
	// Minimal decoding shape: just the one flag this function reports.
	type sniRouterBlock struct {
		Enabled bool `yaml:"enabled"`
	}
	type nodeYAML struct {
		SNIRouter sniRouterBlock `yaml:"sni_router"`
	}

	contents, readErr := os.ReadFile(filepath.Join(cg.oramaDir, "configs", "node.yaml"))
	if readErr != nil {
		// Nothing on disk yet: treat as "not opted in".
		return false
	}

	var existing nodeYAML
	if yaml.Unmarshal(contents, &existing) != nil {
		// Unreadable config must not abort the regen; default to off.
		return false
	}
	return existing.SNIRouter.Enabled
}
// existingWebRTC is the minimal shape parsed out of an existing node.yaml to
// carry forward operator-set WebRTC fields across a config regeneration.
type existingWebRTC struct {

View File

@ -23,7 +23,8 @@ type BinaryInstaller struct {
gateway *installers.GatewayInstaller
coredns *installers.CoreDNSInstaller
caddy *installers.CaddyInstaller
ntfy *installers.NtfyInstaller // feature #72; installed only when EnableNtfy is set
ntfy *installers.NtfyInstaller // feature #72; installed only when EnableNtfy is set
sniRouter *installers.SNIRouterInstaller // feat-124; configured only when sni_router.enabled
}
// NewBinaryInstaller creates a new binary installer
@ -41,6 +42,7 @@ func NewBinaryInstaller(arch string, logWriter io.Writer) *BinaryInstaller {
coredns: installers.NewCoreDNSInstaller(arch, logWriter, oramaHome),
caddy: installers.NewCaddyInstaller(arch, logWriter, oramaHome),
ntfy: installers.NewNtfyInstaller(arch, logWriter),
sniRouter: installers.NewSNIRouterInstaller(arch, logWriter, OramaDir),
}
}
@ -158,6 +160,29 @@ func (bi *BinaryInstaller) EnableCaddyNtfyProxy(hostname string) {
bi.caddy.EnableNtfyProxy(hostname)
}
// EnableCaddySNIRouterMode switches the wrapped Caddy installer into SNI-router
// mode: the next ConfigureCaddy() call puts Caddy's HTTPS listener on :8443
// instead of :443, leaving :443 to the orama-sni-router (feat-124). It has to
// run BEFORE ConfigureCaddy.
func (bi *BinaryInstaller) EnableCaddySNIRouterMode() {
	ci := bi.caddy
	ci.EnableSNIRouterMode()
}
// ConfigureSNIRouter delegates to the SNI router installer to write the
// orama-sni-router YAML config (listen :443, fallback Caddy on :8443,
// turn_discovery for baseDomain). Feat-124. Any installer error is returned
// unchanged.
func (bi *BinaryInstaller) ConfigureSNIRouter(baseDomain string) error {
	if err := bi.sniRouter.Configure(baseDomain); err != nil {
		return err
	}
	return nil
}
// WriteSNIRouterUnit has the SNI router installer write
// /etc/systemd/system/orama-sni-router.service and returns its error as-is.
func (bi *BinaryInstaller) WriteSNIRouterUnit() error {
	err := bi.sniRouter.WriteSystemdUnit()
	return err
}
// SNIRouterServiceName gives callers the systemd unit name to use for the
// router's lifecycle operations.
func (bi *BinaryInstaller) SNIRouterServiceName() string {
	const unit = installers.SNIRouterServiceName
	return unit
}
// InstallNtfy installs the self-hosted ntfy server (binary, user,
// systemd unit, data directory). Feature #72. Idempotent.
func (bi *BinaryInstaller) InstallNtfy() error {

View File

@ -27,8 +27,20 @@ type CaddyInstaller struct {
// Enabled per-node via EnableNtfyProxy. Feature #72.
withNtfy bool
ntfyHostname string // e.g. "push.dbrs.space" — fully-qualified public host
// behindSNIRouter, when set, moves Caddy's HTTPS listener off :443 to
// CaddyHTTPSPortBehindSNI so the orama-sni-router can own :443 and forward
// TLS by SNI (feat-124, stealth TURN). Enabled per-node via
// EnableSNIRouterMode. Plain HTTP (:80) is unaffected. When false the
// generated Caddyfile is byte-identical to the pre-feature output.
behindSNIRouter bool
}
// CaddyHTTPSPortBehindSNI is the port Caddy binds for HTTPS when the node runs
// behind the SNI router (which owns :443). The generated sni-router config uses
// this same constant for its caddy fallback backend (127.0.0.1:8443), so the
// two must stay in step; the value also matches the plan doc.
const CaddyHTTPSPortBehindSNI = 8443
// NewCaddyInstaller creates a new Caddy installer
func NewCaddyInstaller(arch string, logWriter io.Writer, oramaHome string) *CaddyInstaller {
return &CaddyInstaller{
@ -52,6 +64,16 @@ func (ci *CaddyInstaller) EnableNtfyProxy(hostname string) {
ci.ntfyHostname = hostname
}
// EnableSNIRouterMode marks this installer as running behind the
// orama-sni-router (feat-124): the generated Caddyfile then carries the global
// `https_port` option so HTTPS binds CaddyHTTPSPortBehindSNI (8443) rather than
// :443. Plain HTTP on :80 is not affected.
//
// Call it BEFORE Configure; the flag is only consulted while the Caddyfile is
// being generated. If it is never called the Caddyfile keeps HTTPS on :443.
func (ci *CaddyInstaller) EnableSNIRouterMode() {
	ci.behindSNIRouter = true
}
// IsInstalled checks if Caddy with orama DNS module is already installed
func (ci *CaddyInstaller) IsInstalled() bool {
caddyPath := "/usr/bin/caddy"
@ -417,7 +439,17 @@ func (ci *CaddyInstaller) generateCaddyfile(domain, email, acmeEndpoint, baseDom
// workload is REST + WebSocket (neither benefits much from
// h2 stream multiplexing — REST is keep-alive over h1, and
// WS is single-connection by design).
sb.WriteString(fmt.Sprintf("{\n email %s\n servers {\n protocols h1\n }\n}\n", email))
// When this node runs behind the SNI router (feat-124), move Caddy's HTTPS
// listener off :443 to CaddyHTTPSPortBehindSNI via the `https_port` global
// option. The sni-router owns :443 and forwards TLS by SNI to either a
// namespace's TURNS listener or here (127.0.0.1:8443). Plain HTTP (:80) is
// unchanged. When behindSNIRouter is false, no `https_port` line is emitted
// and the Caddyfile is byte-identical to the pre-feature output.
httpsPortOption := ""
if ci.behindSNIRouter {
httpsPortOption = fmt.Sprintf(" https_port %d\n", CaddyHTTPSPortBehindSNI)
}
sb.WriteString(fmt.Sprintf("{\n email %s\n%s servers {\n protocols h1\n }\n}\n", email, httpsPortOption))
// Node domain blocks (e.g., node1.dbrs.space, *.node1.dbrs.space)
sb.WriteString(fmt.Sprintf("\n*.%s {\n%s\n reverse_proxy localhost:6001\n}\n", domain, tlsBlock))

View File

@ -1,6 +1,7 @@
package installers
import (
"fmt"
"io"
"strings"
"testing"
@ -97,3 +98,50 @@ func TestGenerateCaddyfile_BaseDomainSameAsDomainOmitsDuplicates(t *testing.T) {
t.Errorf("expected exactly 2 `*.dbrs.space {` occurrences (1 TLS + 1 HTTP), got %d in:\n%s", got, cf)
}
}
// TestGenerateCaddyfile_SNIRouterDisabledByteIdentical guards the feat-124
// default path: without a call to EnableSNIRouterMode the rendered Caddyfile
// must keep the pre-feature global options block — no `https_port` option and
// no reference to 8443 — because that is what every existing node runs with.
func TestGenerateCaddyfile_SNIRouterDisabledByteIdentical(t *testing.T) {
	installer := newTestCaddyInstaller()
	rendered := installer.generateCaddyfile("node1.dbrs.space", "admin@dbrs.space",
		"http://localhost:6001/v1/internal/acme", "dbrs.space")

	// Substrings that only appear once SNI-router mode is switched on.
	forbidden := []struct{ needle, format string }{
		{"https_port", "default Caddyfile must NOT contain `https_port` (SNI router off); got:\n%s"},
		{"8443", "default Caddyfile must NOT reference :8443 (SNI router off); got:\n%s"},
	}
	for _, f := range forbidden {
		if strings.Contains(rendered, f.needle) {
			t.Errorf(f.format, rendered)
		}
	}

	// The global options block must be exactly the pre-feature shape.
	const preFeatureGlobals = "{\n email admin@dbrs.space\n servers {\n protocols h1\n }\n}\n"
	if !strings.Contains(rendered, preFeatureGlobals) {
		t.Errorf("default global options block drifted from pre-feature output; got:\n%s", rendered)
	}
}
// TestGenerateCaddyfile_SNIRouterEnabledMovesHTTPSTo8443 checks the opt-in
// path: once EnableSNIRouterMode has been called the Caddyfile carries the
// `https_port` global option (HTTPS on :8443) inside the top-level options
// block, and the plain HTTP :80 block is still present so ACME HTTP-01 and the
// HTTP catch-all keep working.
func TestGenerateCaddyfile_SNIRouterEnabledMovesHTTPSTo8443(t *testing.T) {
	installer := newTestCaddyInstaller()
	installer.EnableSNIRouterMode()
	rendered := installer.generateCaddyfile("node1.dbrs.space", "admin@dbrs.space",
		"http://localhost:6001/v1/internal/acme", "dbrs.space")

	portOption := fmt.Sprintf("https_port %d", CaddyHTTPSPortBehindSNI)
	if found := strings.Contains(rendered, portOption); !found {
		t.Errorf("SNI-router Caddyfile must contain %q; got:\n%s", portOption, rendered)
	}

	// The option has to sit in the global options block, ahead of `servers`.
	const globalsWithPort = "{\n email admin@dbrs.space\n https_port 8443\n servers {\n protocols h1\n }\n}\n"
	if found := strings.Contains(rendered, globalsWithPort); !found {
		t.Errorf("https_port not placed correctly in global options block; got:\n%s", rendered)
	}

	// The plain HTTP :80 catch-all must survive the switch.
	if found := strings.Contains(rendered, ":80 {"); !found {
		t.Errorf("HTTP :80 block must remain when SNI router enabled; got:\n%s", rendered)
	}
}

View File

@ -0,0 +1,203 @@
package installers
import (
"fmt"
"io"
"os"
"path/filepath"
)
// SNI router installer (feat-124, stealth TURN-over-443).
//
// Unlike the binary installers (Caddy, ntfy), the orama-sni-router binary is
// built and shipped to the node by `orama build` / the install tarball — this
// installer only writes the router's YAML config and the systemd unit, and
// drives the unit's lifecycle (install+enable+start when enabled,
// stop+disable when not).
// Fixed values rendered into the generated sni-router.yaml and systemd unit.
// The duration-like settings are kept as strings because they are substituted
// verbatim into the YAML template rather than parsed by the installer.
const (
// SNIRouterListenAddr is the public port the router binds. It owns :443 so
// Caddy is moved to CaddyHTTPSPortBehindSNI (see caddy.go).
SNIRouterListenAddr = ":443"
// SNIRouterServiceName is the systemd unit name.
SNIRouterServiceName = "orama-sni-router.service"
// SNIRouterConfigName is the router config filename (resolved under
// <oramaDir>/configs by the binary's config.DefaultPath lookup).
SNIRouterConfigName = "sni-router.yaml"
// sniRouterRescanInterval is how often the router rescans the namespaces
// directory for per-namespace TURNS listeners. Matches the library default
// (sniproxy.DefaultDiscoveryRescanInterval); kept as a literal here to avoid
// importing the runtime package into the installer.
// NOTE(review): nothing enforces that this literal tracks the library
// default — re-check it if that default ever changes.
sniRouterRescanInterval = "30s"
// sniRouterClientHelloTimeout / sniRouterBackendDialTimeout bound the
// per-connection ClientHello peek and backend dial (slowloris / dead-backend
// protection). Mirror the sniproxy server defaults.
sniRouterClientHelloTimeout = "5s"
sniRouterBackendDialTimeout = "5s"
// sniRouterMaxConcurrentConns caps in-flight connections on the public
// :443 listener (DoS guard); mirrors the sniproxy server default.
sniRouterMaxConcurrentConns = 10000
// sniRouterSystemdUnitPath is where the unit file is written.
sniRouterSystemdUnitPath = "/etc/systemd/system/" + SNIRouterServiceName
// sniRouterBinaryPath is the installed binary path on the node; it is used
// both as the unit's ExecStart target and by IsInstalled.
sniRouterBinaryPath = "/opt/orama/bin/orama-sni-router"
)
// SNIRouterInstaller writes the orama-sni-router config + systemd unit and
// manages the unit lifecycle. The caddy fallback port matches
// CaddyHTTPSPortBehindSNI so unmatched SNIs (regular HTTPS) reach the moved
// Caddy listener.
type SNIRouterInstaller struct {
// BaseInstaller is embedded; it is built from the arch and log writer handed
// to NewSNIRouterInstaller.
*BaseInstaller
// oramaDir is the node's .orama data root; the router config lives under
// <oramaDir>/configs and the scanned namespaces under <oramaDir>/data/namespaces.
oramaDir string // e.g. "/opt/orama/.orama"
}
// NewSNIRouterInstaller builds an SNI router installer for the given
// architecture and log writer. oramaDir is the node's .orama data root, i.e.
// the directory that holds configs/ and data/namespaces.
func NewSNIRouterInstaller(arch string, logWriter io.Writer, oramaDir string) *SNIRouterInstaller {
	installer := new(SNIRouterInstaller)
	installer.BaseInstaller = NewBaseInstaller(arch, logWriter)
	installer.oramaDir = oramaDir
	return installer
}
// configPath is the location of the router config,
// <oramaDir>/configs/<SNIRouterConfigName>: the file Configure writes and the
// path the binary resolves through its DefaultPath lookup.
func (si *SNIRouterInstaller) configPath() string {
	configsDir := filepath.Join(si.oramaDir, "configs")
	return filepath.Join(configsDir, SNIRouterConfigName)
}
// namespacesDir is <oramaDir>/data/namespaces, the per-namespace config root
// that the router scans for TURNS listeners.
func (si *SNIRouterInstaller) namespacesDir() string {
	dataDir := filepath.Join(si.oramaDir, "data")
	return filepath.Join(dataDir, "namespaces")
}
// Configure writes the router YAML config. baseDomain drives the stealth and
// "turn.ns-*" SNI hostnames the router derives during discovery. Idempotent.
//
// The file is staged next to its final location and then renamed over it, so
// a router that re-reads its config while the installer runs sees either the
// old or the new contents, never a half-written file.
//
// Returns an error when baseDomain is empty, when the configs directory cannot
// be created, or when the config cannot be written or moved into place.
func (si *SNIRouterInstaller) Configure(baseDomain string) error {
	if baseDomain == "" {
		return fmt.Errorf("sni-router: base domain must not be empty")
	}

	target := si.configPath()
	configDir := filepath.Dir(target)
	if err := os.MkdirAll(configDir, 0755); err != nil {
		return fmt.Errorf("sni-router: create config dir %s: %w", configDir, err)
	}

	content := si.generateConfig(baseDomain)

	// Stage in the same directory so the rename stays on one filesystem.
	staging := target + ".tmp"
	if err := os.WriteFile(staging, []byte(content), 0644); err != nil {
		return fmt.Errorf("sni-router: write config %s: %w", target, err)
	}
	if err := os.Rename(staging, target); err != nil {
		// Best-effort cleanup; the rename failure is the error worth reporting.
		_ = os.Remove(staging)
		return fmt.Errorf("sni-router: write config %s: %w", target, err)
	}
	return nil
}
// generateConfig renders the sni-router.yaml. The fallback is Caddy on
// CaddyHTTPSPortBehindSNI; turn_discovery scans the node's namespaces dir so
// per-namespace TURNS routes appear without a router restart. No static routes
// are emitted — every TURNS route is auto-discovered.
//
// The template uses indexed verbs (%[N]…) because the Caddy port is
// substituted twice. namespaces_dir and base_domain are written with %q, so
// they appear double-quoted in the output; the timeouts and rescan interval
// are inserted as the bare literals defined in this package's constants.
func (si *SNIRouterInstaller) generateConfig(baseDomain string) string {
return fmt.Sprintf(`# Orama SNI router config (feat-124, stealth TURN-over-443).
# Generated by the installer re-running install/upgrade overwrites this file.
#
# The router owns :443, peeks each connection's TLS ClientHello SNI, and
# forwards the raw (still-encrypted) stream to a backend. TLS is NOT terminated
# here. Unmatched SNIs (regular HTTPS) go to the fallback (Caddy on :%[2]d).
listen: "%[1]s"
client_hello_timeout: %[3]s
backend_dial_timeout: %[4]s
max_concurrent_conns: %[5]d
fallback:
name: caddy
addr: "127.0.0.1:%[2]d"
# Per-namespace stealth-TURN routes are auto-discovered by scanning
# <namespaces_dir>/*/configs/turn-*.yaml every rescan_interval. Each namespace
# with a TURNS listener gets two routes (the bland stealth host and a
# turn.ns-<namespace>.<base_domain> alias) forwarding to its local TURNS port.
turn_discovery:
namespaces_dir: %[6]q
base_domain: %[7]q
rescan_interval: %[8]s
# No static routes: every TURNS route comes from turn_discovery above.
routes: []
`,
SNIRouterListenAddr,
CaddyHTTPSPortBehindSNI,
sniRouterClientHelloTimeout,
sniRouterBackendDialTimeout,
sniRouterMaxConcurrentConns,
si.namespacesDir(),
baseDomain,
sniRouterRescanInterval,
)
}
// generateSystemdUnit renders /etc/systemd/system/orama-sni-router.service.
// Runs as the orama user with CAP_NET_BIND_SERVICE so it can bind :443 without
// root. Ordered Before=caddy.service so the router is ready before Caddy
// switches to :8443. Restart=on-failure.
//
// ExecStart is the installed binary path followed by --config and this
// installer's configPath(). The EnvironmentFile entry carries a leading "-",
// which makes the env file optional for systemd. The unit also applies
// NoNewPrivileges, ProtectSystem=strict, ProtectHome and PrivateTmp.
func (si *SNIRouterInstaller) generateSystemdUnit() string {
return fmt.Sprintf(`[Unit]
Description=Orama SNI Router (TLS-level :443 backend forwarder)
Documentation=https://github.com/DeBrosOfficial/network
After=network.target
Before=caddy.service
PartOf=orama-node.service
[Service]
Type=simple
WorkingDirectory=/opt/orama
EnvironmentFile=-/opt/orama/.orama/data/sni-router.env
ExecStart=%s --config %s
# Bind privileged ports (:80, :443) without running as root.
AmbientCapabilities=CAP_NET_BIND_SERVICE
CapabilityBoundingSet=CAP_NET_BIND_SERVICE
User=orama
Group=orama
NoNewPrivileges=yes
ProtectSystem=strict
ProtectHome=yes
PrivateTmp=yes
LimitNOFILE=65536
TimeoutStopSec=15s
KillMode=mixed
KillSignal=SIGTERM
Restart=on-failure
RestartSec=5s
StandardOutput=journal
StandardError=journal
SyslogIdentifier=orama-sni-router
[Install]
WantedBy=multi-user.target
`, sniRouterBinaryPath, si.configPath())
}
// WriteSystemdUnit renders the unit and writes it to sniRouterSystemdUnitPath
// with mode 0644, replacing any existing file, which makes repeated calls
// safe. A write failure is wrapped with the unit path.
func (si *SNIRouterInstaller) WriteSystemdUnit() error {
	unit := []byte(si.generateSystemdUnit())
	err := os.WriteFile(sniRouterSystemdUnitPath, unit, 0644)
	if err == nil {
		return nil
	}
	return fmt.Errorf("sni-router: write systemd unit %s: %w", sniRouterSystemdUnitPath, err)
}
// IsInstalled answers whether the router binary can be stat'ed at
// sniRouterBinaryPath. Any stat error, not only "does not exist", counts as
// not installed.
func (si *SNIRouterInstaller) IsInstalled() bool {
	if _, statErr := os.Stat(sniRouterBinaryPath); statErr != nil {
		return false
	}
	return true
}

View File

@ -0,0 +1,102 @@
package installers
import (
"io"
"os"
"path/filepath"
"strings"
"testing"
)
// newTestSNIRouterInstaller builds an amd64 installer that discards its log
// output and is rooted at the supplied oramaDir, so anything Configure writes
// lands in a location the test controls.
func newTestSNIRouterInstaller(oramaDir string) *SNIRouterInstaller {
	const arch = "amd64"
	return NewSNIRouterInstaller(arch, io.Discard, oramaDir)
}
// TestGenerateConfig_includesDiscoveryAndFallback renders the sni-router.yaml
// for a temp oramaDir and checks the pieces that matter: the :443 listener,
// the Caddy fallback on the moved HTTPS port, and a turn_discovery block that
// names the base domain and the node's data/namespaces directory.
func TestGenerateConfig_includesDiscoveryAndFallback(t *testing.T) {
	root := t.TempDir()
	rendered := newTestSNIRouterInstaller(root).generateConfig("orama-devnet.network")

	expected := []string{
		`listen: ":443"`,
		"fallback:",
		`addr: "127.0.0.1:8443"`,
		"turn_discovery:",
		"base_domain: \"orama-devnet.network\"",
		"rescan_interval: 30s",
		"routes: []",
	}
	for i := range expected {
		if strings.Contains(rendered, expected[i]) {
			continue
		}
		t.Errorf("generated sni-router config missing %q\n---\n%s", expected[i], rendered)
	}

	// namespaces_dir must be the node's data/namespaces path.
	nsDir := filepath.Join(root, "data", "namespaces")
	if found := strings.Contains(rendered, nsDir); !found {
		t.Errorf("config missing namespaces_dir %q\n---\n%s", nsDir, rendered)
	}
}
// TestConfigure_writesFileToConfigsDir checks that Configure writes the YAML
// to <oramaDir>/configs/sni-router.yaml and that the file carries the base
// domain it was given.
func TestConfigure_writesFileToConfigsDir(t *testing.T) {
	root := t.TempDir()
	if err := newTestSNIRouterInstaller(root).Configure("example.com"); err != nil {
		t.Fatalf("Configure failed: %v", err)
	}

	cfgFile := filepath.Join(root, "configs", "sni-router.yaml")
	raw, readErr := os.ReadFile(cfgFile)
	if readErr != nil {
		t.Fatalf("expected config at %s: %v", cfgFile, readErr)
	}

	written := string(raw)
	if !strings.Contains(written, `base_domain: "example.com"`) {
		t.Errorf("written config missing base_domain; got:\n%s", written)
	}
}
// TestConfigure_rejectsEmptyBaseDomain checks that Configure returns an error
// for an empty base domain instead of emitting a config that would derive
// bogus hostnames.
func TestConfigure_rejectsEmptyBaseDomain(t *testing.T) {
	installer := newTestSNIRouterInstaller(t.TempDir())
	err := installer.Configure("")
	if err != nil {
		return
	}
	t.Errorf("expected error for empty base domain")
}
// TestGenerateSystemdUnit_shape checks the key directives of the generated
// unit: the CAP_NET_BIND_SERVICE grant, the orama user, restart-on-failure,
// the optional environment file, ordering before Caddy, and an ExecStart that
// names the installed binary plus this installer's config path. It also checks
// that the config path lives under the installer's oramaDir.
func TestGenerateSystemdUnit_shape(t *testing.T) {
	root := t.TempDir()
	installer := newTestSNIRouterInstaller(root)
	rendered := installer.generateSystemdUnit()
	cfgPath := installer.configPath()

	directives := []string{
		"AmbientCapabilities=CAP_NET_BIND_SERVICE",
		"User=orama",
		"Restart=on-failure",
		"EnvironmentFile=-/opt/orama/.orama/data/sni-router.env",
		// ExecStart must point at the ABSOLUTE config path so it doesn't
		// depend on WorkingDirectory/$HOME resolution at runtime.
		"ExecStart=/opt/orama/bin/orama-sni-router --config " + cfgPath,
		"Before=caddy.service",
	}
	for i := range directives {
		if strings.Contains(rendered, directives[i]) {
			continue
		}
		t.Errorf("systemd unit missing %q\n---\n%s", directives[i], rendered)
	}

	if !strings.Contains(cfgPath, root) {
		t.Errorf("configPath %q not rooted at the oramaDir %q", cfgPath, root)
	}
}

View File

@ -741,11 +741,35 @@ func (ps *ProductionSetup) Phase4GenerateConfigs(peerAddresses []string, vpsIP s
ps.logf(" ✓ ntfy config generated (base_url: %s)", ntfyBaseURL)
}
// Stealth TURN-over-443 (feat-124): when the node opted in
// (sni_router.enabled in the node.yaml just written above), Caddy
// must vacate :443 so the orama-sni-router can own it. Move Caddy's
// HTTPS listener to :8443 BEFORE ConfigureCaddy renders the Caddyfile.
// When not opted in, the Caddyfile is byte-identical to before.
if ps.configGenerator.SNIRouterEnabled() {
ps.binaryInstaller.EnableCaddySNIRouterMode()
ps.logf(" ✓ SNI router enabled — Caddy HTTPS will bind :8443")
}
if err := ps.binaryInstaller.ConfigureCaddy(caddyDomain, email, acmeEndpoint, baseDomain); err != nil {
ps.logf(" ⚠️ Caddy config warning: %v", err)
} else {
ps.logf(" ✓ Caddy config generated")
}
// Stealth TURN-over-443 (feat-124): when opted in, write the
// orama-sni-router config (listen :443, fallback Caddy :8443,
// turn_discovery scanning this node's namespaces dir for the cluster's
// base domain). The unit lifecycle is driven in Phase5 after Caddy has
// moved to :8443. The router uses the base domain as the zone for
// stealth/turn.ns-* hostnames.
if ps.configGenerator.SNIRouterEnabled() {
if err := ps.binaryInstaller.ConfigureSNIRouter(dnsZone); err != nil {
ps.logf(" ⚠️ SNI router config warning: %v", err)
} else {
ps.logf(" ✓ SNI router config generated (zone: %s)", dnsZone)
}
}
}
return nil
@ -871,6 +895,14 @@ func (ps *ProductionSetup) Phase5CreateSystemdServices(enableHTTPS bool) error {
}
}
// SNI router unit (feat-124). Write the unit whenever the binary is present
// so the daemon-reload below picks it up; the enable/start vs stop/disable
// decision (based on sni_router.enabled) happens after Caddy has moved to
// :8443, in the start section.
if ps.binaryInstaller.WriteSNIRouterUnit() == nil {
ps.logf(" ✓ SNI router service unit created: %s", ps.binaryInstaller.SNIRouterServiceName())
}
// Reload systemd daemon
if err := ps.serviceController.DaemonReload(); err != nil {
return fmt.Errorf("failed to reload systemd: %w", err)
@ -980,6 +1012,31 @@ func (ps *ProductionSetup) Phase5CreateSystemdServices(enableHTTPS bool) error {
}
}
// Stealth TURN-over-443 (feat-124) cutover. Caddy has just been
// reconfigured to :8443 and restarted above, so :443 is now free for the
// SNI router. When opted in, enable+start the router; when not, stop+disable
// it so a node that flipped the flag off cleanly returns :443 to Caddy.
sniSvc := ps.binaryInstaller.SNIRouterServiceName()
if ps.configGenerator.SNIRouterEnabled() {
if err := ps.serviceController.EnableService(sniSvc); err != nil {
ps.logf(" ⚠️ Failed to enable %s: %v", sniSvc, err)
}
if err := ps.serviceController.RestartService(sniSvc); err != nil {
ps.logf(" ⚠️ Failed to start %s: %v", sniSvc, err)
} else {
ps.logf(" - %s started (owns :443)", sniSvc)
}
} else {
// Not opted in: ensure the router is not holding :443. Errors are
// non-fatal — the unit may simply not be loaded on this node.
if err := ps.serviceController.StopService(sniSvc); err != nil {
ps.logf(" %s not running (expected when disabled): %v", sniSvc, err)
}
if err := ps.serviceController.DisableService(sniSvc); err != nil {
ps.logf(" %s not enabled (expected when disabled): %v", sniSvc, err)
}
}
// Start ntfy on every node (#72). Caddy must already be up (it
// terminates TLS for push.<dnsZone>), which the order above
// guarantees.

View File

@ -0,0 +1,72 @@
package production
import (
"strings"
"testing"
)
// TestGenerateNodeConfig_preservesSNIRouterEnabled is the regression test for
// the feat-124 regen-wipe class of outage (cf. bugboard #259/#846 for webrtc):
// a config regeneration must NOT silently reset an operator's
// sni_router.enabled: true back to false, which would stop the :443 router and
// break stealth TURN. We write a node.yaml with the flag set, regenerate, and
// assert it survives.
//
// The assertion is scoped to the line directly under the top-level sni_router
// key. A plain substring search for "enabled: true" would also match other
// blocks (the fixture sets http_gateway.enabled: true) and explanatory
// comments in the rendered file, so it could never detect the regression.
func TestGenerateNodeConfig_preservesSNIRouterEnabled(t *testing.T) {
	dir := t.TempDir()
	// Escaped newlines keep the YAML nesting explicit.
	writeNodeYAML(t, dir, "sni_router:\n  enabled: true\nhttp_gateway:\n  enabled: true\n")

	cg := NewConfigGenerator(dir)
	out, err := cg.GenerateNodeConfig(nil, "10.0.0.5", "", "node-1.dbrs.space", "dbrs.space", false)
	if err != nil {
		t.Fatalf("GenerateNodeConfig failed: %v", err)
	}

	// Anchor on a line start so a commented-out or nested mention of the key
	// is not mistaken for the real block.
	const key = "\nsni_router:"
	idx := strings.Index(out, key)
	if idx < 0 {
		t.Fatalf("regenerated node.yaml missing sni_router block\n---\n%s", out)
	}
	body := strings.TrimLeft(out[idx+len(key):], " \t\r\n")
	flag := strings.TrimSpace(strings.SplitN(body, "\n", 2)[0])
	if flag != "enabled: true" {
		t.Errorf("regenerated node.yaml did not preserve sni_router.enabled: true (got %q)\n---\n%s", flag, out)
	}
}
// TestGenerateNodeConfig_sniRouterDefaultsFalse verifies a fresh install (no
// existing node.yaml) renders sni_router.enabled: false — default OFF — and
// that SNIRouterEnabled() agrees.
//
// The rendered flag is read from the line directly under the top-level
// sni_router key, so an "enabled: false" belonging to some other block cannot
// satisfy the assertion.
func TestGenerateNodeConfig_sniRouterDefaultsFalse(t *testing.T) {
	dir := t.TempDir()
	cg := NewConfigGenerator(dir)
	out, err := cg.GenerateNodeConfig(nil, "10.0.0.5", "", "node-1.dbrs.space", "dbrs.space", false)
	if err != nil {
		t.Fatalf("GenerateNodeConfig failed: %v", err)
	}

	const key = "\nsni_router:"
	idx := strings.Index(out, key)
	if idx < 0 {
		t.Fatalf("node.yaml missing sni_router block\n---\n%s", out)
	}
	body := strings.TrimLeft(out[idx+len(key):], " \t\r\n")
	flag := strings.TrimSpace(strings.SplitN(body, "\n", 2)[0])
	if flag != "enabled: false" {
		t.Errorf("fresh node.yaml should render sni_router.enabled: false (got %q)\n---\n%s", flag, out)
	}

	if cg.SNIRouterEnabled() {
		t.Errorf("SNIRouterEnabled() should be false on a fresh install")
	}
}
// TestGenerateNodeConfig_sniRouterDisabledStaysFalse verifies an existing
// node.yaml that explicitly disabled the router does not flip on during regen.
//
// The check reads the line directly under the top-level sni_router key rather
// than searching the whole file, so another block's "enabled: false" cannot
// mask a router that was wrongly switched on.
func TestGenerateNodeConfig_sniRouterDisabledStaysFalse(t *testing.T) {
	dir := t.TempDir()
	writeNodeYAML(t, dir, "sni_router:\n enabled: false\nhttp_gateway:\n enabled: true\n")

	cg := NewConfigGenerator(dir)
	out, err := cg.GenerateNodeConfig(nil, "10.0.0.5", "", "node-1.dbrs.space", "dbrs.space", false)
	if err != nil {
		t.Fatalf("GenerateNodeConfig failed: %v", err)
	}

	const key = "\nsni_router:"
	idx := strings.Index(out, key)
	if idx < 0 {
		t.Fatalf("regenerated node.yaml missing sni_router block\n---\n%s", out)
	}
	body := strings.TrimLeft(out[idx+len(key):], " \t\r\n")
	flag := strings.TrimSpace(strings.SplitN(body, "\n", 2)[0])
	if flag != "enabled: false" {
		t.Errorf("disabled sni_router should stay false on regen (got %q)\n---\n%s", flag, out)
	}
}

View File

@ -15,6 +15,14 @@ node:
operator_wallet: "{{.OperatorWallet}}"
{{- end}}
# Stealth TURN-over-443 SNI router (feat-124). When enabled, the node runs
# orama-sni-router on :443 and Caddy is moved to :8443; default-OFF so existing
# nodes are byte-identical until an operator opts in. This block is preserved
# across config regeneration (GenerateNodeConfig carries forward an existing
# sni_router.enabled: true).
sni_router:
enabled: {{if .SNIRouterEnabled}}true{{else}}false{{end}}
database:
data_dir: "{{.DataDir}}/rqlite"
replication_factor: 3

View File

@ -66,6 +66,16 @@ type NodeConfigData struct {
SFUPort int // Local SFU signaling port the gateway proxies to
TURNDomain string // TURN domain (e.g., "turn.ns-myapp.dbrs.space")
TURNSecret string // HMAC-SHA1 shared secret for TURN credential generation
// SNIRouterEnabled gates the stealth TURN-over-443 SNI router (feat-124).
// Rendered as the top-level sni_router.enabled flag. Default false keeps
// existing nodes byte-identical (Caddy stays on :443); when true the node
// runs orama-sni-router on :443 and Caddy moves to :8443. This value is
// carried forward across config regeneration from the existing node.yaml
// (see production/config.go populateSNIRouterConfig) so a regen never wipes
// an operator's opt-in (the same preserve-from-existing discipline as the
// webrtc block, bugboard #259/#846).
SNIRouterEnabled bool
}
// GatewayConfigData holds parameters for gateway.yaml rendering

View File

@ -103,6 +103,36 @@ func TestRenderNodeConfig_webRTC(t *testing.T) {
}
}
// TestRenderNodeConfig_sniRouter verifies the top-level sni_router block is
// always rendered and that its enabled flag follows
// NodeConfigData.SNIRouterEnabled (true when set, false by default).
//
// The flag is read from the line directly under the sni_router key. Searching
// the whole document for "enabled: true" is not sufficient: explanatory
// comments and unrelated blocks in the rendered file can contain the same
// text, which would let the test pass with the flag rendered as false.
func TestRenderNodeConfig_sniRouter(t *testing.T) {
	// sniRouterFlag returns the trimmed first line of the sni_router block and
	// whether the block was found at the top level of the rendered document.
	sniRouterFlag := func(rendered string) (string, bool) {
		const key = "\nsni_router:"
		idx := strings.Index(rendered, key)
		if idx < 0 {
			return "", false
		}
		body := strings.TrimLeft(rendered[idx+len(key):], " \t\r\n")
		return strings.TrimSpace(strings.SplitN(body, "\n", 2)[0]), true
	}

	// Enabled: top-level sni_router block renders enabled: true.
	enabled, err := RenderNodeConfig(NodeConfigData{
		NodeID:           "node1",
		SNIRouterEnabled: true,
	})
	if err != nil {
		t.Fatalf("RenderNodeConfig failed: %v", err)
	}
	if flag, ok := sniRouterFlag(enabled); !ok {
		t.Errorf("rendered node config missing sni_router block\n---\n%s", enabled)
	} else if flag != "enabled: true" {
		t.Errorf("sni_router should render enabled: true (got %q)\n---\n%s", flag, enabled)
	}

	// Default: the block is always present, defaulting to false (so the flag is
	// discoverable to operators and round-trips through regen).
	disabled, err := RenderNodeConfig(NodeConfigData{NodeID: "node1"})
	if err != nil {
		t.Fatalf("RenderNodeConfig failed: %v", err)
	}
	if flag, ok := sniRouterFlag(disabled); !ok {
		t.Errorf("sni_router block should always be present\n---\n%s", disabled)
	} else if flag != "enabled: false" {
		t.Errorf("default sni_router should render enabled: false (got %q)\n---\n%s", flag, disabled)
	}
}
func TestRenderGatewayConfig(t *testing.T) {
bootstrapMultiaddr := "/ip4/127.0.0.1/tcp/4001/p2p/Qm1234567890"
data := GatewayConfigData{

View File

@ -1114,6 +1114,48 @@ func (g *Gateway) namespaceWebRTCDisablePublicHandler(w http.ResponseWriter, r *
})
}
// namespaceWebRTCStealthPublicHandler serves
// POST /v1/namespace/webrtc/stealth/{enable|disable} (feat-124). The caller is
// authenticated by JWT/API key in the auth middleware, and the namespace is
// read from the request context. enable selects the operation: true calls
// EnableWebRTCStealth, false calls DisableWebRTCStealth.
//
// Responses: 405 for non-POST, 403 when no namespace is in the context, 503
// when no WebRTC manager is configured, 500 with the manager's error text when
// the toggle fails, otherwise 200 with a JSON status body.
func (g *Gateway) namespaceWebRTCStealthPublicHandler(w http.ResponseWriter, r *http.Request, enable bool) {
	if r.Method != http.MethodPost {
		writeError(w, http.StatusMethodNotAllowed, "method not allowed")
		return
	}

	ns, _ := r.Context().Value(CtxKeyNamespaceOverride).(string)
	if ns == "" {
		writeError(w, http.StatusForbidden, "namespace not resolved")
		return
	}

	if g.webrtcManager == nil {
		writeError(w, http.StatusServiceUnavailable, "WebRTC management not enabled")
		return
	}

	// Pick the manager call and the verb used in the success message together
	// so they cannot drift apart.
	toggle, action := g.webrtcManager.DisableWebRTCStealth, "disabled"
	if enable {
		toggle, action = g.webrtcManager.EnableWebRTCStealth, "enabled"
	}
	if err := toggle(r.Context(), ns); err != nil {
		writeError(w, http.StatusInternalServerError, err.Error())
		return
	}

	payload := map[string]interface{}{
		"status":    "ok",
		"namespace": ns,
		"message":   "WebRTC stealth " + action + " successfully",
	}
	w.Header().Set("Content-Type", "application/json")
	w.WriteHeader(http.StatusOK)
	json.NewEncoder(w).Encode(payload)
}
// namespaceWebRTCStatusPublicHandler handles GET /v1/namespace/webrtc/status
// Public: authenticated by JWT/API key via auth middleware. Namespace from context.
func (g *Gateway) namespaceWebRTCStatusPublicHandler(w http.ResponseWriter, r *http.Request) {

View File

@ -64,6 +64,12 @@ type WebRTCManager interface {
DisableWebRTC(ctx context.Context, namespaceName string) error
// GetWebRTCStatus returns the WebRTC config for a namespace, or nil if not enabled.
GetWebRTCStatus(ctx context.Context, namespaceName string) (interface{}, error)
// EnableWebRTCStealth / DisableWebRTCStealth toggle the censorship-
// resistant TURNS:443 path (feat-124): stealth cert on the TURN servers,
// stealth DNS records, and the turns:<stealth-host>:443 rung in the
// turn.credentials URI ladder. Requires WebRTC to already be enabled.
EnableWebRTCStealth(ctx context.Context, namespaceName string) error
DisableWebRTCStealth(ctx context.Context, namespaceName string) error
}
// Handlers holds dependencies for authentication HTTP handlers

View File

@ -53,6 +53,8 @@ type SpawnRequest struct {
GatewaySFUPort int `json:"gateway_sfu_port,omitempty"`
GatewayTURNDomain string `json:"gateway_turn_domain,omitempty"`
GatewayTURNSecret string `json:"gateway_turn_secret,omitempty"`
// Stealth TURNS:443 host (feat-124); empty when stealth is disabled.
GatewayTURNStealthDomain string `json:"gateway_turn_stealth_domain,omitempty"`
// Host serverless secrets encryption key forwarded to the spawned
// namespace gateway (bugboard #837 follow-up). Same value on every node.
GatewaySecretsEncryptionKey string `json:"gateway_secrets_encryption_key,omitempty"`
@ -67,14 +69,15 @@ type SpawnRequest struct {
RQLiteDSN string `json:"rqlite_dsn,omitempty"`
// TURN config (when action = "spawn-turn")
TURNListenAddr string `json:"turn_listen_addr,omitempty"`
TURNTURNSAddr string `json:"turn_turns_addr,omitempty"`
TURNPublicIP string `json:"turn_public_ip,omitempty"`
TURNRealm string `json:"turn_realm,omitempty"`
TURNAuthSecret string `json:"turn_auth_secret,omitempty"`
TURNRelayStart int `json:"turn_relay_start,omitempty"`
TURNRelayEnd int `json:"turn_relay_end,omitempty"`
TURNDomain string `json:"turn_domain,omitempty"`
TURNListenAddr string `json:"turn_listen_addr,omitempty"`
TURNTURNSAddr string `json:"turn_turns_addr,omitempty"`
TURNPublicIP string `json:"turn_public_ip,omitempty"`
TURNRealm string `json:"turn_realm,omitempty"`
TURNAuthSecret string `json:"turn_auth_secret,omitempty"`
TURNRelayStart int `json:"turn_relay_start,omitempty"`
TURNRelayEnd int `json:"turn_relay_end,omitempty"`
TURNDomain string `json:"turn_domain,omitempty"`
TURNStealthDomain string `json:"turn_stealth_domain,omitempty"`
// Cluster state (when action = "save-cluster-state")
ClusterState json.RawMessage `json:"cluster_state,omitempty"`
@ -237,6 +240,7 @@ func (h *SpawnHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
WebRTCEnabled: req.GatewayWebRTCEnabled,
SFUPort: req.GatewaySFUPort,
TURNDomain: req.GatewayTURNDomain,
TURNStealthDomain: req.GatewayTURNStealthDomain,
TURNSecret: req.GatewayTURNSecret,
SecretsEncryptionKey: req.GatewaySecretsEncryptionKey,
}
@ -291,6 +295,7 @@ func (h *SpawnHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
WebRTCEnabled: req.GatewayWebRTCEnabled,
SFUPort: req.GatewaySFUPort,
TURNDomain: req.GatewayTURNDomain,
TURNStealthDomain: req.GatewayTURNStealthDomain,
TURNSecret: req.GatewayTURNSecret,
SecretsEncryptionKey: req.GatewaySecretsEncryptionKey,
}
@ -360,6 +365,7 @@ func (h *SpawnHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
RelayPortStart: req.TURNRelayStart,
RelayPortEnd: req.TURNRelayEnd,
TURNDomain: req.TURNDomain,
StealthDomain: req.TURNStealthDomain,
}
if err := h.systemdSpawner.SpawnTURN(ctx, req.Namespace, req.NodeID, cfg); err != nil {
h.logger.Error("Failed to spawn TURN instance", zap.Error(err))

View File

@ -95,6 +95,11 @@ type InstanceConfig struct {
SFUPort int // SFU signaling port on this node
TURNDomain string // TURN server domain (e.g., "turn.ns-alice.orama-devnet.network")
TURNSecret string // TURN shared secret for credential generation
// TURNStealthDomain is the neutral stealth TURNS host (feat-124,
// cdn-<hash>.<base-domain>). Non-empty only when webrtc stealth is
// enabled for the namespace; turn.credentials then advertises
// `turns:<TURNStealthDomain>:443` as the final URI-ladder rung.
TURNStealthDomain string
// SecretsEncryptionKey is the host-wide AES-256 serverless secrets
// encryption key (hex-encoded). Bugboard #837 follow-up: the host gateway
// receives this via gateway.Config but spawned namespace gateways never
@ -109,10 +114,11 @@ type InstanceConfig struct {
// GatewayYAMLWebRTC represents the webrtc section of the gateway YAML config.
// Must match yamlWebRTCCfg in cmd/gateway/config.go.
type GatewayYAMLWebRTC struct {
Enabled bool `yaml:"enabled"`
SFUPort int `yaml:"sfu_port,omitempty"`
TURNDomain string `yaml:"turn_domain,omitempty"`
TURNSecret string `yaml:"turn_secret,omitempty"`
Enabled bool `yaml:"enabled"`
SFUPort int `yaml:"sfu_port,omitempty"`
TURNDomain string `yaml:"turn_domain,omitempty"`
TURNSecret string `yaml:"turn_secret,omitempty"`
TURNStealthDomain string `yaml:"turn_stealth_domain,omitempty"`
}
// GatewayYAMLConfig represents the gateway YAML configuration structure
@ -334,10 +340,11 @@ func (is *InstanceSpawner) generateConfig(configPath string, cfg InstanceConfig,
IPFSAPIURL: cfg.IPFSAPIURL,
IPFSReplicationFactor: cfg.IPFSReplicationFactor,
WebRTC: GatewayYAMLWebRTC{
Enabled: cfg.WebRTCEnabled,
SFUPort: cfg.SFUPort,
TURNDomain: cfg.TURNDomain,
TURNSecret: cfg.TURNSecret,
Enabled: cfg.WebRTCEnabled,
SFUPort: cfg.SFUPort,
TURNDomain: cfg.TURNDomain,
TURNSecret: cfg.TURNSecret,
TURNStealthDomain: cfg.TURNStealthDomain,
},
SecretsEncryptionKey: cfg.SecretsEncryptionKey,
}

View File

@ -67,6 +67,12 @@ func (g *Gateway) Routes() http.Handler {
// Namespace WebRTC enable/disable/status (public, JWT/API key auth via middleware)
mux.HandleFunc("/v1/namespace/webrtc/enable", g.namespaceWebRTCEnablePublicHandler)
mux.HandleFunc("/v1/namespace/webrtc/disable", g.namespaceWebRTCDisablePublicHandler)
mux.HandleFunc("/v1/namespace/webrtc/stealth/enable", func(w http.ResponseWriter, r *http.Request) {
g.namespaceWebRTCStealthPublicHandler(w, r, true)
})
mux.HandleFunc("/v1/namespace/webrtc/stealth/disable", func(w http.ResponseWriter, r *http.Request) {
g.namespaceWebRTCStealthPublicHandler(w, r, false)
})
mux.HandleFunc("/v1/namespace/webrtc/status", g.namespaceWebRTCStatusPublicHandler)
// auth endpoints

View File

@ -678,23 +678,24 @@ func (cm *ClusterManager) spawnGatewayRemote(ctx context.Context, nodeIP string,
}
resp, err := cm.sendSpawnRequest(ctx, nodeIP, map[string]interface{}{
"action": "spawn-gateway",
"namespace": cfg.Namespace,
"node_id": cfg.NodeID,
"gateway_http_port": cfg.HTTPPort,
"gateway_base_domain": cfg.BaseDomain,
"gateway_rqlite_dsn": cfg.RQLiteDSN,
"gateway_global_rqlite_dsn": cfg.GlobalRQLiteDSN,
"gateway_olric_servers": cfg.OlricServers,
"gateway_olric_timeout": olricTimeout,
"ipfs_cluster_api_url": cfg.IPFSClusterAPIURL,
"ipfs_api_url": cfg.IPFSAPIURL,
"ipfs_timeout": ipfsTimeout,
"ipfs_replication_factor": cfg.IPFSReplicationFactor,
"gateway_webrtc_enabled": cfg.WebRTCEnabled,
"gateway_sfu_port": cfg.SFUPort,
"gateway_turn_domain": cfg.TURNDomain,
"gateway_turn_secret": cfg.TURNSecret,
"action": "spawn-gateway",
"namespace": cfg.Namespace,
"node_id": cfg.NodeID,
"gateway_http_port": cfg.HTTPPort,
"gateway_base_domain": cfg.BaseDomain,
"gateway_rqlite_dsn": cfg.RQLiteDSN,
"gateway_global_rqlite_dsn": cfg.GlobalRQLiteDSN,
"gateway_olric_servers": cfg.OlricServers,
"gateway_olric_timeout": olricTimeout,
"ipfs_cluster_api_url": cfg.IPFSClusterAPIURL,
"ipfs_api_url": cfg.IPFSAPIURL,
"ipfs_timeout": ipfsTimeout,
"ipfs_replication_factor": cfg.IPFSReplicationFactor,
"gateway_webrtc_enabled": cfg.WebRTCEnabled,
"gateway_sfu_port": cfg.SFUPort,
"gateway_turn_domain": cfg.TURNDomain,
"gateway_turn_secret": cfg.TURNSecret,
"gateway_turn_stealth_domain": cfg.TURNStealthDomain,
// Bugboard #837 follow-up: carry the host secrets encryption key to
// the remote node so its spawned namespace gateway can manage secrets.
"gateway_secrets_encryption_key": cfg.SecretsEncryptionKey,
@ -1614,6 +1615,7 @@ func (cm *ClusterManager) restoreClusterOnNode(ctx context.Context, clusterID, n
gwCfg.SFUPort = sfuBlock.SFUSignalingPort
gwCfg.TURNDomain = fmt.Sprintf("turn.ns-%s.%s", namespaceName, cm.baseDomain)
gwCfg.TURNSecret = webrtcCfg.TURNSharedSecret
gwCfg.TURNStealthDomain = cm.stealthDomainFor(namespaceName, webrtcCfg)
}
}
@ -1679,8 +1681,9 @@ type ClusterLocalState struct {
// WebRTC fields (zero values when WebRTC not enabled — backward compatible)
HasSFU bool `json:"has_sfu,omitempty"`
HasTURN bool `json:"has_turn,omitempty"`
TURNSharedSecret string `json:"turn_shared_secret,omitempty"` // Needed for gateway to generate TURN credentials on cold start
TURNDomain string `json:"turn_domain,omitempty"` // TURN server domain for gateway config
TURNSharedSecret string `json:"turn_shared_secret,omitempty"` // Needed for gateway to generate TURN credentials on cold start
TURNDomain string `json:"turn_domain,omitempty"` // TURN server domain for gateway config
TURNStealthDomain string `json:"turn_stealth_domain,omitempty"` // Stealth TURNS:443 host (feat-124); empty when stealth disabled
TURNCredentialTTL int `json:"turn_credential_ttl,omitempty"`
SFUSignalingPort int `json:"sfu_signaling_port,omitempty"`
SFUMediaPortStart int `json:"sfu_media_port_start,omitempty"`
@ -1836,10 +1839,11 @@ func (cm *ClusterManager) RestoreLocalClustersFromDisk(ctx context.Context) (int
// restoreWebRTC is the resolved WebRTC gateway config for a restored
// namespace gateway.
type restoreWebRTC struct {
enabled bool
sfuPort int
turnDomain string
turnSecret string
enabled bool
sfuPort int
turnDomain string
turnSecret string
stealthDomain string // feat-124: empty when webrtc stealth is disabled
}
// chooseRestoreWebRTC resolves a restored gateway's WebRTC config. TWO
@ -1864,11 +1868,12 @@ type restoreWebRTC struct {
// Extracted as a pure function so the precedence is unit-testable without
// standing up the full restore path (systemd spawner + DB + port store).
func chooseRestoreWebRTC(
stateHasSFU bool, stateSFUPort int, stateTURNDomain, stateTURNSecret string,
dbFetch func() (turnSecret, turnDomain string, sfuPort int),
stateHasSFU bool, stateSFUPort int, stateTURNDomain, stateTURNSecret, stateStealthDomain string,
dbFetch func() (turnSecret, turnDomain, stealthDomain string, sfuPort int),
) restoreWebRTC {
turnSecret := stateTURNSecret
turnDomain := stateTURNDomain
stealthDomain := stateStealthDomain
sfuPort := 0
if stateHasSFU && stateSFUPort > 0 {
sfuPort = stateSFUPort
@ -1878,12 +1883,17 @@ func chooseRestoreWebRTC(
// the marker that the namespace has WebRTC enabled at all. The state
// file is not updated by EnableWebRTC, so a namespace enabled after
// the state file was written reaches here with an empty secret.
// (Stealth toggles DO rewrite cluster state on every node, so the
// state-first read stays fresh for stealthDomain too.)
if turnSecret == "" {
if dbSecret, dbDomain, dbSFU := dbFetch(); dbSecret != "" {
if dbSecret, dbDomain, dbStealth, dbSFU := dbFetch(); dbSecret != "" {
turnSecret = dbSecret
if turnDomain == "" {
turnDomain = dbDomain
}
if stealthDomain == "" {
stealthDomain = dbStealth
}
if sfuPort == 0 {
sfuPort = dbSFU
}
@ -1891,10 +1901,11 @@ func chooseRestoreWebRTC(
}
return restoreWebRTC{
enabled: turnSecret != "" || sfuPort > 0,
sfuPort: sfuPort,
turnDomain: turnDomain,
turnSecret: turnSecret,
enabled: turnSecret != "" || sfuPort > 0,
sfuPort: sfuPort,
turnDomain: turnDomain,
turnSecret: turnSecret,
stealthDomain: stealthDomain,
}
}
@ -2050,11 +2061,11 @@ func (cm *ClusterManager) restoreClusterFromState(ctx context.Context, state *Cl
// fields here. The lazy dbFetch only hits the DB when the state
// file is incomplete.
wr := chooseRestoreWebRTC(
state.HasSFU, state.SFUSignalingPort, state.TURNDomain, state.TURNSharedSecret,
func() (turnSecret, turnDomain string, sfuPort int) {
state.HasSFU, state.SFUSignalingPort, state.TURNDomain, state.TURNSharedSecret, state.TURNStealthDomain,
func() (turnSecret, turnDomain, stealthDomain string, sfuPort int) {
webrtcCfg, err := cm.GetWebRTCConfig(ctx, state.NamespaceName)
if err != nil || webrtcCfg == nil {
return "", "", 0
return "", "", "", 0
}
// TURN is namespace-wide; SFU port is per-node and may be
// absent on a gateway-only (non-SFU) node — that's fine,
@ -2065,6 +2076,7 @@ func (cm *ClusterManager) restoreClusterFromState(ctx context.Context, state *Cl
}
return webrtcCfg.TURNSharedSecret,
fmt.Sprintf("turn.ns-%s.%s", state.NamespaceName, cm.baseDomain),
cm.stealthDomainFor(state.NamespaceName, webrtcCfg),
sfu
},
)
@ -2076,6 +2088,7 @@ func (cm *ClusterManager) restoreClusterFromState(ctx context.Context, state *Cl
gwCfg.SFUPort = wr.sfuPort
gwCfg.TURNDomain = wr.turnDomain
gwCfg.TURNSecret = wr.turnSecret
gwCfg.TURNStealthDomain = wr.stealthDomain
}
resp, err := http.Get(fmt.Sprintf("http://localhost:%d/v1/health", pb.GatewayHTTPPort))
@ -2126,6 +2139,7 @@ func (cm *ClusterManager) restoreClusterFromState(ctx context.Context, state *Cl
RelayPortStart: state.TURNRelayPortStart,
RelayPortEnd: state.TURNRelayPortEnd,
TURNDomain: fmt.Sprintf("turn.ns-%s.%s", state.NamespaceName, cm.baseDomain),
StealthDomain: cm.stealthDomainFor(state.NamespaceName, webrtcCfg),
}
if err := cm.systemdSpawner.SpawnTURN(ctx, state.NamespaceName, cm.localNodeID, turnCfg); err != nil {
cm.logger.Error("Failed to restore TURN from state", zap.String("namespace", state.NamespaceName), zap.Error(err))

View File

@ -0,0 +1,263 @@
package namespace
import (
"context"
"fmt"
"github.com/DeBrosOfficial/network/pkg/client"
"github.com/DeBrosOfficial/network/pkg/turn"
"go.uber.org/zap"
)
// Stealth TURNS-over-443 lifecycle (feat-124, censorship-resistant calling).
//
// Enabling stealth for a namespace whose WebRTC is already running:
// 1. creates DNS A records for the neutral stealth host -> the TURN nodes,
// 2. flips namespace_webrtc_config.stealth_enabled,
// 3. re-spawns the namespace's TURN servers with the stealth domain (the
// spawner provisions a Let's Encrypt cert for it — hard-fail, never
// self-signed),
// 4. rewrites cluster-state.json on every node (so DB-less restores keep
// the stealth domain), and
// 5. restarts the namespace gateways so turn.credentials advertises
// `turns:<stealth-host>:443` as the final URI-ladder rung.
//
// The SNI router on :443 discovers the route (stealth host -> local TURN TLS
// port) from the TURN config files on disk — no extra registration step.
// stealthDomainFor resolves the stealth TURNS host for a namespace. It returns
// the host derived from the namespace name and the manager's base domain when
// webrtcCfg is non-nil and has StealthEnabled set; otherwise it returns "",
// which callers treat as "stealth disabled".
func (cm *ClusterManager) stealthDomainFor(namespaceName string, webrtcCfg *WebRTCConfig) string {
	if webrtcCfg != nil && webrtcCfg.StealthEnabled {
		return turn.StealthHostForNamespace(namespaceName, cm.baseDomain)
	}
	return ""
}
// EnableWebRTCStealth enables the stealth TURNS:443 path for a namespace.
// Requires WebRTC to already be enabled.
//
// Order of operations: validate the TURN allocations, create the stealth DNS
// records, set stealth_enabled, re-spawn the TURN servers with the stealth
// domain, then refresh cluster state and gateways and log the event.
//
// Returns ErrWebRTCStealthAlreadyEnabled when the flag is already set, the
// errors from getStealthPrereqs when the cluster or WebRTC config is missing,
// and a wrapped error for any failing step. Failures after DNS creation undo
// the changes made so far on a best-effort basis.
func (cm *ClusterManager) EnableWebRTCStealth(ctx context.Context, namespaceName string) error {
	cluster, webrtcCfg, err := cm.getStealthPrereqs(ctx, namespaceName)
	if err != nil {
		return err
	}
	if webrtcCfg.StealthEnabled {
		return ErrWebRTCStealthAlreadyEnabled
	}

	stealthDomain := turn.StealthHostForNamespace(namespaceName, cm.baseDomain)
	cm.logger.Info("Enabling WebRTC stealth for namespace",
		zap.String("namespace", namespaceName),
		zap.String("stealth_domain", stealthDomain))

	clusterNodes, err := cm.getClusterNodesWithIPs(ctx, cluster.ID)
	if err != nil {
		return fmt.Errorf("failed to get cluster nodes: %w", err)
	}
	turnBlocks, err := cm.getWebRTCBlocksByType(ctx, cluster.ID, "turn")
	if err != nil {
		return fmt.Errorf("failed to get TURN allocations for namespace %s: %w", namespaceName, err)
	}
	if len(turnBlocks) == 0 {
		return fmt.Errorf("no TURN allocations found for namespace %s (is WebRTC fully enabled?)", namespaceName)
	}

	// Resolve every TURN allocation to its node before touching DNS or the
	// DB. The re-spawn step rejects a block whose node is unknown, but only
	// after records exist, the flag is set and earlier nodes have already
	// been restarted; failing here keeps that case side-effect free.
	turnIPs := make([]string, 0, len(turnBlocks))
	for _, block := range turnBlocks {
		matched := false
		for _, n := range clusterNodes {
			if n.NodeID == block.NodeID {
				turnIPs = append(turnIPs, n.PublicIP)
				matched = true
			}
		}
		if !matched {
			return fmt.Errorf("TURN node %s not found in cluster nodes", block.NodeID)
		}
	}

	// DNS first — cert provisioning and clients both need the name to resolve.
	if err := cm.dnsManager.CreateStealthTURNRecords(ctx, namespaceName, stealthDomain, turnIPs); err != nil {
		return fmt.Errorf("failed to create stealth DNS records: %w", err)
	}

	if err := cm.setStealthEnabled(ctx, cluster.ID, true); err != nil {
		// The flag did not flip, so no later step will serve or clean up
		// the records just created. Remove them (best effort) instead of
		// leaving a stealth name pointing at nodes that do not terminate it.
		if delErr := cm.dnsManager.DeleteStealthTURNRecords(ctx, namespaceName); delErr != nil {
			cm.logger.Warn("Failed to delete stealth DNS records after stealth_enabled update failed",
				zap.String("namespace", namespaceName),
				zap.Error(delErr))
		}
		return err
	}

	// Re-spawn TURN with the stealth domain; roll back on failure so the
	// board never claims a stealth endpoint that doesn't terminate TLS.
	if err := cm.respawnTURNWithStealth(ctx, cluster, clusterNodes, turnBlocks, webrtcCfg.TURNSharedSecret, stealthDomain); err != nil {
		cm.rollbackStealthEnable(ctx, cluster.ID, namespaceName)
		return fmt.Errorf("failed to re-spawn TURN with stealth cert (stealth rolled back): %w", err)
	}

	cm.refreshStateAndGateways(ctx, cluster, clusterNodes, stealthDomain, webrtcCfg.TURNSharedSecret)
	cm.logEvent(ctx, cluster.ID, EventWebRTCEnabled, "",
		fmt.Sprintf("WebRTC stealth enabled (%s)", stealthDomain), nil)
	return nil
}
// DisableWebRTCStealth turns the stealth TURNS:443 path off again. TURN and
// the baseline ladder (udp/tcp 3478, turns:5349) keep running.
//
// Order of operations: clear stealth_enabled, re-spawn the TURN servers
// without a stealth domain, delete the stealth DNS records (a failure here is
// only logged), then refresh cluster state and gateways and log the event.
//
// Returns ErrWebRTCStealthNotEnabled when the flag is already clear, the
// errors from getStealthPrereqs when the cluster or WebRTC config is missing,
// and a wrapped error for any failing step.
func (cm *ClusterManager) DisableWebRTCStealth(ctx context.Context, namespaceName string) error {
	cluster, webrtcCfg, err := cm.getStealthPrereqs(ctx, namespaceName)
	if err != nil {
		return err
	}
	if !webrtcCfg.StealthEnabled {
		return ErrWebRTCStealthNotEnabled
	}

	cm.logger.Info("Disabling WebRTC stealth for namespace", zap.String("namespace", namespaceName))

	clusterNodes, err := cm.getClusterNodesWithIPs(ctx, cluster.ID)
	if err != nil {
		return fmt.Errorf("failed to get cluster nodes: %w", err)
	}
	turnBlocks, err := cm.getWebRTCBlocksByType(ctx, cluster.ID, "turn")
	if err != nil {
		return fmt.Errorf("failed to get TURN allocations: %w", err)
	}

	if err := cm.setStealthEnabled(ctx, cluster.ID, false); err != nil {
		return err
	}

	if err := cm.respawnTURNWithStealth(ctx, cluster, clusterNodes, turnBlocks, webrtcCfg.TURNSharedSecret, ""); err != nil {
		// Put the flag back so the caller can retry. If it stayed cleared,
		// the next call would stop at ErrWebRTCStealthNotEnabled and the DNS
		// cleanup and gateway refresh below could never run.
		if rbErr := cm.setStealthEnabled(ctx, cluster.ID, true); rbErr != nil {
			cm.logger.Warn("Failed to restore stealth_enabled after TURN re-spawn failure",
				zap.String("namespace", namespaceName),
				zap.Error(rbErr))
		}
		return fmt.Errorf("failed to re-spawn TURN without stealth: %w", err)
	}

	// DNS cleanup is best effort: the TURN servers no longer serve the
	// stealth host, so a leftover record is not fatal.
	if err := cm.dnsManager.DeleteStealthTURNRecords(ctx, namespaceName); err != nil {
		cm.logger.Warn("Failed to delete stealth DNS records", zap.Error(err))
	}

	cm.refreshStateAndGateways(ctx, cluster, clusterNodes, "", webrtcCfg.TURNSharedSecret)
	cm.logEvent(ctx, cluster.ID, EventWebRTCDisabled, "", "WebRTC stealth disabled", nil)
	return nil
}
// getStealthPrereqs loads the two records every stealth toggle needs: the
// namespace's cluster and its WebRTC config. It returns ErrClusterNotFound
// when no cluster exists, ErrWebRTCNotEnabled when there is no WebRTC config,
// and a wrapped error when either lookup fails.
// NOTE(review): the original comment says the TURN secret in the returned
// config is already decrypted — that happens inside GetWebRTCConfig, not here.
func (cm *ClusterManager) getStealthPrereqs(ctx context.Context, namespaceName string) (*NamespaceCluster, *WebRTCConfig, error) {
	cluster, err := cm.GetClusterByNamespace(ctx, namespaceName)
	switch {
	case err != nil:
		return nil, nil, fmt.Errorf("failed to get cluster: %w", err)
	case cluster == nil:
		return nil, nil, ErrClusterNotFound
	}

	webrtcCfg, err := cm.GetWebRTCConfig(ctx, namespaceName)
	switch {
	case err != nil:
		return nil, nil, fmt.Errorf("failed to get WebRTC config: %w", err)
	case webrtcCfg == nil:
		return nil, nil, ErrWebRTCNotEnabled
	}

	return cluster, webrtcCfg, nil
}
// setStealthEnabled writes the stealth flag (1 = on, 0 = off) into
// namespace_webrtc_config for the given cluster. The UPDATE only targets rows
// with enabled = 1. The statement runs under the internal-auth context, and
// a failure is returned wrapped.
func (cm *ClusterManager) setStealthEnabled(ctx context.Context, clusterID string, enabled bool) error {
	const updateQuery = `UPDATE namespace_webrtc_config SET stealth_enabled = ? WHERE namespace_cluster_id = ? AND enabled = 1`

	authCtx := client.WithInternalAuth(ctx)

	var flag int
	if enabled {
		flag = 1
	}

	_, err := cm.db.Exec(authCtx, updateQuery, flag, clusterID)
	if err != nil {
		return fmt.Errorf("failed to update stealth_enabled: %w", err)
	}
	return nil
}
// respawnTURNWithStealth stops and re-spawns every TURN instance of the
// cluster with the given stealth domain ("" = stealth off). The spawner
// provisions the stealth cert and writes the new TURN config; the SNI
// router's discovery picks the route change up from disk.
//
// Every TURN block is matched to its cluster node BEFORE any instance is
// stopped. A block that references an unknown node therefore fails the call
// without leaving part of the cluster restarted on the new configuration.
//
// Parameters:
//   - cluster: namespace cluster whose TURN instances are re-spawned.
//   - clusterNodes: nodes of that cluster; each TURN block must match one by NodeID.
//   - turnBlocks: TURN port allocations, one per TURN instance.
//   - turnSecret: shared secret passed to each instance as AuthSecret.
//   - stealthDomain: stealth TURNS host, or "" to run without stealth.
//
// Returns an error when a TURN block's node is not in clusterNodes or when a
// spawn fails. A spawn failure aborts the loop, so nodes processed earlier
// stay on the new configuration and the failing node's instance has already
// been stopped.
func (cm *ClusterManager) respawnTURNWithStealth(
	ctx context.Context,
	cluster *NamespaceCluster,
	clusterNodes []clusterNodeInfo,
	turnBlocks []WebRTCPortBlock,
	turnSecret, stealthDomain string,
) error {
	// Index nodes by ID once instead of rescanning the slice per block. The
	// first entry wins on duplicate IDs, matching a front-to-back scan.
	nodesByID := make(map[string]*clusterNodeInfo, len(clusterNodes))
	for i := range clusterNodes {
		if _, seen := nodesByID[clusterNodes[i].NodeID]; !seen {
			nodesByID[clusterNodes[i].NodeID] = &clusterNodes[i]
		}
	}

	// Validate the whole plan before stopping anything.
	for i := range turnBlocks {
		if _, ok := nodesByID[turnBlocks[i].NodeID]; !ok {
			return fmt.Errorf("TURN node %s not found in cluster nodes", turnBlocks[i].NodeID)
		}
	}

	turnDomain := fmt.Sprintf("turn.ns-%s.%s", cluster.NamespaceName, cm.baseDomain)
	for _, block := range turnBlocks {
		node := nodesByID[block.NodeID]

		cm.stopTURNOnNode(ctx, node.NodeID, node.InternalIP, cluster.NamespaceName)

		turnCfg := TURNInstanceConfig{
			Namespace:       cluster.NamespaceName,
			NodeID:          node.NodeID,
			ListenAddr:      fmt.Sprintf("0.0.0.0:%d", block.TURNListenPort),
			TURNSListenAddr: fmt.Sprintf("0.0.0.0:%d", block.TURNTLSPort),
			PublicIP:        node.PublicIP,
			Realm:           cm.baseDomain,
			AuthSecret:      turnSecret,
			RelayPortStart:  block.TURNRelayPortStart,
			RelayPortEnd:    block.TURNRelayPortEnd,
			TURNDomain:      turnDomain,
			StealthDomain:   stealthDomain,
		}
		if err := cm.spawnTURNOnNode(ctx, *node, cluster.NamespaceName, turnCfg); err != nil {
			return fmt.Errorf("failed to re-spawn TURN on node %s: %w", node.NodeID, err)
		}
	}
	return nil
}
// rollbackStealthEnable undoes a failed stealth enable so a half-built path
// is never advertised: it clears the stealth_enabled flag and removes the
// stealth DNS records. Both steps are best effort — each always runs, and a
// failure is only logged as a warning.
func (cm *ClusterManager) rollbackStealthEnable(ctx context.Context, clusterID, namespaceName string) {
	flagErr := cm.setStealthEnabled(ctx, clusterID, false)
	if flagErr != nil {
		cm.logger.Warn("Stealth rollback: failed to clear stealth_enabled", zap.Error(flagErr))
	}

	dnsErr := cm.dnsManager.DeleteStealthTURNRecords(ctx, namespaceName)
	if dnsErr != nil {
		cm.logger.Warn("Stealth rollback: failed to delete DNS records", zap.Error(dnsErr))
	}
}
// refreshStateAndGateways rewrites cluster-state.json on all nodes with the
// new stealth domain and restarts the namespace gateways so turn.credentials
// reflects the change. Failures are logged per node (the reconciler converges
// stragglers later via the gatewayConfigInSync drift check).
//
// The refresh is aborted, with a warning, when the SFU or TURN allocations
// cannot be loaded. Continuing with empty allocation maps would push a state
// file and gateway config without the SFU/TURN blocks, which is the same call
// shape DisableWebRTC uses to remove WebRTC. A transient lookup failure must
// not look like a teardown, so the current state is left in place.
//
// Parameters:
//   - cluster: namespace cluster being refreshed.
//   - clusterNodes: nodes that receive the state rewrite and gateway restart.
//   - stealthDomain: stealth TURNS host to advertise, or "" when stealth is off.
//   - turnSecret: TURN shared secret persisted in state and gateway config.
func (cm *ClusterManager) refreshStateAndGateways(
	ctx context.Context,
	cluster *NamespaceCluster,
	clusterNodes []clusterNodeInfo,
	stealthDomain, turnSecret string,
) {
	turnDomain := fmt.Sprintf("turn.ns-%s.%s", cluster.NamespaceName, cm.baseDomain)

	sfuBlockList, err := cm.getWebRTCBlocksByType(ctx, cluster.ID, "sfu")
	if err != nil {
		cm.logger.Warn("Failed to get SFU allocations for state refresh", zap.Error(err))
		return
	}
	turnBlockList, err := cm.getWebRTCBlocksByType(ctx, cluster.ID, "turn")
	if err != nil {
		cm.logger.Warn("Failed to get TURN allocations for state refresh", zap.Error(err))
		return
	}

	// Index both allocation lists by node ID for the per-node writers.
	sfuBlocks := make(map[string]*WebRTCPortBlock, len(sfuBlockList))
	for i := range sfuBlockList {
		sfuBlocks[sfuBlockList[i].NodeID] = &sfuBlockList[i]
	}
	turnBlocks := make(map[string]*WebRTCPortBlock, len(turnBlockList))
	for i := range turnBlockList {
		turnBlocks[turnBlockList[i].NodeID] = &turnBlockList[i]
	}

	cm.updateClusterStateWithWebRTC(ctx, cluster, clusterNodes, sfuBlocks, turnBlocks, turnDomain, stealthDomain, turnSecret)

	// The gateway restart needs the base port blocks; without them the
	// restart is skipped.
	portBlocks, err := cm.portAllocator.GetAllPortBlocks(ctx, cluster.ID)
	if err != nil {
		cm.logger.Warn("Failed to get port blocks for gateway restart after stealth toggle", zap.Error(err))
		return
	}
	nodePortBlocks := make(map[string]*PortBlock, len(portBlocks))
	for i := range portBlocks {
		nodePortBlocks[portBlocks[i].NodeID] = &portBlocks[i]
	}

	cm.restartGatewaysWithWebRTC(ctx, cluster, clusterNodes, nodePortBlocks, sfuBlocks, turnDomain, stealthDomain, turnSecret)
}

View File

@ -204,10 +204,10 @@ func (cm *ClusterManager) EnableWebRTC(ctx context.Context, namespaceName, enabl
}
// 14. Update cluster-state.json on all nodes with WebRTC info
cm.updateClusterStateWithWebRTC(ctx, cluster, clusterNodes, sfuBlocks, turnBlocks, turnDomain, turnSecret)
cm.updateClusterStateWithWebRTC(ctx, cluster, clusterNodes, sfuBlocks, turnBlocks, turnDomain, "", turnSecret)
// 15. Restart namespace gateways with WebRTC config so they register WebRTC routes
cm.restartGatewaysWithWebRTC(ctx, cluster, clusterNodes, nodePortBlocks, sfuBlocks, turnDomain, turnSecret)
cm.restartGatewaysWithWebRTC(ctx, cluster, clusterNodes, nodePortBlocks, sfuBlocks, turnDomain, "", turnSecret)
cm.logEvent(ctx, cluster.ID, EventWebRTCEnabled, "",
fmt.Sprintf("WebRTC enabled: SFU on %d nodes, TURN on %d nodes", len(clusterNodes), len(turnNodes)), nil)
@ -273,17 +273,23 @@ func (cm *ClusterManager) DisableWebRTC(ctx context.Context, namespaceName strin
cm.logger.Warn("Failed to deallocate WebRTC ports", zap.Error(err))
}
// 7. Delete TURN DNS records
// 7. Delete TURN DNS records (both the regular and the feat-124 stealth
// records — a full WebRTC teardown must not orphan stealth A records when
// the namespace had stealth enabled). Delete-by-tag is a no-op when the
// stealth records are absent, so this is safe unconditionally.
if err := cm.dnsManager.DeleteTURNRecords(ctx, namespaceName); err != nil {
cm.logger.Warn("Failed to delete TURN DNS records", zap.Error(err))
}
if err := cm.dnsManager.DeleteStealthTURNRecords(ctx, namespaceName); err != nil {
cm.logger.Warn("Failed to delete stealth TURN DNS records", zap.Error(err))
}
// 8. Clean up DB tables
cm.db.Exec(internalCtx, `DELETE FROM webrtc_rooms WHERE namespace_cluster_id = ?`, cluster.ID)
cm.db.Exec(internalCtx, `DELETE FROM namespace_webrtc_config WHERE namespace_cluster_id = ?`, cluster.ID)
// 9. Update cluster-state.json to remove WebRTC info
cm.updateClusterStateWithWebRTC(ctx, cluster, clusterNodes, nil, nil, "", "")
cm.updateClusterStateWithWebRTC(ctx, cluster, clusterNodes, nil, nil, "", "", "")
// 10. Restart namespace gateways without WebRTC config so they unregister WebRTC routes
portBlocks, err := cm.portAllocator.GetAllPortBlocks(ctx, cluster.ID)
@ -292,7 +298,7 @@ func (cm *ClusterManager) DisableWebRTC(ctx context.Context, namespaceName strin
for i := range portBlocks {
nodePortBlocks[portBlocks[i].NodeID] = &portBlocks[i]
}
cm.restartGatewaysWithWebRTC(ctx, cluster, clusterNodes, nodePortBlocks, nil, "", "")
cm.restartGatewaysWithWebRTC(ctx, cluster, clusterNodes, nodePortBlocks, nil, "", "", "")
} else {
cm.logger.Warn("Failed to get port blocks for gateway restart after WebRTC disable", zap.Error(err))
}
@ -487,17 +493,18 @@ func (cm *ClusterManager) spawnSFURemote(ctx context.Context, nodeIP string, cfg
// spawnTURNRemote sends a spawn-turn request to a remote node
func (cm *ClusterManager) spawnTURNRemote(ctx context.Context, nodeIP string, cfg TURNInstanceConfig) error {
_, err := cm.sendSpawnRequest(ctx, nodeIP, map[string]interface{}{
"action": "spawn-turn",
"namespace": cfg.Namespace,
"node_id": cfg.NodeID,
"turn_listen_addr": cfg.ListenAddr,
"turn_turns_addr": cfg.TURNSListenAddr,
"turn_public_ip": cfg.PublicIP,
"turn_realm": cfg.Realm,
"turn_auth_secret": cfg.AuthSecret,
"turn_relay_start": cfg.RelayPortStart,
"turn_relay_end": cfg.RelayPortEnd,
"turn_domain": cfg.TURNDomain,
"action": "spawn-turn",
"namespace": cfg.Namespace,
"node_id": cfg.NodeID,
"turn_listen_addr": cfg.ListenAddr,
"turn_turns_addr": cfg.TURNSListenAddr,
"turn_public_ip": cfg.PublicIP,
"turn_realm": cfg.Realm,
"turn_auth_secret": cfg.AuthSecret,
"turn_relay_start": cfg.RelayPortStart,
"turn_relay_end": cfg.RelayPortEnd,
"turn_domain": cfg.TURNDomain,
"turn_stealth_domain": cfg.StealthDomain,
})
return err
}
@ -558,7 +565,7 @@ func (cm *ClusterManager) updateClusterStateWithWebRTC(
nodes []clusterNodeInfo,
sfuBlocks map[string]*WebRTCPortBlock,
turnBlocks map[string]*WebRTCPortBlock,
turnDomain, turnSecret string,
turnDomain, turnStealthDomain, turnSecret string,
) {
// Get existing port blocks for base state
portBlocks, err := cm.portAllocator.GetAllPortBlocks(ctx, cluster.ID)
@ -635,6 +642,7 @@ func (cm *ClusterManager) updateClusterStateWithWebRTC(
}
// Persist TURN domain and secret so gateways can be restored on cold start
state.TURNDomain = turnDomain
state.TURNStealthDomain = turnStealthDomain
state.TURNSharedSecret = turnSecret
if node.NodeID == cm.localNodeID {
@ -671,7 +679,7 @@ func (cm *ClusterManager) restartGatewaysWithWebRTC(
nodes []clusterNodeInfo,
portBlocks map[string]*PortBlock,
sfuBlocks map[string]*WebRTCPortBlock,
turnDomain, turnSecret string,
turnDomain, turnStealthDomain, turnSecret string,
) {
// Build Olric server addresses from port blocks + node IPs
var olricServers []string
@ -715,6 +723,7 @@ func (cm *ClusterManager) restartGatewaysWithWebRTC(
WebRTCEnabled: webrtcEnabled,
SFUPort: sfuPort,
TURNDomain: turnDomain,
TURNStealthDomain: turnStealthDomain,
TURNSecret: turnSecret,
// Bugboard #837 follow-up: preserve the secrets key on WebRTC
// restarts so enabling WebRTC doesn't drop secrets management.
@ -750,23 +759,24 @@ func (cm *ClusterManager) restartGatewayRemote(ctx context.Context, nodeIP strin
}
_, err := cm.sendSpawnRequest(ctx, nodeIP, map[string]interface{}{
"action": "restart-gateway",
"namespace": cfg.Namespace,
"node_id": cfg.NodeID,
"gateway_http_port": cfg.HTTPPort,
"gateway_base_domain": cfg.BaseDomain,
"gateway_rqlite_dsn": cfg.RQLiteDSN,
"gateway_global_rqlite_dsn": cfg.GlobalRQLiteDSN,
"gateway_olric_servers": cfg.OlricServers,
"gateway_olric_timeout": olricTimeout,
"ipfs_cluster_api_url": cfg.IPFSClusterAPIURL,
"ipfs_api_url": cfg.IPFSAPIURL,
"ipfs_timeout": ipfsTimeout,
"ipfs_replication_factor": cfg.IPFSReplicationFactor,
"gateway_webrtc_enabled": cfg.WebRTCEnabled,
"gateway_sfu_port": cfg.SFUPort,
"gateway_turn_domain": cfg.TURNDomain,
"gateway_turn_secret": cfg.TURNSecret,
"action": "restart-gateway",
"namespace": cfg.Namespace,
"node_id": cfg.NodeID,
"gateway_http_port": cfg.HTTPPort,
"gateway_base_domain": cfg.BaseDomain,
"gateway_rqlite_dsn": cfg.RQLiteDSN,
"gateway_global_rqlite_dsn": cfg.GlobalRQLiteDSN,
"gateway_olric_servers": cfg.OlricServers,
"gateway_olric_timeout": olricTimeout,
"ipfs_cluster_api_url": cfg.IPFSClusterAPIURL,
"ipfs_api_url": cfg.IPFSAPIURL,
"ipfs_timeout": ipfsTimeout,
"ipfs_replication_factor": cfg.IPFSReplicationFactor,
"gateway_webrtc_enabled": cfg.WebRTCEnabled,
"gateway_sfu_port": cfg.SFUPort,
"gateway_turn_domain": cfg.TURNDomain,
"gateway_turn_stealth_domain": cfg.TURNStealthDomain,
"gateway_turn_secret": cfg.TURNSecret,
// Bugboard #837 follow-up: preserve the secrets key on WebRTC restarts.
"gateway_secrets_encryption_key": cfg.SecretsEncryptionKey,
})

View File

@ -537,6 +537,7 @@ func (cm *ClusterManager) ReplaceClusterNode(ctx context.Context, cluster *Names
gwCfg.SFUPort = sfuBlock.SFUSignalingPort
gwCfg.TURNDomain = fmt.Sprintf("turn.ns-%s.%s", cluster.NamespaceName, cm.baseDomain)
gwCfg.TURNSecret = webrtcCfg.TURNSharedSecret
gwCfg.TURNStealthDomain = cm.stealthDomainFor(cluster.NamespaceName, webrtcCfg)
}
}
@ -1080,6 +1081,7 @@ func (cm *ClusterManager) addNodeToCluster(
gwCfg.SFUPort = sfuBlock.SFUSignalingPort
gwCfg.TURNDomain = fmt.Sprintf("turn.ns-%s.%s", cluster.NamespaceName, cm.baseDomain)
gwCfg.TURNSecret = webrtcCfg.TURNSharedSecret
gwCfg.TURNStealthDomain = cm.stealthDomainFor(cluster.NamespaceName, webrtcCfg)
}
}

View File

@ -353,6 +353,78 @@ func (drm *DNSRecordManager) DeleteTURNRecords(ctx context.Context, namespaceNam
return nil
}
// stealthDNSNamespace builds the dns_records ownership tag under which a
// namespace's stealth TURNS records are stored. The prefix differs from
// "namespace-turn:", so deleting one set by tag never touches the other.
func stealthDNSNamespace(namespaceName string) string {
	const stealthTagPrefix = "namespace-turn-stealth:"
	return stealthTagPrefix + namespaceName
}
// CreateStealthTURNRecords creates DNS A records for the stealth TURNS host
// (feat-124): <stealthHost> -> TURN node IPs. The hostname is the neutral
// cdn-<hash>.<base-domain> label from turn.StealthHostForNamespace — it lives
// directly under the base domain (NOT under ns-<namespace>) so the SNI string
// never identifies the app.
//
// Records already stored under the namespace's stealth tag are deleted first,
// so repeated calls replace the set instead of accumulating rows. That delete
// stays best effort, but a failure is now logged rather than discarded:
// stale records would otherwise sit next to the new ones with no trace.
//
// Returns a *ClusterError when stealthHost is empty, when turnIPs is empty,
// or when an insert fails. Inserts stop at the first failure, so earlier rows
// of the same call remain.
func (drm *DNSRecordManager) CreateStealthTURNRecords(ctx context.Context, namespaceName, stealthHost string, turnIPs []string) error {
	if stealthHost == "" {
		return &ClusterError{Message: "no stealth host provided for DNS records"}
	}
	if len(turnIPs) == 0 {
		return &ClusterError{Message: "no TURN IPs provided for stealth DNS records"}
	}

	internalCtx := client.WithInternalAuth(ctx)
	fqdn := stealthHost + "."
	ownerTag := stealthDNSNamespace(namespaceName)

	drm.logger.Info("Creating stealth TURNS DNS records",
		zap.String("namespace", namespaceName),
		zap.String("fqdn", fqdn),
		zap.Strings("turn_ips", turnIPs),
	)

	const deleteQuery = `DELETE FROM dns_records WHERE namespace = ?`
	if _, err := drm.db.Exec(internalCtx, deleteQuery, ownerTag); err != nil {
		// Best effort: keep going so the new records still get written, but
		// surface the failure for operators.
		drm.logger.Warn("Failed to delete existing stealth TURNS DNS records before re-creating them",
			zap.String("namespace", namespaceName),
			zap.Error(err),
		)
	}

	const insertQuery = `
		INSERT INTO dns_records (
			fqdn, record_type, value, ttl, namespace, created_by, created_at, updated_at
		) VALUES (?, ?, ?, ?, ?, ?, ?, ?)
	`
	now := time.Now()
	for _, ip := range turnIPs {
		_, err := drm.db.Exec(internalCtx, insertQuery,
			fqdn, "A", ip, 60,
			ownerTag,
			"cluster-manager",
			now, now,
		)
		if err != nil {
			return &ClusterError{
				Message: fmt.Sprintf("failed to create stealth TURNS DNS record %s -> %s", fqdn, ip),
				Cause:   err,
			}
		}
	}
	return nil
}
// DeleteStealthTURNRecords removes every dns_records row stored under the
// namespace's stealth ownership tag. The statement runs under the
// internal-auth context; a database failure is returned as a *ClusterError
// carrying the cause.
func (drm *DNSRecordManager) DeleteStealthTURNRecords(ctx context.Context, namespaceName string) error {
	const query = `DELETE FROM dns_records WHERE namespace = ?`

	authCtx := client.WithInternalAuth(ctx)
	ownerTag := stealthDNSNamespace(namespaceName)

	if _, execErr := drm.db.Exec(authCtx, query, ownerTag); execErr != nil {
		return &ClusterError{
			Message: "failed to delete stealth TURNS DNS records",
			Cause:   execErr,
		}
	}
	return nil
}
// EnableNamespaceRecord marks a specific IP's record as active (for recovery)
func (drm *DNSRecordManager) EnableNamespaceRecord(ctx context.Context, namespaceName, ip string) error {
internalCtx := client.WithInternalAuth(ctx)

View File

@ -55,7 +55,7 @@ func TestGatewayWebRTCInSync_matchingBlock_returnsTrue(t *testing.T) {
func TestGatewayWebRTCInSync_eachFieldDriftDetected(t *testing.T) {
// Any single drifted field must trigger a restart. Pins that the
// comparison covers all four webrtc fields (a future refactor that
// comparison covers all five webrtc fields (a future refactor that
// drops one would silently let that field drift forever).
base := gateway.GatewayYAMLWebRTC{
Enabled: true, SFUPort: 30000,
@ -69,6 +69,7 @@ func TestGatewayWebRTCInSync_eachFieldDriftDetected(t *testing.T) {
{"sfu port changed", func(w *gateway.GatewayYAMLWebRTC) { w.SFUPort = 30001 }},
{"turn domain changed", func(w *gateway.GatewayYAMLWebRTC) { w.TURNDomain = "turn.other" }},
{"turn secret rotated", func(w *gateway.GatewayYAMLWebRTC) { w.TURNSecret = "rotated" }},
{"stealth domain changed", func(w *gateway.GatewayYAMLWebRTC) { w.TURNStealthDomain = "cdn-deadbeef0000.orama-devnet.network" }},
}
for _, tc := range mutations {
t.Run(tc.name, func(t *testing.T) {
@ -190,3 +191,25 @@ func TestReconcileGateway_missingConfigReturnsErrorNotRestart(t *testing.T) {
t.Error("missing config must return an error (don't blind-restart a healthy gateway)")
}
}
func TestGatewayWebRTCInSync_stealthEnableDetectedAsDrift(t *testing.T) {
// feat-124: enabling stealth must drift an otherwise-matching gateway so
// the reconciler rewrites its yaml with turn_stealth_domain and restarts
// it — that's how turn.credentials starts advertising turns:<host>:443.
onDisk := gateway.GatewayYAMLWebRTC{
Enabled: true, SFUPort: 30000,
TURNDomain: "turn.ns-anchat-test.orama-devnet.network", TURNSecret: "the-secret",
}
desired := desiredEnabled()
desired.TURNStealthDomain = "cdn-abc123def456.orama-devnet.network"
if gatewayWebRTCInSync(onDisk, desired) {
t.Error("stealth enable not detected as drift — gateway would never advertise the stealth URI")
}
// And once the yaml carries it, the same desired config is in-sync (no
// restart loop).
onDisk.TURNStealthDomain = desired.TURNStealthDomain
if !gatewayWebRTCInSync(onDisk, desired) {
t.Error("matching stealth domain reported as drift — restart loop")
}
}

View File

@ -11,11 +11,11 @@ import "testing"
// port is per-node (0 on a gateway-only node). Pins both the drift
// fallback and the non-SFU-gateway case.
// dbFetch signature: () -> (turnSecret, turnDomain string, sfuPort int).
func dbNone() (string, string, int) { return "", "", 0 }
// dbFetch signature: () -> (turnSecret, turnDomain, stealthDomain string, sfuPort int).
func dbNone() (string, string, string, int) { return "", "", "", 0 }
func dbFull(secret, domain string, sfuPort int) func() (string, string, int) {
return func() (string, string, int) { return secret, domain, sfuPort }
func dbFull(secret, domain string, sfuPort int) func() (string, string, string, int) {
return func() (string, string, string, int) { return secret, domain, "", sfuPort }
}
func TestChooseRestoreWebRTC_stateFileCompleteWins(t *testing.T) {
@ -23,8 +23,8 @@ func TestChooseRestoreWebRTC_stateFileCompleteWins(t *testing.T) {
// (the lazy dbFetch must not be called — saves a query on the hot
// restart path).
dbCalled := false
got := chooseRestoreWebRTC(true, 7800, "turn.ns-x.dbrs.space", "state-secret",
func() (string, string, int) { dbCalled = true; return dbNone() })
got := chooseRestoreWebRTC(true, 7800, "turn.ns-x.dbrs.space", "state-secret", "",
func() (string, string, string, int) { dbCalled = true; return dbNone() })
if dbCalled {
t.Error("DB fetch was called even though the state file had the TURN secret (should short-circuit)")
@ -41,7 +41,7 @@ func TestChooseRestoreWebRTC_staleStateFallsBackToDB(t *testing.T) {
// The bug-25 drift case: state file has NO webrtc (stale — written
// before enable), DB says enabled WITH an SFU port on this node. MUST
// fall back to the DB and re-materialize the full block.
got := chooseRestoreWebRTC(false, 0, "", "",
got := chooseRestoreWebRTC(false, 0, "", "", "",
dbFull("db-secret", "turn.ns-anchat-test.dbrs.space", 7801))
if !got.enabled {
@ -65,7 +65,7 @@ func TestChooseRestoreWebRTC_nonSFUGatewayGetsTURNOnly(t *testing.T) {
// secret (so /v1/webrtc/turn/credentials registers + works) while
// sfuPort stays 0 (signal/rooms don't register). This is exactly node
// 57's situation — pre-fix it resolved to disabled and 404'd.
got := chooseRestoreWebRTC(false, 0, "", "",
got := chooseRestoreWebRTC(false, 0, "", "", "",
dbFull("db-secret", "turn.ns-anchat-test.dbrs.space", 0)) // sfuPort 0 = no local SFU
if !got.enabled {
@ -84,8 +84,8 @@ func TestChooseRestoreWebRTC_stateHasTURNButNoSFU(t *testing.T) {
// false / port 0. Must use the state TURN secret with sfuPort=0 and
// NOT consult the DB (TURN secret present = complete enough).
dbCalled := false
got := chooseRestoreWebRTC(false, 0, "turn.ns-x.dbrs.space", "state-secret",
func() (string, string, int) { dbCalled = true; return dbNone() })
got := chooseRestoreWebRTC(false, 0, "turn.ns-x.dbrs.space", "state-secret", "",
func() (string, string, string, int) { dbCalled = true; return dbNone() })
if dbCalled {
t.Error("DB fetch called even though state file had the TURN secret")
@ -98,7 +98,7 @@ func TestChooseRestoreWebRTC_stateHasTURNButNoSFU(t *testing.T) {
func TestChooseRestoreWebRTC_bothEmptyDisabled(t *testing.T) {
// Namespace genuinely without WebRTC: state empty, DB returns nothing.
// Must return disabled so we don't register broken webrtc routes.
got := chooseRestoreWebRTC(false, 0, "", "", dbNone)
got := chooseRestoreWebRTC(false, 0, "", "", "", dbNone)
if got.enabled {
t.Errorf("want disabled when neither source has WebRTC; got %+v", got)
}
@ -109,8 +109,8 @@ func TestChooseRestoreWebRTC_dbNoSecretStaysDisabled(t *testing.T) {
// provisioned / shouldn't happen). The TURN secret is the
// enablement marker; without it we treat it as not-configured-for-
// TURN, but an SFU port alone still enables SFU routes.
got := chooseRestoreWebRTC(false, 0, "", "",
func() (string, string, int) { return "", "turn.db", 9000 })
got := chooseRestoreWebRTC(false, 0, "", "", "",
func() (string, string, string, int) { return "", "turn.db", "", 9000 })
// dbFetch only runs when state secret is empty; here it returns no
// secret, so the `if dbSecret != ""` guard means NOTHING is taken
// from the DB → disabled. (An SFU-only-no-TURN namespace is not a
@ -119,3 +119,39 @@ func TestChooseRestoreWebRTC_dbNoSecretStaysDisabled(t *testing.T) {
t.Errorf("DB returned no TURN secret: want disabled; got %+v", got)
}
}
// --- feat-124 stealth domain restore precedence ---
func TestChooseRestoreWebRTC_stealthFromStateFile(t *testing.T) {
// Stealth toggles rewrite cluster state, so a fresh state file carries
// the stealth domain and must win without a DB call.
got := chooseRestoreWebRTC(true, 7800, "turn.ns-x.dbrs.space", "state-secret", "cdn-abc123def456.dbrs.space",
func() (string, string, string, int) {
t.Error("DB fetch called even though state file was complete")
return dbNone()
})
if got.stealthDomain != "cdn-abc123def456.dbrs.space" {
t.Errorf("stealthDomain = %q; want state-file value", got.stealthDomain)
}
}
// TestChooseRestoreWebRTC_stealthFromDBOnStaleState covers the stale-state
// path: the state file has no TURN secret, the DB reports WebRTC with stealth
// enabled, and the stealth domain must re-materialize from the DB together
// with the rest of the WebRTC block.
func TestChooseRestoreWebRTC_stealthFromDBOnStaleState(t *testing.T) {
	const dbStealth = "cdn-abc123def456.dbrs.space"

	fromDB := func() (string, string, string, int) {
		return "db-secret", "turn.ns-x.dbrs.space", dbStealth, 7801
	}

	got := chooseRestoreWebRTC(false, 0, "", "", "", fromDB)
	if got.enabled && got.stealthDomain == dbStealth {
		return
	}
	t.Errorf("want stealth domain from DB on stale state; got %+v", got)
}
// TestChooseRestoreWebRTC_noStealthStaysEmpty checks that with stealth off in
// both sources the restored config has an empty stealthDomain, so the gateway
// advertises only the baseline 3-rung ladder.
func TestChooseRestoreWebRTC_noStealthStaysEmpty(t *testing.T) {
	restored := chooseRestoreWebRTC(true, 7800, "turn.ns-x.dbrs.space", "state-secret", "", dbNone)
	if stealth := restored.stealthDomain; stealth != "" {
		t.Errorf("stealthDomain = %q; want empty when stealth is disabled", stealth)
	}
}

View File

@ -234,10 +234,11 @@ func (s *SystemdSpawner) SpawnGateway(ctx context.Context, namespace, nodeID str
// namespace gateways even though the host gateway had the key.
SecretsEncryptionKey: cfg.SecretsEncryptionKey,
WebRTC: gateway.GatewayYAMLWebRTC{
Enabled: cfg.WebRTCEnabled,
SFUPort: cfg.SFUPort,
TURNDomain: cfg.TURNDomain,
TURNSecret: cfg.TURNSecret,
Enabled: cfg.WebRTCEnabled,
SFUPort: cfg.SFUPort,
TURNDomain: cfg.TURNDomain,
TURNSecret: cfg.TURNSecret,
TURNStealthDomain: cfg.TURNStealthDomain,
},
}
@ -343,7 +344,8 @@ func gatewayWebRTCInSync(onDisk gateway.GatewayYAMLWebRTC, cfg gateway.InstanceC
return onDisk.Enabled == cfg.WebRTCEnabled &&
onDisk.SFUPort == cfg.SFUPort &&
onDisk.TURNSecret == cfg.TURNSecret &&
onDisk.TURNDomain == cfg.TURNDomain
onDisk.TURNDomain == cfg.TURNDomain &&
onDisk.TURNStealthDomain == cfg.TURNStealthDomain
}
// gatewayConfigInSync reports whether the full reconcile-relevant config on
@ -516,6 +518,68 @@ type TURNInstanceConfig struct {
RelayPortStart int // Start of relay port range
RelayPortEnd int // End of relay port range
TURNDomain string // TURN domain for Let's Encrypt cert (e.g., "turn.ns-myapp.orama-devnet.network")
// StealthDomain is the neutral stealth TURNS host (feat-124). When set,
// the TURN server carries a second Let's Encrypt cert for this name and
// serves it to TLS clients whose SNI matches — the path the SNI router
// forwards from :443. Stealth NEVER falls back to a self-signed cert: a
// cert clients reject is indistinguishable from being blocked.
StealthDomain string
}
// acmeInternalEndpoint is the gateway's internal ACME endpoint that the
// Caddyfile TURN-cert blocks point the orama DNS provider at.
const acmeInternalEndpoint = "http://localhost:6001/v1/internal/acme"
// turnCertProvisionTimeout bounds how long a TURN spawn waits for Caddy to
// provision a Let's Encrypt cert before falling back (primary domain) or
// failing (stealth domain).
const turnCertProvisionTimeout = 2 * time.Minute
// resolveTURNSCert resolves the TURNS cert/key pair for a domain.
//
// Let's Encrypt via Caddy is tried FIRST whenever a domain is set — the call
// is idempotent and instant when the cert is already in Caddy's storage. This
// ordering also self-heals nodes stuck on the self-signed fallback from an
// earlier failed provisioning (live devnet finding, feat-124): the old code
// never retried Caddy once a self-signed pair existed on disk, so strict TLS
// clients kept failing turns: validation forever.
//
// allowSelfSigned controls the fallback: the primary TURN domain may fall
// back to (or reuse) a self-signed pair at <configDir>/turn-{cert,key}.pem so
// baseline TURN stays up, while the stealth domain must hard-fail instead.
//
// The self-signed pair is reused only when BOTH files are present. If either
// the cert or the key is missing (for example after a partially written or
// partially cleaned config dir) the pair is regenerated, so the returned key
// path never points at a nonexistent file.
// NOTE(review): assumes turn.GenerateSelfSignedCert overwrites an existing
// cert/key file — confirm.
//
// Parameters:
//   - namespace: used for log fields and error messages.
//   - domain: host to provision via Caddy; "" skips the Let's Encrypt attempt.
//   - publicIP: passed to the self-signed generator.
//   - configDir: directory holding the self-signed turn-cert.pem / turn-key.pem.
//   - allowSelfSigned: whether the self-signed fallback is permitted.
//
// Returns the cert path, the key path, and an error when provisioning fails
// without an allowed fallback, when no domain is configured and the fallback
// is not allowed, or when self-signed generation fails.
func (s *SystemdSpawner) resolveTURNSCert(namespace, domain, publicIP, configDir string, allowSelfSigned bool) (string, string, error) {
	if domain != "" {
		caddyCert, caddyKey, err := provisionTURNCertViaCaddy(domain, acmeInternalEndpoint, turnCertProvisionTimeout)
		if err == nil {
			s.logger.Info("Using Let's Encrypt cert from Caddy for TURNS",
				zap.String("namespace", namespace),
				zap.String("domain", domain),
				zap.String("cert_path", caddyCert))
			return caddyCert, caddyKey, nil
		}
		if !allowSelfSigned {
			return "", "", fmt.Errorf("failed to provision Let's Encrypt cert for stealth TURNS domain %s (no self-signed fallback — clients must be able to validate it): %w", domain, err)
		}
		s.logger.Warn("Let's Encrypt cert provisioning failed, falling back to self-signed",
			zap.String("namespace", namespace),
			zap.String("domain", domain),
			zap.Error(err))
	}

	if !allowSelfSigned {
		return "", "", fmt.Errorf("no domain configured for TURNS cert in namespace %s", namespace)
	}

	certPath := filepath.Join(configDir, "turn-cert.pem")
	keyPath := filepath.Join(configDir, "turn-key.pem")

	// A cert without its key (or the reverse) is unusable: treat the pair as
	// absent and regenerate both.
	_, certStatErr := os.Stat(certPath)
	_, keyStatErr := os.Stat(keyPath)
	if os.IsNotExist(certStatErr) || os.IsNotExist(keyStatErr) {
		if err := turn.GenerateSelfSignedCert(certPath, keyPath, publicIP); err != nil {
			return "", "", fmt.Errorf("failed to generate TURNS self-signed cert for namespace %s: %w", namespace, err)
		}
		s.logger.Info("Generated TURNS self-signed certificate",
			zap.String("namespace", namespace),
			zap.String("cert_path", certPath))
	}
	return certPath, keyPath, nil
}
// SpawnTURN starts a TURN instance using systemd
@ -534,43 +598,48 @@ func (s *SystemdSpawner) SpawnTURN(ctx context.Context, namespace, nodeID string
configPath := filepath.Join(configDir, fmt.Sprintf("turn-%s.yaml", nodeID))
// Provision TLS cert for TURNS — try Let's Encrypt via Caddy first, fall back to self-signed
certPath := filepath.Join(configDir, "turn-cert.pem")
keyPath := filepath.Join(configDir, "turn-key.pem")
// Provision TLS cert for TURNS — Let's Encrypt via Caddy first (idempotent,
// also upgrades nodes stuck on the self-signed fallback), self-signed as
// the primary-domain fallback only.
var certPath, keyPath string
if cfg.TURNSListenAddr != "" {
if _, err := os.Stat(certPath); os.IsNotExist(err) {
// Try Let's Encrypt via Caddy first
if cfg.TURNDomain != "" {
acmeEndpoint := "http://localhost:6001/v1/internal/acme"
caddyCert, caddyKey, provErr := provisionTURNCertViaCaddy(cfg.TURNDomain, acmeEndpoint, 2*time.Minute)
if provErr == nil {
certPath = caddyCert
keyPath = caddyKey
s.logger.Info("Using Let's Encrypt cert from Caddy for TURNS",
zap.String("namespace", namespace),
zap.String("domain", cfg.TURNDomain),
zap.String("cert_path", certPath))
} else {
s.logger.Warn("Let's Encrypt cert provisioning failed, falling back to self-signed",
zap.String("namespace", namespace),
zap.String("domain", cfg.TURNDomain),
zap.Error(provErr))
}
}
// Fallback: generate self-signed cert if no cert is available yet
if _, statErr := os.Stat(certPath); os.IsNotExist(statErr) {
if err := turn.GenerateSelfSignedCert(certPath, keyPath, cfg.PublicIP); err != nil {
s.logger.Warn("Failed to generate TURNS self-signed cert, TURNS will be disabled",
zap.String("namespace", namespace),
zap.Error(err))
cfg.TURNSListenAddr = "" // Disable TURNS if cert generation fails
} else {
s.logger.Info("Generated TURNS self-signed certificate",
zap.String("namespace", namespace),
zap.String("cert_path", certPath))
}
var certErr error
certPath, keyPath, certErr = s.resolveTURNSCert(namespace, cfg.TURNDomain, cfg.PublicIP, configDir, true)
if certErr != nil {
s.logger.Warn("Failed to resolve TURNS cert, TURNS will be disabled",
zap.String("namespace", namespace),
zap.Error(certErr))
cfg.TURNSListenAddr = "" // Disable TURNS if no cert is available
}
}
// Stealth TURNS cert (feat-124): requires a working TURNS listener and a
// CA-valid cert — hard error, never a silent downgrade, because the
// operator explicitly enabled stealth and a half-working stealth endpoint
// is invisible until a censored-region user fails to connect.
var stealthCertPath, stealthKeyPath string
if cfg.StealthDomain != "" {
// Security: the stealth domain arrives over the spawn protocol (mesh
// peers gated only by the static internal-auth header). Before it
// reaches the Caddyfile/ACME sink, pin it to the deterministic
// derivation so a forged value can't drive cert issuance for an
// attacker-chosen name. cfg.Realm is the base domain on every TURN
// spawn site. (provisionTURNCertViaCaddy adds a DNS-name allowlist as
// defense-in-depth.)
if cfg.Realm != "" {
want := turn.StealthHostForNamespace(cfg.Namespace, cfg.Realm)
if cfg.StealthDomain != want {
return fmt.Errorf("stealth domain %q does not match the derived host %q for namespace %s — refusing to provision", cfg.StealthDomain, want, cfg.Namespace)
}
}
if cfg.TURNSListenAddr == "" {
return fmt.Errorf("stealth TURNS for namespace %s requires an active TURNS listener (no TLS cert/listener available)", namespace)
}
var stealthErr error
stealthCertPath, stealthKeyPath, stealthErr = s.resolveTURNSCert(namespace, cfg.StealthDomain, cfg.PublicIP, configDir, false)
if stealthErr != nil {
return fmt.Errorf("failed to provision stealth TURNS cert for namespace %s: %w", namespace, stealthErr)
}
}
// Build TURN YAML config
@ -588,6 +657,11 @@ func (s *SystemdSpawner) SpawnTURN(ctx context.Context, namespace, nodeID string
turnConfig.TLSCertPath = certPath
turnConfig.TLSKeyPath = keyPath
}
if stealthCertPath != "" {
turnConfig.StealthDomain = cfg.StealthDomain
turnConfig.TLSStealthCertPath = stealthCertPath
turnConfig.TLSStealthKeyPath = stealthKeyPath
}
configBytes, err := yaml.Marshal(turnConfig)
if err != nil {

View File

@ -5,10 +5,20 @@ import (
"os"
"os/exec"
"path/filepath"
"regexp"
"strings"
"time"
)
// dnsNamePattern matches a conservative lowercase DNS hostname. It exists to
// keep an operator/spawn-supplied domain from breaking out of the Caddyfile
// block it is interpolated into (a value containing '{', '}', or a newline
// could otherwise inject arbitrary Caddy directives) and to refuse cert
// provisioning for non-hostname junk. Security: defense-in-depth at the
// Caddyfile sink; the caller also pins the stealth domain to its deterministic
// derivation (systemd_spawner.go SpawnTURN).
//
// Accepted shape: two or more dot-separated labels, each made of [a-z0-9-]
// and starting and ending with a letter or digit. Uppercase letters,
// underscores, wildcards, whitespace, a trailing dot and single-label names
// are all rejected. Label and total length are not bounded by this pattern.
var dnsNamePattern = regexp.MustCompile(`^[a-z0-9]([a-z0-9-]*[a-z0-9])?(\.[a-z0-9]([a-z0-9-]*[a-z0-9])?)+$`)
const (
caddyfilePath = "/etc/caddy/Caddyfile"
@ -25,6 +35,12 @@ const (
// If Caddy is not available or cert provisioning times out, returns an error
// so the caller can fall back to a self-signed cert.
func provisionTURNCertViaCaddy(domain, acmeEndpoint string, timeout time.Duration) (certPath, keyPath string, err error) {
// Refuse anything that isn't a clean DNS name before it reaches the
// Caddyfile write — blocks Caddyfile-injection via crafted domains.
if !dnsNamePattern.MatchString(domain) {
return "", "", fmt.Errorf("refusing to provision TURNS cert for non-DNS-name domain %q", domain)
}
// Check if cert already exists from a previous provisioning
certPath, keyPath = caddyCertPaths(domain)
if _, err := os.Stat(certPath); err == nil {

View File

@ -0,0 +1,108 @@
package namespace
import (
"os"
"path/filepath"
"strings"
"testing"
"time"
"go.uber.org/zap"
)
// feat-124 — resolveTURNSCert semantics.
//
// On machines without a Caddyfile (tests, dev laptops) the Let's Encrypt
// branch fails fast with "failed to read Caddyfile", exercising exactly the
// fallback decision this function owns: primary domains degrade to a
// self-signed pair, the stealth domain must hard-fail instead.
// testSpawner returns a SystemdSpawner whose only populated field is a no-op
// logger, which is all the resolveTURNSCert tests in this file rely on.
func testSpawner(t *testing.T) *SystemdSpawner {
	t.Helper()
	spawner := new(SystemdSpawner)
	spawner.logger = zap.NewNop()
	return spawner
}
// TestResolveTURNSCert_primaryFallsBackToSelfSigned verifies that resolving a
// primary TURN domain with the self-signed fallback permitted (final argument
// true) succeeds and yields the turn-cert.pem / turn-key.pem pair inside the
// config directory, with the certificate actually written to disk.
func TestResolveTURNSCert_primaryFallsBackToSelfSigned(t *testing.T) {
	configDir := t.TempDir()
	wantCert := filepath.Join(configDir, "turn-cert.pem")
	wantKey := filepath.Join(configDir, "turn-key.pem")

	gotCert, gotKey, err := testSpawner(t).resolveTURNSCert("ns-test", "turn.ns-test.example.com", "203.0.113.7", configDir, true)
	if err != nil {
		t.Fatalf("expected self-signed fallback, got error: %v", err)
	}
	if !(gotCert == wantCert && gotKey == wantKey) {
		t.Errorf("unexpected fallback paths: %s / %s", gotCert, gotKey)
	}
	if _, statErr := os.Stat(gotCert); statErr != nil {
		t.Errorf("self-signed cert not written: %v", statErr)
	}
}
// TestResolveTURNSCert_existingSelfSignedReused resolves the self-signed pair
// twice against the same config directory and asserts the second call returns
// the same certificate path with an unchanged modification time, i.e. the
// existing pair was reused rather than regenerated.
func TestResolveTURNSCert_existingSelfSignedReused(t *testing.T) {
	s := testSpawner(t)
	dir := t.TempDir()

	first, _, err := s.resolveTURNSCert("ns-test", "", "203.0.113.7", dir, true)
	if err != nil {
		t.Fatalf("first resolve: %v", err)
	}
	info1, err := os.Stat(first)
	if err != nil {
		t.Fatalf("stat first cert: %v", err)
	}

	second, _, err := s.resolveTURNSCert("ns-test", "", "203.0.113.7", dir, true)
	if err != nil {
		t.Fatalf("second resolve: %v", err)
	}
	info2, err := os.Stat(second)
	if err != nil {
		t.Fatalf("stat second cert: %v", err)
	}

	// Compare instants with Time.Equal: == / != on time.Time also compares
	// the Location pointer and monotonic reading, which is not the question
	// being asked here.
	if first != second || !info1.ModTime().Equal(info2.ModTime()) {
		t.Error("existing self-signed pair was regenerated instead of reused")
	}
}
// TestResolveTURNSCert_stealthNeverFallsBackToSelfSigned verifies that a
// stealth domain resolved with the self-signed fallback disallowed returns an
// error that names the domain, and that no turn-cert.pem is left behind in
// the config directory.
func TestResolveTURNSCert_stealthNeverFallsBackToSelfSigned(t *testing.T) {
	const stealthHost = "cdn-abc123def456.example.com"
	configDir := t.TempDir()

	_, _, err := testSpawner(t).resolveTURNSCert("ns-test", stealthHost, "203.0.113.7", configDir, false)
	if err == nil {
		t.Fatal("stealth cert resolution must hard-fail without Let's Encrypt — a self-signed stealth cert is indistinguishable from being blocked")
	}
	if msg := err.Error(); !strings.Contains(msg, stealthHost) {
		t.Errorf("error must name the stealth domain for the operator; got: %v", err)
	}

	selfSignedCert := filepath.Join(configDir, "turn-cert.pem")
	if _, statErr := os.Stat(selfSignedCert); !os.IsNotExist(statErr) {
		t.Error("stealth failure must not write a self-signed pair")
	}
}
// TestResolveTURNSCert_noDomainNoFallbackErrors verifies that an empty domain
// combined with the self-signed fallback disallowed produces an error.
func TestResolveTURNSCert_noDomainNoFallbackErrors(t *testing.T) {
	spawner := testSpawner(t)
	if _, _, err := spawner.resolveTURNSCert("ns-test", "", "203.0.113.7", t.TempDir(), false); err == nil {
		t.Fatal("empty domain with self-signed disallowed must error")
	}
}
// TestProvisionTURNCertViaCaddy_rejectsNonDNSName is the security regression
// test (feat-124) for the Caddyfile sink: every input below is something other
// than a clean lowercase DNS name (a directive-injection payload, whitespace,
// upper case, a single label, a label ending in '-', the empty string) and
// must be refused with an error.
func TestProvisionTURNCertViaCaddy_rejectsNonDNSName(t *testing.T) {
	const acmeEndpoint = "http://localhost:6001/v1/internal/acme"
	rejected := [...]string{
		"example.com {\n reverse_proxy evil:1234\n}\n#",
		"has space.com",
		"UPPER.example.com",
		"nodots",
		"trailing-.example.com",
		"",
	}
	for i := range rejected {
		domain := rejected[i]
		_, _, err := provisionTURNCertViaCaddy(domain, acmeEndpoint, time.Second)
		if err != nil {
			continue
		}
		t.Errorf("provisionTURNCertViaCaddy(%q) accepted a non-DNS-name domain", domain)
	}
}

View File

@ -94,8 +94,8 @@ const (
const (
// SFU media port range: 20000-29999
// Each namespace gets a 500-port sub-range for RTP media
SFUMediaPortRangeStart = 20000
SFUMediaPortRangeEnd = 29999
SFUMediaPortRangeStart = 20000
SFUMediaPortRangeEnd = 29999
SFUMediaPortsPerNamespace = 500
// SFU signaling ports: 30000-30099
@ -105,8 +105,8 @@ const (
// TURN relay port range: 49152-65535
// Each namespace gets an 800-port sub-range for TURN relay
TURNRelayPortRangeStart = 49152
TURNRelayPortRangeEnd = 65535
TURNRelayPortRangeStart = 49152
TURNRelayPortRangeEnd = 65535
TURNRelayPortsPerNamespace = 800
// TURN listen ports (standard)
@ -152,38 +152,38 @@ type NamespaceCluster struct {
// ClusterNode represents a node participating in a namespace cluster
type ClusterNode struct {
ID string `json:"id" db:"id"`
NamespaceClusterID string `json:"namespace_cluster_id" db:"namespace_cluster_id"`
NodeID string `json:"node_id" db:"node_id"`
Role NodeRole `json:"role" db:"role"`
RQLiteHTTPPort int `json:"rqlite_http_port,omitempty" db:"rqlite_http_port"`
RQLiteRaftPort int `json:"rqlite_raft_port,omitempty" db:"rqlite_raft_port"`
OlricHTTPPort int `json:"olric_http_port,omitempty" db:"olric_http_port"`
OlricMemberlistPort int `json:"olric_memberlist_port,omitempty" db:"olric_memberlist_port"`
GatewayHTTPPort int `json:"gateway_http_port,omitempty" db:"gateway_http_port"`
Status NodeStatus `json:"status" db:"status"`
ProcessPID int `json:"process_pid,omitempty" db:"process_pid"`
LastHeartbeat *time.Time `json:"last_heartbeat,omitempty" db:"last_heartbeat"`
ErrorMessage string `json:"error_message,omitempty" db:"error_message"`
RQLiteJoinAddress string `json:"rqlite_join_address,omitempty" db:"rqlite_join_address"`
OlricPeers string `json:"olric_peers,omitempty" db:"olric_peers"` // JSON array
CreatedAt time.Time `json:"created_at" db:"created_at"`
UpdatedAt time.Time `json:"updated_at" db:"updated_at"`
ID string `json:"id" db:"id"`
NamespaceClusterID string `json:"namespace_cluster_id" db:"namespace_cluster_id"`
NodeID string `json:"node_id" db:"node_id"`
Role NodeRole `json:"role" db:"role"`
RQLiteHTTPPort int `json:"rqlite_http_port,omitempty" db:"rqlite_http_port"`
RQLiteRaftPort int `json:"rqlite_raft_port,omitempty" db:"rqlite_raft_port"`
OlricHTTPPort int `json:"olric_http_port,omitempty" db:"olric_http_port"`
OlricMemberlistPort int `json:"olric_memberlist_port,omitempty" db:"olric_memberlist_port"`
GatewayHTTPPort int `json:"gateway_http_port,omitempty" db:"gateway_http_port"`
Status NodeStatus `json:"status" db:"status"`
ProcessPID int `json:"process_pid,omitempty" db:"process_pid"`
LastHeartbeat *time.Time `json:"last_heartbeat,omitempty" db:"last_heartbeat"`
ErrorMessage string `json:"error_message,omitempty" db:"error_message"`
RQLiteJoinAddress string `json:"rqlite_join_address,omitempty" db:"rqlite_join_address"`
OlricPeers string `json:"olric_peers,omitempty" db:"olric_peers"` // JSON array
CreatedAt time.Time `json:"created_at" db:"created_at"`
UpdatedAt time.Time `json:"updated_at" db:"updated_at"`
}
// PortBlock represents an allocated block of ports for a namespace on a node
type PortBlock struct {
ID string `json:"id" db:"id"`
NodeID string `json:"node_id" db:"node_id"`
NamespaceClusterID string `json:"namespace_cluster_id" db:"namespace_cluster_id"`
PortStart int `json:"port_start" db:"port_start"`
PortEnd int `json:"port_end" db:"port_end"`
RQLiteHTTPPort int `json:"rqlite_http_port" db:"rqlite_http_port"`
RQLiteRaftPort int `json:"rqlite_raft_port" db:"rqlite_raft_port"`
OlricHTTPPort int `json:"olric_http_port" db:"olric_http_port"`
OlricMemberlistPort int `json:"olric_memberlist_port" db:"olric_memberlist_port"`
GatewayHTTPPort int `json:"gateway_http_port" db:"gateway_http_port"`
AllocatedAt time.Time `json:"allocated_at" db:"allocated_at"`
ID string `json:"id" db:"id"`
NodeID string `json:"node_id" db:"node_id"`
NamespaceClusterID string `json:"namespace_cluster_id" db:"namespace_cluster_id"`
PortStart int `json:"port_start" db:"port_start"`
PortEnd int `json:"port_end" db:"port_end"`
RQLiteHTTPPort int `json:"rqlite_http_port" db:"rqlite_http_port"`
RQLiteRaftPort int `json:"rqlite_raft_port" db:"rqlite_raft_port"`
OlricHTTPPort int `json:"olric_http_port" db:"olric_http_port"`
OlricMemberlistPort int `json:"olric_memberlist_port" db:"olric_memberlist_port"`
GatewayHTTPPort int `json:"gateway_http_port" db:"gateway_http_port"`
AllocatedAt time.Time `json:"allocated_at" db:"allocated_at"`
}
// ClusterEvent represents an audit event for cluster lifecycle
@ -238,33 +238,39 @@ func (e *ClusterError) Unwrap() error {
}
var (
ErrNoPortsAvailable = &ClusterError{Message: "no ports available on node"}
ErrNodeAtCapacity = &ClusterError{Message: "node has reached maximum namespace instances"}
ErrInsufficientNodes = &ClusterError{Message: "insufficient nodes available for cluster"}
ErrClusterNotFound = &ClusterError{Message: "namespace cluster not found"}
ErrClusterAlreadyExists = &ClusterError{Message: "namespace cluster already exists"}
ErrProvisioningFailed = &ClusterError{Message: "cluster provisioning failed"}
ErrNamespaceNotFound = &ClusterError{Message: "namespace not found"}
ErrInvalidClusterStatus = &ClusterError{Message: "invalid cluster status for operation"}
ErrRecoveryInProgress = &ClusterError{Message: "recovery already in progress for this cluster"}
ErrWebRTCAlreadyEnabled = &ClusterError{Message: "WebRTC is already enabled for this namespace"}
ErrWebRTCNotEnabled = &ClusterError{Message: "WebRTC is not enabled for this namespace"}
ErrNoWebRTCPortsAvailable = &ClusterError{Message: "no WebRTC ports available on node"}
ErrNoPortsAvailable = &ClusterError{Message: "no ports available on node"}
ErrNodeAtCapacity = &ClusterError{Message: "node has reached maximum namespace instances"}
ErrInsufficientNodes = &ClusterError{Message: "insufficient nodes available for cluster"}
ErrClusterNotFound = &ClusterError{Message: "namespace cluster not found"}
ErrClusterAlreadyExists = &ClusterError{Message: "namespace cluster already exists"}
ErrProvisioningFailed = &ClusterError{Message: "cluster provisioning failed"}
ErrNamespaceNotFound = &ClusterError{Message: "namespace not found"}
ErrInvalidClusterStatus = &ClusterError{Message: "invalid cluster status for operation"}
ErrRecoveryInProgress = &ClusterError{Message: "recovery already in progress for this cluster"}
ErrWebRTCAlreadyEnabled = &ClusterError{Message: "WebRTC is already enabled for this namespace"}
ErrWebRTCNotEnabled = &ClusterError{Message: "WebRTC is not enabled for this namespace"}
ErrWebRTCStealthAlreadyEnabled = &ClusterError{Message: "WebRTC stealth is already enabled for this namespace"}
ErrWebRTCStealthNotEnabled = &ClusterError{Message: "WebRTC stealth is not enabled for this namespace"}
ErrNoWebRTCPortsAvailable = &ClusterError{Message: "no WebRTC ports available on node"}
)
// WebRTCConfig represents the per-namespace WebRTC configuration stored in the database
type WebRTCConfig struct {
ID string `json:"id" db:"id"`
NamespaceClusterID string `json:"namespace_cluster_id" db:"namespace_cluster_id"`
NamespaceName string `json:"namespace_name" db:"namespace_name"`
Enabled bool `json:"enabled" db:"enabled"`
TURNSharedSecret string `json:"-" db:"turn_shared_secret"` // Never serialize secret to JSON
TURNCredentialTTL int `json:"turn_credential_ttl" db:"turn_credential_ttl"`
SFUNodeCount int `json:"sfu_node_count" db:"sfu_node_count"`
TURNNodeCount int `json:"turn_node_count" db:"turn_node_count"`
EnabledBy string `json:"enabled_by" db:"enabled_by"`
EnabledAt time.Time `json:"enabled_at" db:"enabled_at"`
DisabledAt *time.Time `json:"disabled_at,omitempty" db:"disabled_at"`
ID string `json:"id" db:"id"`
NamespaceClusterID string `json:"namespace_cluster_id" db:"namespace_cluster_id"`
NamespaceName string `json:"namespace_name" db:"namespace_name"`
Enabled bool `json:"enabled" db:"enabled"`
TURNSharedSecret string `json:"-" db:"turn_shared_secret"` // Never serialize secret to JSON
TURNCredentialTTL int `json:"turn_credential_ttl" db:"turn_credential_ttl"`
SFUNodeCount int `json:"sfu_node_count" db:"sfu_node_count"`
TURNNodeCount int `json:"turn_node_count" db:"turn_node_count"`
// StealthEnabled gates the censorship-resistant TURNS:443 path (feat-124):
// stealth cert on the TURN servers, SNI route on :443, and the
// `turns:<stealth-host>:443` rung in the turn.credentials URI ladder.
StealthEnabled bool `json:"stealth_enabled" db:"stealth_enabled"`
EnabledBy string `json:"enabled_by" db:"enabled_by"`
EnabledAt time.Time `json:"enabled_at" db:"enabled_at"`
DisabledAt *time.Time `json:"disabled_at,omitempty" db:"disabled_at"`
}
// WebRTCRoom represents an active WebRTC room tracked in the database
@ -284,15 +290,15 @@ type WebRTCRoom struct {
// WebRTCPortBlock represents allocated WebRTC ports for a namespace on a node
type WebRTCPortBlock struct {
ID string `json:"id" db:"id"`
NodeID string `json:"node_id" db:"node_id"`
NamespaceClusterID string `json:"namespace_cluster_id" db:"namespace_cluster_id"`
ServiceType string `json:"service_type" db:"service_type"` // "sfu" or "turn"
ID string `json:"id" db:"id"`
NodeID string `json:"node_id" db:"node_id"`
NamespaceClusterID string `json:"namespace_cluster_id" db:"namespace_cluster_id"`
ServiceType string `json:"service_type" db:"service_type"` // "sfu" or "turn"
// SFU ports
SFUSignalingPort int `json:"sfu_signaling_port,omitempty" db:"sfu_signaling_port"`
SFUMediaPortStart int `json:"sfu_media_port_start,omitempty" db:"sfu_media_port_start"`
SFUMediaPortEnd int `json:"sfu_media_port_end,omitempty" db:"sfu_media_port_end"`
SFUSignalingPort int `json:"sfu_signaling_port,omitempty" db:"sfu_signaling_port"`
SFUMediaPortStart int `json:"sfu_media_port_start,omitempty" db:"sfu_media_port_start"`
SFUMediaPortEnd int `json:"sfu_media_port_end,omitempty" db:"sfu_media_port_end"`
// TURN ports
TURNListenPort int `json:"turn_listen_port,omitempty" db:"turn_listen_port"`

View File

@ -828,6 +828,7 @@ func (e *Engine) registerHostModule(ctx context.Context) error {
NewFunctionBuilder().WithFunc(e.hWSBroadcast).Export("ws_broadcast").
NewFunctionBuilder().WithFunc(e.hEphemeralStateSet).Export("ephemeral_state_set").
NewFunctionBuilder().WithFunc(e.hEphemeralStateClear).Export("ephemeral_state_clear").
NewFunctionBuilder().WithFunc(e.hEphemeralStateList).Export("ephemeral_state_list").
NewFunctionBuilder().WithFunc(e.hFunctionInvoke).Export("function_invoke").
NewFunctionBuilder().WithFunc(e.hFunctionInvokeAsync).Export("function_invoke_async").
NewFunctionBuilder().WithFunc(e.hLogInfo).Export("log_info").
@ -1463,6 +1464,33 @@ func (e *Engine) hEphemeralStateClear(ctx context.Context, mod api.Module,
return 1
}
// hEphemeralStateList exposes EphemeralStateList to WASM guests as the
// ephemeral_state_list host function (the bugboard #710 reconnect catch-up
// read).
//
// ABI: ephemeral_state_list(topicPtr, topicLen uint32) -> uint64 packed
// (ptr<<32 | len) pointing to a JSON envelope in guest memory:
//
//	{"entries":[{"key":..,"client_id":..,"payload":<base64>,"expires_in_ms":..}, …]}
//
// The topic is read from guest memory, handed to the host service, and the
// returned envelope is written back into guest memory. 0 is returned when the
// topic cannot be read from the guest or when the host service reports an
// error (which is logged at warn level); otherwise the value is whatever
// WriteToGuest returns. Unlike set/clear, no WS client is required — the read
// is namespace-scoped via the invocation context.
func (e *Engine) hEphemeralStateList(ctx context.Context, mod api.Module,
	topicPtr, topicLen uint32) uint64 {
	raw, ok := e.executor.ReadFromGuest(mod, topicPtr, topicLen)
	if !ok {
		return 0
	}
	topicName := string(raw)

	envelope, err := e.hostServices.EphemeralStateList(ctx, topicName)
	if err == nil {
		return e.executor.WriteToGuest(ctx, mod, envelope)
	}

	e.logger.Warn("host function ephemeral_state_list failed",
		zap.String("topic", topicName),
		zap.Error(err))
	return 0
}
// hPushSend is the WASM-callable wrapper for PushSend.
// Inputs:
//

View File

@ -4,6 +4,7 @@ import (
"context"
"encoding/json"
"fmt"
"sort"
"sync"
"time"
)
@ -47,26 +48,29 @@ const (
ephemeralSweepInterval = 10 * time.Second
)
// EphemeralEventKind discriminates the synthetic events published on a topic.
type EphemeralEventKind string
// Synthetic-event discriminator values carried in the `_orama` field. The
// `_orama` control-frame namespace is the contract agreed with app teams on
// bugboard #710 (#458/#505/#849/#901) — the same dispatch pattern clients
// already use for the auth.refresh control frame from #321.
const (
EphemeralEventSet EphemeralEventKind = "set"
EphemeralEventClear EphemeralEventKind = "clear"
EphemeralEventSet = "ephemeral.set"
EphemeralEventClear = "ephemeral.clear"
)
// EphemeralEvent is the wire shape published on the topic when ephemeral state
// is set, cleared, or auto-cleared on disconnect/expiry. Subscribers key off
// Kind + Key to update their local view. Payload is only populated for "set".
// is set, cleared, or auto-cleared on disconnect/expiry. Subscribers dispatch
// on the `_orama` discriminator + Key to update their local view. Payload is
// only populated for "ephemeral.set".
type EphemeralEvent struct {
Type string `json:"__ephemeral"` // always "state"
Kind EphemeralEventKind `json:"kind"` // set | clear
Key string `json:"key"` // app-chosen key
ClientID string `json:"client_id"` // owning WS client
Type string `json:"_orama"` // "ephemeral.set" | "ephemeral.clear"
Topic string `json:"topic"` // the topic the state lives on (self-describing for sub-routers)
Key string `json:"key"` // app-chosen key
ClientID string `json:"client_id"` // owning WS client
// Payload is the opaque app-chosen blob (may be JSON, protobuf, or
// arbitrary bytes), present only for "set". encoding/json base64-encodes
// a []byte on the wire, so subscribers base64-decode "payload" to recover
// the original bytes — mirroring how pubsub_publish_batch carries data.
// arbitrary bytes), present only for "ephemeral.set". encoding/json
// base64-encodes a []byte on the wire, so subscribers base64-decode
// "payload" to recover the original bytes — mirroring how
// pubsub_publish_batch carries data.
Payload []byte `json:"payload,omitempty"`
Reason string `json:"reason,omitempty"` // clear only: explicit|disconnect|expired
}
@ -192,8 +196,8 @@ func (s *EphemeralStore) Set(ctx context.Context, namespace, clientID, topic, ke
s.mu.Unlock()
evt := EphemeralEvent{
Type: "state",
Kind: EphemeralEventSet,
Type: EphemeralEventSet,
Topic: topic,
Key: key,
ClientID: clientID,
Payload: payloadCopy,
@ -225,14 +229,60 @@ func (s *EphemeralStore) Clear(ctx context.Context, namespace, clientID, topic,
s.mu.Unlock()
return s.publishEvent(ctx, namespace, topic, EphemeralEvent{
Type: "state",
Kind: EphemeralEventClear,
Type: EphemeralEventClear,
Topic: topic,
Key: key,
ClientID: clientID,
Reason: "explicit",
})
}
// EphemeralListEntry is one live entry returned by List — the reconnect
// catch-up shape for the ephemeral_state_list host fn. ExpiresInMs is relative
// (remaining TTL) so callers don't need a synchronized clock.
//
// Field order and JSON tags define the wire format handed to WASM callers;
// treat them as part of the protocol.
type EphemeralListEntry struct {
// Key is the app-chosen key the entry was stored under.
Key string `json:"key"`
// ClientID is the WS client recorded as the entry's owner.
ClientID string `json:"client_id"`
// Payload is a copy of the stored opaque blob. encoding/json emits a
// []byte as a base64 string, and omitempty drops the field when empty.
Payload []byte `json:"payload,omitempty"`
// ExpiresInMs is the time remaining until expiry, in milliseconds,
// measured at the moment List ran.
ExpiresInMs int64 `json:"expires_in_ms"`
}
// List snapshots the live (non-expired) entries stored under (namespace,
// topic) and returns them ordered by key so the output is deterministic. It
// is the reconnect catch-up read (bugboard #710 acceptance): a client that
// just (re)subscribed reads the current state once, then follows the
// ephemeral.set/ephemeral.clear event stream. The call is read-only — it
// checks no ownership and needs no WS client.
//
// The result is never nil (an empty, non-nil slice when nothing matches), and
// every Payload is a fresh copy, so callers may mutate the result freely.
func (s *EphemeralStore) List(namespace, topic string) []EphemeralListEntry {
	now := s.now()
	live := make([]EphemeralListEntry, 0)

	s.mu.Lock()
	for scope, stored := range s.values {
		inScope := scope.namespace == namespace && scope.topic == topic
		// An entry is hidden once now >= expiresAt. That is intentionally
		// one tick stricter than sweepExpired (which removes only when
		// now.After(expiresAt)): a reconnect catch-up must never surface
		// state that is at/past its deadline, even if the backstop sweeper
		// hasn't run yet.
		if !inScope || !now.Before(stored.expiresAt) {
			continue
		}
		blob := make([]byte, len(stored.payload))
		copy(blob, stored.payload)
		live = append(live, EphemeralListEntry{
			Key:         stored.key,
			ClientID:    stored.clientID,
			Payload:     blob,
			ExpiresInMs: stored.expiresAt.Sub(now).Milliseconds(),
		})
	}
	s.mu.Unlock()

	// Sort outside the lock; live is private to this call.
	sort.Slice(live, func(a, b int) bool { return live[a].Key < live[b].Key })
	return live
}
// ClearClient removes every entry owned by clientID and publishes a clear
// event for each (reason "disconnect"). Called from the WS disconnect hook —
// the primary, zero-lag cleanup path. Safe to call for an unknown client.
@ -261,8 +311,8 @@ func (s *EphemeralStore) clearClientWithReason(ctx context.Context, clientID, re
for _, entry := range toClear {
_ = s.publishEvent(ctx, entry.namespace, entry.topic, EphemeralEvent{
Type: "state",
Kind: EphemeralEventClear,
Type: EphemeralEventClear,
Topic: entry.topic,
Key: entry.key,
ClientID: clientID,
Reason: reason,
@ -292,7 +342,7 @@ func (s *EphemeralStore) publishEvent(ctx context.Context, namespace, topic stri
return fmt.Errorf("ephemeral state: marshal event: %w", err)
}
if err := s.publish(ctx, namespace, topic, data); err != nil {
return fmt.Errorf("ephemeral state: publish %s event: %w", evt.Kind, err)
return fmt.Errorf("ephemeral state: publish %s event: %w", evt.Type, err)
}
return nil
}
@ -335,8 +385,8 @@ func (s *EphemeralStore) sweepExpired(ctx context.Context) {
for _, entry := range expired {
_ = s.publishEvent(ctx, entry.namespace, entry.topic, EphemeralEvent{
Type: "state",
Kind: EphemeralEventClear,
Type: EphemeralEventClear,
Topic: entry.topic,
Key: entry.key,
ClientID: entry.clientID,
Reason: "expired",

View File

@ -40,12 +40,12 @@ func (c *capturePublisher) snapshot() []capturedEvent {
return out
}
func (c *capturePublisher) countKind(kind EphemeralEventKind) int {
func (c *capturePublisher) countKind(eventType string) int {
c.mu.Lock()
defer c.mu.Unlock()
n := 0
for _, e := range c.events {
if e.event.Kind == kind {
if e.event.Type == eventType {
n++
}
}
@ -114,7 +114,7 @@ func TestEphemeralStore_SetThenDisconnect(t *testing.T) {
t.Errorf("disconnect clear events = %d, want 2", got)
}
for _, e := range pub.snapshot() {
if e.event.Kind == EphemeralEventClear && e.event.Reason != "disconnect" {
if e.event.Type == EphemeralEventClear && e.event.Reason != "disconnect" {
t.Errorf("clear reason = %q, want disconnect", e.event.Reason)
}
}
@ -149,7 +149,7 @@ func TestEphemeralStore_TTLExpiry(t *testing.T) {
// A clear event with reason=expired must have been published.
foundExpired := false
for _, e := range pub.snapshot() {
if e.event.Kind == EphemeralEventClear && e.event.Reason == "expired" {
if e.event.Type == EphemeralEventClear && e.event.Reason == "expired" {
foundExpired = true
}
}
@ -293,3 +293,130 @@ func TestEphemeralStore_OwnershipTransfer(t *testing.T) {
t.Errorf("new owner's disconnect did not clear, count=%d", s.keyCountForTest())
}
}
// TestEphemeralStore_wireFormatContract locks down the EXACT JSON field names
// of the synthetic events — the `_orama` control-frame contract agreed with
// app teams on bugboard #710 (#458/#505/#849/#901). Client sub-routers
// dispatch on the `_orama` discriminator, so renaming any of these fields is a
// breaking protocol change and must fail this test.
//
// The publisher decodes each published frame into an independent struct whose
// tags spell out the expected names; a renamed field therefore decodes to its
// zero value and trips the assertions below.
func TestEphemeralStore_wireFormatContract(t *testing.T) {
	type wireEvent struct {
		Orama    string `json:"_orama"`
		Topic    string `json:"topic"`
		Key      string `json:"key"`
		ClientID string `json:"client_id"`
		Payload  []byte `json:"payload"`
		Reason   string `json:"reason"`
	}

	var captured []wireEvent
	record := func(_ context.Context, _, _ string, data []byte) error {
		var evt wireEvent
		if err := json.Unmarshal(data, &evt); err != nil {
			return err
		}
		captured = append(captured, evt)
		return nil
	}

	store := newTestStore(record)
	ctx := context.Background()
	if err := store.Set(ctx, "ns1", "client-A", "typing:room1", "user-7", []byte("blob"), 0); err != nil {
		t.Fatalf("Set: %v", err)
	}
	store.ClearClient(ctx, "client-A")

	if len(captured) != 2 {
		t.Fatalf("expected 2 events (set + disconnect clear), got %d", len(captured))
	}
	setEvt := captured[0]
	clearEvt := captured[1]

	if setEvt.Orama != "ephemeral.set" {
		t.Errorf(`set _orama = %q, want "ephemeral.set"`, setEvt.Orama)
	}
	if !(setEvt.Topic == "typing:room1" && setEvt.Key == "user-7" && setEvt.ClientID == "client-A") {
		t.Errorf("set event fields wrong: %+v", setEvt)
	}
	if string(setEvt.Payload) != "blob" {
		t.Errorf("set payload = %q, want blob", setEvt.Payload)
	}

	if clearEvt.Orama != "ephemeral.clear" {
		t.Errorf(`clear _orama = %q, want "ephemeral.clear"`, clearEvt.Orama)
	}
	if !(clearEvt.Topic == "typing:room1" && clearEvt.Key == "user-7" && clearEvt.Reason == "disconnect") {
		t.Errorf("clear event fields wrong: %+v", clearEvt)
	}
}
// TestEphemeralStoreList_returnsLiveEntriesSorted stores two entries in
// reverse key order and checks that List returns both, ordered by key, with
// the owner and payload intact and a positive remaining TTL.
func TestEphemeralStoreList_returnsLiveEntriesSorted(t *testing.T) {
	store := newTestStore(nil)
	ctx := context.Background()

	// Inserted out of order on purpose so the sort is actually exercised.
	seeds := []struct{ client, key, payload string }{
		{"client-B", "zeta", "z"},
		{"client-A", "alpha", "a"},
	}
	for _, seed := range seeds {
		if err := store.Set(ctx, "ns1", seed.client, "presence:room1", seed.key, []byte(seed.payload), 0); err != nil {
			t.Fatalf("Set %s: %v", seed.key, err)
		}
	}

	listed := store.List("ns1", "presence:room1")
	if len(listed) != 2 {
		t.Fatalf("List returned %d entries, want 2", len(listed))
	}
	head, tail := listed[0], listed[1]
	if !(head.Key == "alpha" && tail.Key == "zeta") {
		t.Errorf("entries not sorted by key: %q, %q", head.Key, tail.Key)
	}
	if head.ClientID != "client-A" || string(head.Payload) != "a" {
		t.Errorf("entry fields wrong: %+v", head)
	}
	if head.ExpiresInMs <= 0 {
		t.Errorf("ExpiresInMs must be positive for a live entry, got %d", head.ExpiresInMs)
	}
}
// TestEphemeralStoreList_excludesExpiredAndOtherScopes drives the store with
// a controllable clock and checks that List filters out (a) an entry whose
// TTL has elapsed even though no sweep has run, (b) entries in another
// namespace, and (c) entries on another topic.
func TestEphemeralStoreList_excludesExpiredAndOtherScopes(t *testing.T) {
	store := newTestStore(nil)
	ctx := context.Background()

	// clock is captured by the store's time source, so advancing it below
	// moves the store's notion of "now".
	clock := time.Now()
	store.now = func() time.Time { return clock }

	if err := store.Set(ctx, "ns1", "c", "t", "live", []byte("p"), 60_000); err != nil {
		t.Fatalf("Set live: %v", err)
	}
	if err := store.Set(ctx, "ns1", "c", "t", "dying", []byte("p"), 1000); err != nil {
		t.Fatalf("Set dying: %v", err)
	}
	if err := store.Set(ctx, "ns2", "c", "t", "other-ns", []byte("p"), 60_000); err != nil {
		t.Fatalf("Set other-ns: %v", err)
	}
	if err := store.Set(ctx, "ns1", "c", "t2", "other-topic", []byte("p"), 60_000); err != nil {
		t.Fatalf("Set other-topic: %v", err)
	}

	// Advance past "dying"'s TTL but do NOT sweep — List must hide it anyway.
	clock = clock.Add(2 * time.Second)

	listed := store.List("ns1", "t")
	if !(len(listed) == 1 && listed[0].Key == "live") {
		t.Fatalf("List = %+v, want exactly the single live ns1/t entry", listed)
	}
}
// TestEphemeralStoreList_emptyTopicReturnsEmpty checks that listing a topic
// nothing was ever stored on yields no entries.
func TestEphemeralStoreList_emptyTopicReturnsEmpty(t *testing.T) {
	listed := newTestStore(nil).List("ns1", "nothing-here")
	if len(listed) > 0 {
		t.Errorf("List on empty topic = %+v, want empty", listed)
	}
}
// TestEphemeralStoreList_snapshotIsDefensiveCopy mutates the payload returned
// by List and then lists again, asserting the stored payload is unchanged —
// i.e. List hands out copies rather than aliases of the store's bytes.
//
// Lengths are checked before indexing so a regression in List is reported as
// a test failure with context instead of an index-out-of-range panic.
func TestEphemeralStoreList_snapshotIsDefensiveCopy(t *testing.T) {
	s := newTestStore(nil)
	ctx := context.Background()
	if err := s.Set(ctx, "ns1", "c", "t", "k", []byte("orig"), 0); err != nil {
		t.Fatalf("Set: %v", err)
	}

	entries := s.List("ns1", "t")
	if len(entries) != 1 || len(entries[0].Payload) == 0 {
		t.Fatalf("List = %+v, want exactly one entry with a non-empty payload", entries)
	}
	entries[0].Payload[0] = 'X'

	fresh := s.List("ns1", "t")
	if len(fresh) != 1 {
		t.Fatalf("second List = %+v, want exactly one entry", fresh)
	}
	if string(fresh[0].Payload) != "orig" {
		t.Error("List payload is not a defensive copy; store was mutated")
	}
}

View File

@ -146,6 +146,10 @@ func (m *mockHostServices) EphemeralStateClear(ctx context.Context, topic, key s
return nil
}
// EphemeralStateList is the mock implementation: it ignores its arguments and
// always reports an envelope with no entries and a nil error.
func (m *mockHostServices) EphemeralStateList(ctx context.Context, topic string) ([]byte, error) {
	const emptyEnvelope = `{"entries":[]}`
	return []byte(emptyEnvelope), nil
}
func (m *mockHostServices) WSSend(ctx context.Context, clientID string, data []byte) error {
return nil
}

View File

@ -220,6 +220,34 @@ func (h *HostFunctions) EphemeralStateClear(ctx context.Context, topic, key stri
return nil
}
// ephemeralListEnvelope is the JSON shape returned by EphemeralStateList —
// an object (not a bare array) so fields can be added without breaking
// existing WASM callers.
type ephemeralListEnvelope struct {
// Entries is serialized under the "entries" key and carries the result
// of EphemeralStore.List for the requested topic.
Entries []serverless.EphemeralListEntry `json:"entries"`
}
// EphemeralStateList serializes the live ephemeral entries on topic, within
// the namespace of the current invocation, as a JSON {"entries":[…]} envelope
// (bugboard #710 reconnect catch-up). It is read-only and needs no WS client,
// so HTTP-invoked functions can serve snapshots too.
//
// Every failure is reported as a *serverless.HostFunctionError tagged
// "ephemeral_state_list". In order of checking: the gateway has no ephemeral
// store, topic is empty, the context carries no invocation, or the envelope
// cannot be marshalled.
func (h *HostFunctions) EphemeralStateList(ctx context.Context, topic string) ([]byte, error) {
	// fail wraps a cause in the host-function error used by every exit path.
	fail := func(cause error) ([]byte, error) {
		return nil, &serverless.HostFunctionError{Function: "ephemeral_state_list", Cause: cause}
	}

	switch {
	case h.ephemeralStore == nil:
		return fail(fmt.Errorf("ephemeral state not available on this gateway"))
	case topic == "":
		return fail(fmt.Errorf("topic is required"))
	}

	invocation := h.currentInvocationContext(ctx)
	if invocation == nil {
		return fail(fmt.Errorf("no invocation context"))
	}

	envelope := ephemeralListEnvelope{Entries: h.ephemeralStore.List(invocation.Namespace, topic)}
	encoded, err := json.Marshal(envelope)
	if err != nil {
		return fail(fmt.Errorf("marshal entries: %w", err))
	}
	return encoded, nil
}
// WSSend sends data to a specific WebSocket client.
func (h *HostFunctions) WSSend(ctx context.Context, clientID string, data []byte) error {
if h.wsManager == nil {

View File

@ -259,6 +259,10 @@ func (m *MockHostServices) EphemeralStateClear(ctx context.Context, topic, key s
return nil
}
// EphemeralStateList is the mock implementation: it ignores its arguments and
// always reports an envelope with no entries and a nil error.
func (m *MockHostServices) EphemeralStateList(ctx context.Context, topic string) ([]byte, error) {
	const emptyEnvelope = `{"entries":[]}`
	return []byte(emptyEnvelope), nil
}
func (m *MockHostServices) WSSend(ctx context.Context, clientID string, data []byte) error {
return nil
}

View File

@ -595,6 +595,14 @@ type HostServices interface {
// non-owned key is a no-op. Errors only on no-WS-client / empty topic-key.
EphemeralStateClear(ctx context.Context, topic, key string) error
// EphemeralStateList returns the live entries on a topic in the current
// invocation's namespace as a JSON envelope:
// {"entries":[{"key":..,"client_id":..,"payload":<base64>,"expires_in_ms":..}, …]}
// The reconnect catch-up read (bugboard #710 acceptance): unlike
// Set/Clear it does NOT require a WS client in context — any function
// invocation may read. Errors on empty topic or no invocation context.
EphemeralStateList(ctx context.Context, topic string) ([]byte, error)
// WebSocket operations (only valid in WS context)
WSSend(ctx context.Context, clientID string, data []byte) error
WSBroadcast(ctx context.Context, topic string, data []byte) error

View File

@ -0,0 +1,129 @@
package sniproxy
import (
"strings"
"time"
"go.uber.org/zap"
)
// discoveryWarnInterval rate-limits the "discovery scan failed" warning so a
// persistently-unreadable namespaces directory cannot flood the journal. It is
// the minimum gap TURNRouteDiscoverer.warn enforces between two emitted
// warnings.
const discoveryWarnInterval = 5 * time.Minute
// StaticRoutes returns the operator-set routes parsed from the SNI router's own
// config file plus the fallback backend. The discoverer merges these with the
// auto-discovered TURN routes; static routes win on an SNI conflict.
//
// It is invoked on every TURNRouteDiscoverer.Apply, so each rescan sees the
// function's current answer. A non-nil err aborts that Apply before the Router
// is touched.
type StaticRoutes func() (routes []Route, fallback Backend, err error)
// TURNRouteDiscoverer periodically rescans the namespaces directory for
// per-namespace TURNS listeners, merges the discovered routes with the static
// routes from the config file (static wins on conflict), and atomically
// installs the result on the Router.
//
// A transient failure (unreadable namespaces dir, or a bad static-config read)
// logs a rate-limited warning and KEEPS the previously-installed routes — a
// filesystem hiccup must never blackhole live :443 traffic.
type TURNRouteDiscoverer struct {
// cfg is passed to DiscoverTURNRoutes on every scan; its RescanInterval
// sets the Run ticker period and its NamespacesDir is attached to
// warnings.
cfg TURNDiscoveryConfig
// static supplies the config-file routes and fallback backend on each
// Apply.
static StaticRoutes
// router receives the merged route set via Replace.
router *Router
// logger is never nil after NewTURNRouteDiscoverer (a no-op logger is
// substituted for nil).
logger *zap.Logger
// lastWarn is only touched by the Run goroutine after the synchronous
// startup Apply, so it needs no lock.
lastWarn time.Time
}
// NewTURNRouteDiscoverer builds a discoverer from its collaborators: cfg
// drives the namespace scan, static reads the operator's config-file routes +
// fallback, and router receives the merged Replace calls. A nil logger is
// replaced with a no-op logger so the discoverer can always log safely.
func NewTURNRouteDiscoverer(cfg TURNDiscoveryConfig, static StaticRoutes, router *Router, logger *zap.Logger) *TURNRouteDiscoverer {
	log := logger
	if log == nil {
		log = zap.NewNop()
	}

	d := new(TURNRouteDiscoverer)
	d.cfg = cfg
	d.static = static
	d.router = router
	d.logger = log
	return d
}
// Apply runs a single scan+merge cycle and installs the merged table on the
// Router in one Replace call. If either the static source or the namespaces
// scan fails, the error is returned as-is and the Router is not modified, so
// the caller chooses between failing startup and keeping the stale routes.
func (d *TURNRouteDiscoverer) Apply() error {
	configured, fallback, staticErr := d.static()
	if staticErr != nil {
		return staticErr
	}

	found, scanErr := DiscoverTURNRoutes(d.cfg, d.logger)
	if scanErr != nil {
		return scanErr
	}

	// Replace is only reached once both inputs were read successfully.
	d.router.Replace(mergeRoutes(configured, found), fallback)
	return nil
}
// Run performs one Apply right away and then one per rescan interval until
// stop is closed. A non-positive configured interval falls back to
// DefaultDiscoveryRescanInterval. A failed Apply never stops the loop: the
// current routes stay in place and a rate-limited warning is logged.
func (d *TURNRouteDiscoverer) Run(stop <-chan struct{}) {
	if err := d.Apply(); err != nil {
		d.warn("initial TURN route discovery failed; serving config-file routes only", err)
	}

	period := DefaultDiscoveryRescanInterval
	if d.cfg.RescanInterval > 0 {
		period = d.cfg.RescanInterval
	}

	tick := time.NewTicker(period)
	defer tick.Stop()

	for {
		select {
		case <-tick.C:
			if err := d.Apply(); err != nil {
				d.warn("TURN route discovery failed; keeping current routes", err)
			}
		case <-stop:
			return
		}
	}
}
// warn emits msg at Warn level unless a warning was already emitted less than
// discoveryWarnInterval ago, in which case it is dropped. This keeps a
// persistently failing scan from flooding the journal.
func (d *TURNRouteDiscoverer) warn(msg string, err error) {
	ts := time.Now()
	if elapsed := ts.Sub(d.lastWarn); elapsed >= discoveryWarnInterval {
		d.lastWarn = ts
		d.logger.Warn(msg,
			zap.String("namespaces_dir", d.cfg.NamespacesDir),
			zap.Error(err))
	}
}
// mergeRoutes returns the static routes, in their original order, followed by
// every discovered route whose SNI match (compared via matchKey) is not
// already claimed by a static route. Putting static routes first lines up with
// Router.Pick's first-match semantics, so static wins on a conflict.
func mergeRoutes(static, discovered []Route) []Route {
	out := make([]Route, 0, len(static)+len(discovered))
	out = append(out, static...)

	taken := make(map[string]struct{}, len(static))
	for i := range static {
		taken[matchKey(static[i].Match)] = struct{}{}
	}

	for _, candidate := range discovered {
		if _, dup := taken[matchKey(candidate.Match)]; !dup {
			out = append(out, candidate)
		}
	}
	return out
}
// matchKey returns the lower-cased form of an SNI match so that two matches
// differing only in letter case compare equal during conflict detection
// (mirroring the case-insensitive matching of Router.Pick / matchSNI).
func matchKey(match string) string {
	normalized := strings.ToLower(match)
	return normalized
}

View File

@ -0,0 +1,143 @@
package sniproxy
import (
"errors"
"path/filepath"
"testing"
"github.com/DeBrosOfficial/network/pkg/turn"
)
// TestTURNRouteDiscoverer_staticRouteWinsMerge pins the conflict rule: when the
// static config and the discovery scan both claim the same stealth SNI, the
// router ends up with the static backend, while the non-conflicting discovered
// alias and the static fallback are still installed.
func TestTURNRouteDiscoverer_staticRouteWinsMerge(t *testing.T) {
	const base = "example.com"
	nsRoot := t.TempDir()
	writeTURNConfig(t, nsRoot, "anchat", "node-1", "0.0.0.0:5349")

	stealthHost := turn.StealthHostForNamespace("anchat", base)
	caddy := Backend{Name: "caddy", Network: "tcp", Addr: "127.0.0.1:8443"}

	// The static config pins the very same stealth host to a DIFFERENT backend.
	override := Route{
		Match:   stealthHost,
		Backend: Backend{Name: "static-override", Network: "tcp", Addr: "127.0.0.1:9999"},
	}
	staticSource := func() ([]Route, Backend, error) {
		return []Route{override}, caddy, nil
	}

	router := NewRouter(Backend{})
	discoverer := NewTURNRouteDiscoverer(TURNDiscoveryConfig{NamespacesDir: nsRoot, BaseDomain: base}, staticSource, router, nil)
	if err := discoverer.Apply(); err != nil {
		t.Fatalf("Apply failed: %v", err)
	}

	// The conflicting host must resolve to the static backend.
	if picked := router.Pick(stealthHost); picked.Addr != "127.0.0.1:9999" {
		t.Errorf("static route should win: got backend %q, want 127.0.0.1:9999", picked.Addr)
	}

	// The discovered alias has no static counterpart and must survive.
	if picked := router.Pick("turn.ns-anchat." + base); picked.Addr != "127.0.0.1:5349" {
		t.Errorf("discovered alias route missing/wrong: got %q", picked.Addr)
	}

	// The fallback comes from the static source.
	if fb := router.Fallback(); fb.Addr != "127.0.0.1:8443" {
		t.Errorf("fallback not preserved: got %q", fb.Addr)
	}
}
// TestTURNRouteDiscoverer_transientErrorKeepsPreviousRoutes installs routes
// with a successful Apply, then repoints the discoverer at a path that does not
// exist. The second Apply must return an error and the router must still hold
// the routes from the first one — a scan failure must never blackhole :443.
func TestTURNRouteDiscoverer_transientErrorKeepsPreviousRoutes(t *testing.T) {
	const base = "example.com"
	root := t.TempDir()
	nsDir := filepath.Join(root, "namespaces")
	writeTURNConfig(t, nsDir, "anchat", "node-1", "0.0.0.0:5349")

	caddy := Backend{Name: "caddy", Network: "tcp", Addr: "127.0.0.1:8443"}
	staticSource := func() ([]Route, Backend, error) { return nil, caddy, nil }

	router := NewRouter(Backend{})
	discoverer := NewTURNRouteDiscoverer(TURNDiscoveryConfig{NamespacesDir: nsDir, BaseDomain: base}, staticSource, router, nil)

	// The first Apply succeeds and installs the two anchat routes.
	if err := discoverer.Apply(); err != nil {
		t.Fatalf("first Apply failed: %v", err)
	}
	installed := len(router.Routes())
	if installed != 2 {
		t.Fatalf("expected 2 routes after first apply, got %d", installed)
	}

	// Simulate a transient read failure: scan a directory that is not there.
	discoverer.cfg.NamespacesDir = filepath.Join(root, "gone")
	if err := discoverer.Apply(); err == nil {
		t.Fatalf("expected Apply to error on missing namespaces dir")
	}

	// The failed scan must have left the previous table in place.
	if remaining := len(router.Routes()); remaining != installed {
		t.Errorf("routes changed on transient error: had %d, now %d", installed, remaining)
	}
	if picked := router.Pick(turn.StealthHostForNamespace("anchat", base)); picked.Addr != "127.0.0.1:5349" {
		t.Errorf("previously-installed stealth route lost after transient error")
	}
}
// TestTURNRouteDiscoverer_staticSourceErrorKeepsRoutes checks the other failure
// input: when the static source starts returning an error (e.g. after a bad
// config-file edit), Apply errors and the router's route count is unchanged.
func TestTURNRouteDiscoverer_staticSourceErrorKeepsRoutes(t *testing.T) {
	const base = "example.com"
	nsRoot := t.TempDir()
	writeTURNConfig(t, nsRoot, "anchat", "node-1", "0.0.0.0:5349")

	caddy := Backend{Name: "caddy", Network: "tcp", Addr: "127.0.0.1:8443"}
	healthy := func() ([]Route, Backend, error) { return nil, caddy, nil }

	router := NewRouter(Backend{})
	discoverer := NewTURNRouteDiscoverer(TURNDiscoveryConfig{NamespacesDir: nsRoot, BaseDomain: base}, healthy, router, nil)
	if err := discoverer.Apply(); err != nil {
		t.Fatalf("first Apply failed: %v", err)
	}
	installed := len(router.Routes())

	// Swap in a static source that fails, as a malformed config file would.
	discoverer.static = func() ([]Route, Backend, error) {
		return nil, Backend{}, errors.New("bad config")
	}
	if err := discoverer.Apply(); err == nil {
		t.Fatalf("expected Apply to error on static source failure")
	}

	if now := len(router.Routes()); now != installed {
		t.Errorf("routes changed on static-source error: had %d, now %d", installed, now)
	}
}
// TestMergeRoutes_staticPrecedesDiscovered exercises mergeRoutes directly: the
// static route comes first and unchanged, the discovered route with the same
// match is dropped, and the non-conflicting discovered route is appended.
func TestMergeRoutes_staticPrecedesDiscovered(t *testing.T) {
	staticRoute := Route{Match: "a.example.com", Backend: Backend{Addr: "127.0.0.1:1"}}
	conflicting := Route{Match: "a.example.com", Backend: Backend{Addr: "127.0.0.1:2"}} // dropped
	extra := Route{Match: "b.example.com", Backend: Backend{Addr: "127.0.0.1:3"}}

	merged := mergeRoutes([]Route{staticRoute}, []Route{conflicting, extra})
	if len(merged) != 2 {
		t.Fatalf("expected 2 merged routes (1 static + 1 non-conflicting), got %d: %+v", len(merged), merged)
	}

	first, second := merged[0], merged[1]
	if first.Match != "a.example.com" || first.Backend.Addr != "127.0.0.1:1" {
		t.Errorf("static route should be first and unchanged: %+v", first)
	}
	if second.Match != "b.example.com" {
		t.Errorf("non-conflicting discovered route missing: %+v", merged)
	}
}

View File

@ -0,0 +1,185 @@
package sniproxy
import (
"fmt"
"net"
"os"
"path/filepath"
"sort"
"strings"
"time"
"github.com/DeBrosOfficial/network/pkg/turn"
"go.uber.org/zap"
"gopkg.in/yaml.v3"
)
// DefaultDiscoveryRescanInterval is the default cadence at which the TURN route
// discoverer rescans the namespaces directory. SNI route changes (a namespace
// gaining or losing its TURNS listener) are infrequent, so 30s of detection
// latency is acceptable and keeps load on the filesystem negligible.
const DefaultDiscoveryRescanInterval = 30 * time.Second

// turnConfigGlob is the glob, relative to a single namespace directory, that
// matches the per-node TURN config files the namespace spawner writes under
// "<namespaces_dir>/<namespace>/configs/turn-<nodeID>.yaml".
const turnConfigGlob = "configs/turn-*.yaml"

// stealthBackendNamePrefix is prepended to the namespace to form the Backend
// name of a discovered TURN backend, labelling it in logs/metrics.
const stealthBackendNamePrefix = "turn-stealth-"
// The router forwards two SNI hostname shapes to a namespace's TURNS listener:
//   - the bland hashed host from turn.StealthHostForNamespace (DPI-resistant)
//   - a human-readable "turn.ns-<namespace>.<base_domain>" alias (operator UX)
// TURNDiscoveryConfig configures the namespaces scan that derives per-namespace
// stealth-TURN routes. NamespacesDir and BaseDomain are required (enforced by
// Validate); RescanInterval is optional, and a zero value selects
// DefaultDiscoveryRescanInterval.
type TURNDiscoveryConfig struct {
	// NamespacesDir is the directory holding one subdirectory per namespace,
	// each containing a "configs/turn-*.yaml" written by the namespace spawner
	// (e.g. "/opt/orama/.orama/data/namespaces").
	NamespacesDir string `yaml:"namespaces_dir"`
	// BaseDomain is the cluster's base domain (e.g. "orama-devnet.network"),
	// used to derive the stealth and "turn.ns-*" SNI hostnames.
	BaseDomain string `yaml:"base_domain"`
	// RescanInterval is how often the namespaces directory is rescanned. Zero
	// selects DefaultDiscoveryRescanInterval.
	RescanInterval time.Duration `yaml:"rescan_interval"`
}
// Validate returns one message per required field that is empty
// (namespaces_dir, then base_domain) and nil when both are set. It never
// touches the filesystem: a NamespacesDir that is missing at scan time is a
// transient condition handled by the discoverer, not a configuration error.
func (c *TURNDiscoveryConfig) Validate() []string {
	required := []struct {
		value   string
		problem string
	}{
		{c.NamespacesDir, "turn_discovery.namespaces_dir: required"},
		{c.BaseDomain, "turn_discovery.base_domain: required"},
	}

	var problems []string
	for _, field := range required {
		if field.value == "" {
			problems = append(problems, field.problem)
		}
	}
	return problems
}
// DiscoverTURNRoutes walks the immediate subdirectories of cfg.NamespacesDir
// and, for every namespace that exposes a TURNS listener, yields two routes to
// that namespace's local TURNS port:
//
//   - turn.StealthHostForNamespace(namespace, baseDomain) -> 127.0.0.1:<tls-port>
//   - "turn.ns-<namespace>.<baseDomain>"                  -> 127.0.0.1:<tls-port>
//
// Non-directory entries are ignored. Namespaces with TURNS disabled, or whose
// turn-*.yaml cannot be read or parsed, contribute no routes (the per-file
// problem is logged by the helpers) and do not stop the scan. The result is
// sorted by Match so repeated scans produce the same order.
//
// Only a failure to read the namespaces directory itself is returned as an
// error, with nil routes, so callers can keep the routes they already have. A
// nil logger is replaced with a no-op logger.
func DiscoverTURNRoutes(cfg TURNDiscoveryConfig, logger *zap.Logger) ([]Route, error) {
	lg := logger
	if lg == nil {
		lg = zap.NewNop()
	}

	dirEntries, readErr := os.ReadDir(cfg.NamespacesDir)
	if readErr != nil {
		return nil, fmt.Errorf("read namespaces dir %s: %w", cfg.NamespacesDir, readErr)
	}

	var found []Route
	for _, e := range dirEntries {
		if e.IsDir() {
			found = append(found, discoverNamespaceRoutes(cfg, e.Name(), lg)...)
		}
	}

	// Deterministic order keeps Router.Replace idempotent and tests stable.
	byMatch := func(a, b int) bool { return found[a].Match < found[b].Match }
	sort.Slice(found, byMatch)
	return found, nil
}
// discoverNamespaceRoutes looks for turn-*.yaml files under one namespace
// directory and builds the stealth + alias route pair from the first file that
// parseTURNConfig accepts. The namespace name is taken from the config file's
// contents, not from nsDir. It returns nil when no file is usable (TURNS
// disabled, or unreadable/unparseable configs, which the parser logs).
func discoverNamespaceRoutes(cfg TURNDiscoveryConfig, nsDir string, logger *zap.Logger) []Route {
	pattern := filepath.Join(cfg.NamespacesDir, nsDir, turnConfigGlob)
	candidates, globErr := filepath.Glob(pattern)
	if globErr != nil {
		// Glob only errors on a malformed pattern, which turnConfigGlob is not;
		// guard anyway so a future edit can't silently swallow it.
		logger.Warn("turn-config glob failed",
			zap.String("namespace_dir", nsDir), zap.Error(globErr))
		return nil
	}

	for i := range candidates {
		ns, port, usable := parseTURNConfig(candidates[i], logger)
		if usable {
			// Both hostnames forward to the same loopback TURNS listener.
			target := Backend{
				Name:    stealthBackendNamePrefix + ns,
				Network: "tcp",
				Addr:    net.JoinHostPort("127.0.0.1", port),
			}
			stealthRoute := Route{Match: turn.StealthHostForNamespace(ns, cfg.BaseDomain), Backend: target}
			aliasRoute := Route{Match: fmt.Sprintf("turn.ns-%s.%s", ns, cfg.BaseDomain), Backend: target}
			return []Route{stealthRoute, aliasRoute}
		}
	}
	return nil
}
// parseTURNConfig loads one turn-*.yaml and extracts the namespace it names
// and the port of its TURNS listener. ok is false, with empty strings, when:
//   - the file cannot be read or parsed (warning logged),
//   - the config names no namespace (warning logged),
//   - turns_listen_addr is blank, i.e. TURNS is disabled (silent, not an error),
//   - turns_listen_addr has no usable port (warning logged).
func parseTURNConfig(path string, logger *zap.Logger) (namespace, tlsPort string, ok bool) {
	raw, readErr := os.ReadFile(path)
	if readErr != nil {
		logger.Warn("read turn config failed", zap.String("path", path), zap.Error(readErr))
		return "", "", false
	}

	var parsed turn.Config
	if decodeErr := yaml.Unmarshal(raw, &parsed); decodeErr != nil {
		logger.Warn("parse turn config failed", zap.String("path", path), zap.Error(decodeErr))
		return "", "", false
	}

	switch {
	case parsed.Namespace == "":
		logger.Warn("turn config has empty namespace", zap.String("path", path))
		return "", "", false
	case strings.TrimSpace(parsed.TURNSListenAddr) == "":
		// TURNS disabled for this namespace — no stealth route, not an error.
		return "", "", false
	}

	listenPort, portErr := portFromListenAddr(parsed.TURNSListenAddr)
	if portErr != nil {
		logger.Warn("turn config has invalid turns_listen_addr",
			zap.String("path", path),
			zap.String("turns_listen_addr", parsed.TURNSListenAddr),
			zap.Error(portErr))
		return "", "", false
	}
	return parsed.Namespace, listenPort, true
}
// portFromListenAddr extracts the port from a "host:port" TURNS listen address
// (e.g. "0.0.0.0:5349" -> "5349"). The router always dials 127.0.0.1, so only
// the port is needed.
//
// The port must be something the router can actually dial: it is checked with
// net.LookupPort (a decimal port in 0..65535, or a service name known to the
// system) and port 0 is rejected, because ":0" asks the kernel for an ephemeral
// port that cannot be derived from the config. Without this check a typo such
// as "0.0.0.0:abc" or "0.0.0.0:70000" would install a route whose backend can
// never be reached, instead of being skipped with a warning.
//
// On success the port is returned exactly as written in addr; on failure the
// returned string is empty and the error describes the problem.
func portFromListenAddr(addr string) (string, error) {
	_, port, err := net.SplitHostPort(addr)
	if err != nil {
		return "", fmt.Errorf("split host:port: %w", err)
	}
	if port == "" {
		return "", fmt.Errorf("empty port in %q", addr)
	}
	number, err := net.LookupPort("tcp", port)
	if err != nil {
		return "", fmt.Errorf("invalid port %q in %q: %w", port, addr, err)
	}
	if number == 0 {
		return "", fmt.Errorf("port 0 in %q is not a dialable TURNS port", addr)
	}
	return port, nil
}

View File

@ -0,0 +1,167 @@
package sniproxy
import (
"os"
"path/filepath"
"testing"
"github.com/DeBrosOfficial/network/pkg/turn"
)
// writeTURNConfig is a test helper that reproduces the namespace spawner's
// on-disk layout, <namespacesDir>/<namespace>/configs/turn-<nodeID>.yaml, with
// a two-line YAML body holding the namespace and turns_listen_addr. Any
// filesystem error fails the calling test immediately.
func writeTURNConfig(t *testing.T, namespacesDir, namespace, nodeID, turnsAddr string) {
	t.Helper()

	cfgDir := filepath.Join(namespacesDir, namespace, "configs")
	if mkErr := os.MkdirAll(cfgDir, 0755); mkErr != nil {
		t.Fatalf("mkdir configs failed: %v", mkErr)
	}

	yamlBody := "namespace: \"" + namespace + "\"\n" +
		"turns_listen_addr: \"" + turnsAddr + "\"\n"
	target := filepath.Join(cfgDir, "turn-"+nodeID+".yaml")
	if writeErr := os.WriteFile(target, []byte(yamlBody), 0644); writeErr != nil {
		t.Fatalf("write turn config failed: %v", writeErr)
	}
}
// TestDiscoverTURNRoutes_scansFixtureDir verifies that two namespaces each with
// a TURNS listener yield two routes apiece (stealth host + turn.ns-* alias),
// while a namespace with an empty turns_listen_addr is skipped entirely — under
// both its alias and its stealth hostname.
func TestDiscoverTURNRoutes_scansFixtureDir(t *testing.T) {
	dir := t.TempDir()
	const base = "orama-devnet.network"

	writeTURNConfig(t, dir, "anchat", "node-1", "0.0.0.0:5349")
	writeTURNConfig(t, dir, "video", "node-1", "0.0.0.0:5350")
	// TURNS disabled — must produce no routes.
	writeTURNConfig(t, dir, "noturns", "node-1", "")

	routes, err := DiscoverTURNRoutes(TURNDiscoveryConfig{
		NamespacesDir: dir,
		BaseDomain:    base,
	}, nil)
	if err != nil {
		t.Fatalf("DiscoverTURNRoutes failed: %v", err)
	}

	// 2 namespaces with TURNS × 2 routes each = 4.
	if len(routes) != 4 {
		t.Fatalf("expected 4 routes, got %d: %+v", len(routes), routes)
	}

	got := make(map[string]string, len(routes))
	for _, r := range routes {
		got[r.Match] = r.Backend.Addr
	}

	// anchat: backend port 5349, stealth host + alias.
	anchatStealth := turn.StealthHostForNamespace("anchat", base)
	if got[anchatStealth] != "127.0.0.1:5349" {
		t.Errorf("anchat stealth route missing/wrong: %q -> %q", anchatStealth, got[anchatStealth])
	}
	if got["turn.ns-anchat."+base] != "127.0.0.1:5349" {
		t.Errorf("anchat alias route missing/wrong: got %q", got["turn.ns-anchat."+base])
	}

	// video: backend port 5350.
	videoStealth := turn.StealthHostForNamespace("video", base)
	if got[videoStealth] != "127.0.0.1:5350" {
		t.Errorf("video stealth route missing/wrong: %q -> %q", videoStealth, got[videoStealth])
	}
	if got["turn.ns-video."+base] != "127.0.0.1:5350" {
		t.Errorf("video alias route missing/wrong: got %q", got["turn.ns-video."+base])
	}

	// The disabled namespace must not appear under either of its hostnames.
	// Checking only the alias would miss a regression that emitted just the
	// stealth route.
	disabledHosts := []string{
		"turn.ns-noturns." + base,
		turn.StealthHostForNamespace("noturns", base),
	}
	for _, host := range disabledHosts {
		if _, ok := got[host]; ok {
			t.Errorf("noturns namespace should be skipped (empty turns_listen_addr): unexpected route for %q", host)
		}
	}
}
// TestDiscoverTURNRoutes_emptyTURNSAddrSkipped isolates the TURNS-disabled
// case: a lone namespace whose turns_listen_addr is empty must scan cleanly
// (no error) and contribute zero routes.
func TestDiscoverTURNRoutes_emptyTURNSAddrSkipped(t *testing.T) {
	nsRoot := t.TempDir()
	writeTURNConfig(t, nsRoot, "noturns", "node-1", "")

	scanCfg := TURNDiscoveryConfig{NamespacesDir: nsRoot, BaseDomain: "example.com"}
	found, scanErr := DiscoverTURNRoutes(scanCfg, nil)
	if scanErr != nil {
		t.Fatalf("DiscoverTURNRoutes failed: %v", scanErr)
	}
	if count := len(found); count != 0 {
		t.Errorf("expected 0 routes for TURNS-disabled namespace, got %d: %+v", count, found)
	}
}
// TestDiscoverTURNRoutes_unreadableDirReturnsError verifies a missing namespaces
// directory is a transient error (so callers keep previous routes), not a silent
// empty result.
func TestDiscoverTURNRoutes_unreadableDirReturnsError(t *testing.T) {
missing := filepath.Join(t.TempDir(), "does-not-exist")
routes, err := DiscoverTURNRoutes(TURNDiscoveryConfig{
NamespacesDir: missing,
BaseDomain: "example.com",
}, nil)
if err == nil {
t.Fatalf("expected an error for unreadable namespaces dir, got nil (routes=%+v)", routes)
}
if routes != nil {
t.Errorf("expected nil routes on error, got %+v", routes)
}
}
// TestDiscoverTURNRoutes_malformedFileSkipped places an unparseable
// turn-*.yaml next to a valid namespace and expects the scan to succeed with
// exactly the valid namespace's two routes — one bad file must not hide the
// rest.
func TestDiscoverTURNRoutes_malformedFileSkipped(t *testing.T) {
	const base = "example.com"
	nsRoot := t.TempDir()
	writeTURNConfig(t, nsRoot, "good", "node-1", "0.0.0.0:5349")

	// Hand-write a broken config for a second namespace.
	brokenDir := filepath.Join(nsRoot, "bad", "configs")
	if err := os.MkdirAll(brokenDir, 0755); err != nil {
		t.Fatalf("mkdir bad configs failed: %v", err)
	}
	brokenPath := filepath.Join(brokenDir, "turn-node-1.yaml")
	if err := os.WriteFile(brokenPath, []byte(":\n not: [valid"), 0644); err != nil {
		t.Fatalf("write malformed config failed: %v", err)
	}

	found, scanErr := DiscoverTURNRoutes(TURNDiscoveryConfig{NamespacesDir: nsRoot, BaseDomain: base}, nil)
	if scanErr != nil {
		t.Fatalf("DiscoverTURNRoutes failed: %v", scanErr)
	}
	if len(found) != 2 {
		t.Fatalf("expected 2 routes from the good namespace, got %d: %+v", len(found), found)
	}

	wantHost := turn.StealthHostForNamespace("good", base)
	sawStealth := false
	for i := range found {
		sawStealth = sawStealth || found[i].Match == wantHost
	}
	if !sawStealth {
		t.Errorf("good namespace stealth route missing despite malformed sibling")
	}
}
// TestTURNDiscoveryConfig_Validate checks the required-field rules: a config
// with both namespaces_dir and base_domain is valid, and dropping either one
// makes Validate report at least one error.
func TestTURNDiscoveryConfig_Validate(t *testing.T) {
	complete := TURNDiscoveryConfig{NamespacesDir: "/x", BaseDomain: "example.com"}
	if errs := complete.Validate(); len(errs) != 0 {
		t.Errorf("valid config reported errors: %v", errs)
	}

	withoutDir := TURNDiscoveryConfig{BaseDomain: "example.com"}
	if len(withoutDir.Validate()) == 0 {
		t.Errorf("missing namespaces_dir should be invalid")
	}

	withoutDomain := TURNDiscoveryConfig{NamespacesDir: "/x"}
	if len(withoutDomain.Validate()) == 0 {
		t.Errorf("missing base_domain should be invalid")
	}
}

View File

@ -36,6 +36,27 @@ type Config struct {
// Namespace this TURN instance belongs to
Namespace string `yaml:"namespace"`
// StealthDomain is the neutral, CDN-bland SNI hostname this server also
// answers TURNS for (e.g. "cdn-a1b2c3d4e5f6.orama-devnet.network").
//
// The stealth endpoint is an SNI-router passthrough, NOT a separate TURN
// server: a router on :443 reads only the TLS ClientHello SNI and forwards
// the raw bytes for this hostname to this same TURNS listener. TLS is still
// terminated here, by this TURN server, which therefore presents two certs
// (the primary TURN domain and StealthDomain) selected by ClientHello SNI.
// When empty, the stealth endpoint is disabled and behavior is unchanged.
StealthDomain string `yaml:"stealth_domain,omitempty"`
// TLSStealthCertPath is the path to the TLS certificate PEM file presented
// for StealthDomain. The SNI router only forwards bytes; this TURN server
// terminates the TLS handshake, so it needs the stealth domain's cert here.
TLSStealthCertPath string `yaml:"tls_stealth_cert_path,omitempty"`
// TLSStealthKeyPath is the path to the TLS private key PEM file for the
// StealthDomain certificate (TURN terminates TLS for the router-forwarded
// stealth connections).
TLSStealthKeyPath string `yaml:"tls_stealth_key_path,omitempty"`
}
// Validate checks the TURN configuration for errors

View File

@ -15,6 +15,11 @@ import (
"go.uber.org/zap"
)
// stealthConfigFieldCount is the number of stealth TLS config fields that must
// be set together (StealthDomain, TLSStealthCertPath, TLSStealthKeyPath). Any
// other non-zero count is a partial config and fails server startup; zero
// means the stealth endpoint is disabled.
const stealthConfigFieldCount = 3
// Server wraps a Pion TURN server with namespace-scoped HMAC-SHA1 authentication.
type Server struct {
config *Config
@ -24,8 +29,9 @@ type Server struct {
tcpListener net.Listener // Plain TCP listener on primary port (3478)
tlsListener net.Listener // TLS TCP listener for TURNS (port 5349)
certReloader *certReloader // hot-reloads the TURNS cert; nil when TURNS disabled
certStop chan struct{} // closed to stop the cert-reload watcher goroutine
certReloader *certReloader // hot-reloads the primary TURNS cert; nil when TURNS disabled
stealthCertReloader *certReloader // hot-reloads the stealth-SNI cert; nil when stealth disabled
certStop chan struct{} // closed to stop the cert-reload watcher goroutine(s)
}
// NewServer creates and starts a TURN server.
@ -94,8 +100,18 @@ func NewServer(cfg *Config, logger *zap.Logger) (*Server, error) {
s.closeListeners()
return nil, fmt.Errorf("failed to load TLS cert/key: %w", err)
}
s.certReloader = reloader
// Stealth SNI: when configured, terminate TLS for a second (neutral)
// hostname using its own hot-reloading cert. The SNI router forwards the
// raw stealth-domain bytes to this listener; selection is by ServerName.
if err := s.loadStealthCertReloader(cfg); err != nil {
s.closeListeners()
return nil, err
}
tlsConfig := &tls.Config{
GetCertificate: reloader.GetCertificate,
GetCertificate: newGetCertificate(cfg.StealthDomain, reloader, s.stealthCertReloader),
MinVersion: tls.VersionTLS12,
}
tlsListener, err := tls.Listen("tcp", cfg.TURNSListenAddr, tlsConfig)
@ -104,9 +120,11 @@ func NewServer(cfg *Config, logger *zap.Logger) (*Server, error) {
return nil, fmt.Errorf("failed to listen on %s: %w", cfg.TURNSListenAddr, err)
}
s.tlsListener = tlsListener
s.certReloader = reloader
s.certStop = make(chan struct{})
go reloader.watch(turnCertReloadInterval, s.certStop)
if s.stealthCertReloader != nil {
go s.stealthCertReloader.watch(turnCertReloadInterval, s.certStop)
}
listenerConfigs = append(listenerConfigs, pionTurn.ListenerConfig{
Listener: tlsListener,
@ -150,6 +168,62 @@ func NewServer(cfg *Config, logger *zap.Logger) (*Server, error) {
return s, nil
}
// loadStealthCertReloader prepares the cert reloader for the stealth SNI
// hostname and stores it on s.stealthCertReloader.
//
// StealthDomain, TLSStealthCertPath and TLSStealthKeyPath are all-or-nothing:
//   - none set: stealth is disabled, nothing is stored, nil is returned;
//   - some but not all set: an error naming the missing fields is returned,
//     because a partial config is an operator mistake that must fail startup
//     rather than silently run without the stealth endpoint;
//   - all set: the cert/key pair is loaded, and a load failure is returned
//     with both paths in the message.
func (s *Server) loadStealthCertReloader(cfg *Config) error {
	fields := []struct {
		key   string
		value string
	}{
		{"stealth_domain", cfg.StealthDomain},
		{"tls_stealth_cert_path", cfg.TLSStealthCertPath},
		{"tls_stealth_key_path", cfg.TLSStealthKeyPath},
	}

	var missing []string
	for _, f := range fields {
		if f.value == "" {
			missing = append(missing, f.key)
		}
	}

	switch len(missing) {
	case stealthConfigFieldCount:
		return nil // stealth disabled
	case 0:
		// Fully configured — fall through to load the certificate.
	default:
		return fmt.Errorf("turn: partial stealth config — set all of [stealth_domain, tls_stealth_cert_path, tls_stealth_key_path] or none; missing: %s", strings.Join(missing, ", "))
	}

	stealth, loadErr := newCertReloader(cfg.TLSStealthCertPath, cfg.TLSStealthKeyPath, s.logger)
	if loadErr != nil {
		return fmt.Errorf("failed to load stealth TLS cert/key (cert=%s, key=%s): %w", cfg.TLSStealthCertPath, cfg.TLSStealthKeyPath, loadErr)
	}
	s.stealthCertReloader = stealth
	return nil
}
// newGetCertificate returns the callback to install as
// tls.Config.GetCertificate. The stealth certificate is served only when a
// stealth reloader exists and the ClientHello's ServerName matches
// stealthDomain ignoring case. Everything else — a nil hello, an empty SNI,
// the primary TURN domain, or stealth being disabled — is delegated to the
// primary reloader, which keeps the pre-stealth behavior.
func newGetCertificate(stealthDomain string, primary, stealth *certReloader) func(*tls.ClientHelloInfo) (*tls.Certificate, error) {
	return func(hello *tls.ClientHelloInfo) (*tls.Certificate, error) {
		wantsStealth := stealth != nil &&
			hello != nil &&
			strings.EqualFold(hello.ServerName, stealthDomain)
		if !wantsStealth {
			return primary.GetCertificate(hello)
		}
		return stealth.GetCertificate(hello)
	}
}
// authHandler validates HMAC-SHA1 credentials.
// Username format: {expiry_unix}:{namespace}
// Password: base64(HMAC-SHA1(shared_secret, username))
@ -239,6 +313,8 @@ func (s *Server) closeListeners() {
s.tlsListener.Close()
s.tlsListener = nil
}
s.certReloader = nil
s.stealthCertReloader = nil
}
// GenerateCredentials creates time-limited HMAC-SHA1 TURN credentials.

26
core/pkg/turn/stealth.go Normal file
View File

@ -0,0 +1,26 @@
package turn
import (
"crypto/sha256"
"encoding/hex"
"fmt"
)
// stealthHostHashBytes is the number of leading digest bytes rendered into the
// stealth hostname label. 6 bytes (12 hex chars) keeps the label CDN-bland
// while making cross-namespace collisions negligible at platform scale.
const stealthHostHashBytes = 6

// StealthHostForNamespace returns the censorship-resistant TURNS hostname for
// a namespace, "cdn-<12-hex-of-sha256(namespace)>.<baseDomain>".
//
// Design (feat-124): the label deliberately hides the namespace (an SNI such as
// "cdn.ns-anchat-test.…" would hand DPI the exact app to block). It is a pure
// function of its inputs, so the cluster manager, namespace gateway, SNI router
// and DNS all derive the same value with no coordination, and it is unique per
// namespace because the SNI router maps it to that namespace's TURN-TLS
// backend.
func StealthHostForNamespace(namespace, baseDomain string) string {
	hasher := sha256.New()
	hasher.Write([]byte(namespace)) // hash.Hash.Write never returns an error
	label := hex.EncodeToString(hasher.Sum(nil)[:stealthHostHashBytes])
	return fmt.Sprintf("cdn-%s.%s", label, baseDomain)
}

View File

@ -0,0 +1,201 @@
package turn
import (
"bytes"
"crypto/tls"
"path/filepath"
"strings"
"testing"
"go.uber.org/zap"
)
// feat-124: the stealth TURNS endpoint is an SNI-router passthrough — the TURN
// server terminates TLS for both the primary TURN domain and a neutral stealth
// domain, selecting the cert by ClientHello SNI. The tests in this file pin:
// per-SNI selection (incl. empty SNI, case-insensitivity), partial-config
// startup failure, and the missing stealth-cert startup failure (no silent
// fallback).
const (
	// stealthTestDomain is the stealth SNI hostname used as Config.StealthDomain
	// and as the ClientHello ServerName that must select the stealth cert.
	stealthTestDomain = "cdn-a1b2c3d4e5f6.orama-devnet.network"
	// turnTestDomain is a non-stealth SNI hostname that must select the
	// primary cert.
	turnTestDomain = "turn.orama-devnet.network"
)
// writeNamedCert generates a self-signed cert/key pair for "127.0.0.1" at
// <dir>/<name>.pem and <dir>/<name>.key.pem and returns both paths. A
// generation failure fails the calling test.
func writeNamedCert(t *testing.T, dir, name string) (certPath, keyPath string) {
	t.Helper()

	certFile := filepath.Join(dir, name+".pem")
	keyFile := filepath.Join(dir, name+".key.pem")
	if genErr := GenerateSelfSignedCert(certFile, keyFile, "127.0.0.1"); genErr != nil {
		t.Fatalf("GenerateSelfSignedCert(%s): %v", name, genErr)
	}
	return certFile, keyFile
}
// certLeafForSNI invokes getCert with a ClientHello carrying serverName and
// returns the first certificate in the served chain (cert.Certificate[0]). The
// calling test fails if getCert errors or serves a nil or empty certificate.
func certLeafForSNI(t *testing.T, getCert func(*tls.ClientHelloInfo) (*tls.Certificate, error), serverName string) []byte {
	t.Helper()

	hello := &tls.ClientHelloInfo{ServerName: serverName}
	served, err := getCert(hello)
	switch {
	case err != nil:
		t.Fatalf("GetCertificate(%q): %v", serverName, err)
	case served == nil || len(served.Certificate) == 0:
		t.Fatalf("GetCertificate(%q) returned an empty certificate", serverName)
	}
	return served.Certificate[0]
}
// TestGetCertificate_stealthSNISelectsStealthCert builds a selector over two
// distinct certs and checks which one each ServerName gets: the stealth domain
// (in any letter case) gets the stealth cert, while the TURN domain and an
// empty SNI get the primary cert.
func TestGetCertificate_stealthSNISelectsStealthCert(t *testing.T) {
	certDir := t.TempDir()
	primaryCertPath, primaryKeyPath := writeNamedCert(t, certDir, "primary")
	stealthCertPath, stealthKeyPath := writeNamedCert(t, certDir, "stealth")

	primaryReloader, err := newCertReloader(primaryCertPath, primaryKeyPath, zap.NewNop())
	if err != nil {
		t.Fatalf("newCertReloader(primary): %v", err)
	}
	stealthReloader, err := newCertReloader(stealthCertPath, stealthKeyPath, zap.NewNop())
	if err != nil {
		t.Fatalf("newCertReloader(stealth): %v", err)
	}

	selectCert := newGetCertificate(stealthTestDomain, primaryReloader, stealthReloader)
	primaryLeaf := leafDER(t, primaryReloader)
	stealthLeaf := leafDER(t, stealthReloader)
	// The assertions below are meaningless if both reloaders hold the same cert.
	if bytes.Equal(primaryLeaf, stealthLeaf) {
		t.Fatal("test setup error: primary and stealth certs must be distinct")
	}

	type sniCase struct {
		name       string
		serverName string
		want       []byte
	}
	cases := []sniCase{
		{name: "stealth SNI selects stealth cert", serverName: stealthTestDomain, want: stealthLeaf},
		{name: "stealth SNI is case-insensitive", serverName: strings.ToUpper(stealthTestDomain), want: stealthLeaf},
		{name: "turn domain SNI selects primary cert", serverName: turnTestDomain, want: primaryLeaf},
		{name: "empty SNI selects primary cert", serverName: "", want: primaryLeaf},
	}
	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			if served := certLeafForSNI(t, selectCert, tc.serverName); !bytes.Equal(served, tc.want) {
				t.Errorf("ServerName=%q served the wrong certificate", tc.serverName)
			}
		})
	}
}
// TestGetCertificate_stealthDisabledAlwaysPrimary checks the disabled path:
// with a nil stealth reloader, every ServerName — empty, the TURN domain, and
// even one shaped like a stealth host — is served the primary cert.
func TestGetCertificate_stealthDisabledAlwaysPrimary(t *testing.T) {
	certDir := t.TempDir()
	certFile, keyFile := writeNamedCert(t, certDir, "primary")
	primaryReloader, err := newCertReloader(certFile, keyFile, zap.NewNop())
	if err != nil {
		t.Fatalf("newCertReloader(primary): %v", err)
	}

	// Stealth disabled: empty domain and nil reloader.
	selectCert := newGetCertificate("", primaryReloader, nil)
	primaryLeaf := leafDER(t, primaryReloader)

	serverNames := []string{"", turnTestDomain, stealthTestDomain}
	for _, sni := range serverNames {
		served := certLeafForSNI(t, selectCert, sni)
		if !bytes.Equal(served, primaryLeaf) {
			t.Errorf("ServerName=%q must serve the primary cert when stealth is disabled", sni)
		}
	}
}
// baseStealthConfig returns a TURN Config with a freshly generated primary TLS
// cert, TURNS enabled on a loopback address with port 0, and every stealth
// field left empty. Tests mutate the stealth fields on the returned value.
func baseStealthConfig(t *testing.T) *Config {
	t.Helper()

	certFile, keyFile := writeNamedCert(t, t.TempDir(), "primary")
	cfg := &Config{
		ListenAddr:      "127.0.0.1:0",
		TURNSListenAddr: "127.0.0.1:0",
		PublicIP:        "127.0.0.1",
		Realm:           "orama-devnet.network",
		AuthSecret:      "test-secret-key",
		RelayPortStart:  49152,
		RelayPortEnd:    50000,
		Namespace:       "test-ns",
	}
	cfg.TLSCertPath = certFile
	cfg.TLSKeyPath = keyFile
	return cfg
}
// TestServer_partialStealthConfigFails applies each partial stealth setup to a
// base config and expects NewServer to return an error whose text mentions
// every field listed as missing for that case.
func TestServer_partialStealthConfigFails(t *testing.T) {
	type partialCase struct {
		name        string
		mutate      func(c *Config)
		wantMissing []string
	}

	cases := []partialCase{
		{
			name: "only stealth_domain set",
			mutate: func(c *Config) {
				c.StealthDomain = stealthTestDomain
			},
			wantMissing: []string{"tls_stealth_cert_path", "tls_stealth_key_path"},
		},
		{
			name: "domain and cert set, key missing",
			mutate: func(c *Config) {
				c.StealthDomain = stealthTestDomain
				c.TLSStealthCertPath = "/tmp/x.pem"
			},
			wantMissing: []string{"tls_stealth_key_path"},
		},
		{
			name: "only cert path set",
			mutate: func(c *Config) {
				c.TLSStealthCertPath = "/tmp/x.pem"
			},
			wantMissing: []string{"stealth_domain", "tls_stealth_key_path"},
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			cfg := baseStealthConfig(t)
			tc.mutate(cfg)

			srv, err := NewServer(cfg, zap.NewNop())
			if err == nil {
				// Startup unexpectedly succeeded: release the server before failing.
				srv.Close()
				t.Fatal("expected startup to fail on partial stealth config")
			}

			msg := err.Error()
			for _, field := range tc.wantMissing {
				if strings.Contains(msg, field) {
					continue
				}
				t.Errorf("error must name the missing field %q; got: %v", field, err)
			}
		})
	}
}
// TestServer_missingStealthCertFails sets all three stealth fields but points
// the cert and key paths at files that were never created, then expects
// NewServer to fail with an error that contains the stealth cert path.
func TestServer_missingStealthCertFails(t *testing.T) {
	cfg := baseStealthConfig(t)
	cfg.StealthDomain = stealthTestDomain
	cfg.TLSStealthCertPath = filepath.Join(t.TempDir(), "absent-cert.pem")
	cfg.TLSStealthKeyPath = filepath.Join(t.TempDir(), "absent-key.pem")

	srv, startErr := NewServer(cfg, zap.NewNop())
	if startErr == nil {
		// Startup unexpectedly succeeded: release the server before failing.
		srv.Close()
		t.Fatal("expected startup to fail when the stealth cert file is absent")
	}

	wantPath := cfg.TLSStealthCertPath
	if strings.Contains(startErr.Error(), wantPath) {
		return
	}
	t.Errorf("error must name the missing stealth cert path %q; got: %v", wantPath, startErr)
}
// TestServer_fullStealthConfigStarts supplies a stealth domain together with a
// freshly written stealth cert/key pair and expects NewServer to succeed and
// leave stealthCertReloader non-nil.
func TestServer_fullStealthConfigStarts(t *testing.T) {
	cfg := baseStealthConfig(t)

	certPath, keyPath := writeNamedCert(t, t.TempDir(), "stealth")
	cfg.StealthDomain = stealthTestDomain
	cfg.TLSStealthCertPath = certPath
	cfg.TLSStealthKeyPath = keyPath

	srv, startErr := NewServer(cfg, zap.NewNop())
	if startErr != nil {
		t.Fatalf("expected startup to succeed with full stealth config: %v", startErr)
	}
	defer srv.Close()

	if srv.stealthCertReloader != nil {
		return
	}
	t.Error("stealthCertReloader must be set when stealth is fully configured")
}

View File

@ -0,0 +1,53 @@
package turn
import (
"regexp"
"strings"
"testing"
)
// TestStealthHostForNamespace_deterministic calls StealthHostForNamespace twice
// with identical arguments and requires identical results, then checks the
// result's shape: a "cdn-" prefix, the base domain as suffix, and a first label
// of the expected length.
func TestStealthHostForNamespace_deterministic(t *testing.T) {
	first := StealthHostForNamespace("anchat-test", "orama-devnet.network")
	second := StealthHostForNamespace("anchat-test", "orama-devnet.network")
	if first != second {
		t.Fatalf("not deterministic: %q vs %q", first, second)
	}

	shapeOK := strings.HasPrefix(first, "cdn-") && strings.HasSuffix(first, ".orama-devnet.network")
	if !shapeOK {
		t.Errorf("unexpected shape: %q", first)
	}

	// The first DNS label must be "cdn-" followed by two characters per
	// stealthHostHashBytes (presumably the hex encoding of the hash bytes).
	label, _, _ := strings.Cut(first, ".")
	wantLen := len("cdn-") + stealthHostHashBytes*2
	if len(label) != wantLen {
		t.Errorf("label %q has wrong length", label)
	}
}
// TestStealthHostForNamespace_namespaceNotLeaked checks that the host derived
// for namespace "anchat-test" does not contain the substring "anchat".
func TestStealthHostForNamespace_namespaceNotLeaked(t *testing.T) {
	host := StealthHostForNamespace("anchat-test", "orama-devnet.network")
	if leaked := strings.Contains(host, "anchat"); !leaked {
		return
	}
	t.Errorf("stealth host %q leaks the namespace name", host)
}
// TestStealthHostForNamespace_distinctPerNamespace checks that two different
// namespaces under the same base domain yield different stealth hosts.
func TestStealthHostForNamespace_distinctPerNamespace(t *testing.T) {
	hostA := StealthHostForNamespace("ns-a", "example.com")
	hostB := StealthHostForNamespace("ns-b", "example.com")
	if hostA != hostB {
		return
	}
	t.Fatalf("different namespaces produced the same stealth host %q", hostA)
}
// TestStealthHostForNamespace_matchesDNSNameAllowlist guards the contract that
// every derived stealth host passes the Caddyfile DNS-name allowlist
// (pkg/namespace turn_cert.go dnsNamePattern): that defense-in-depth check must
// never reject a legitimate stealth domain. The same conservative pattern is
// duplicated here rather than imported, to avoid an import cycle.
func TestStealthHostForNamespace_matchesDNSNameAllowlist(t *testing.T) {
	allowlist := regexp.MustCompile(`^[a-z0-9]([a-z0-9-]*[a-z0-9])?(\.[a-z0-9]([a-z0-9-]*[a-z0-9])?)+$`)

	namespaces := []string{
		"anchat-test",
		"a",
		"ns-with-many-dashes",
		"x1y2z3",
	}
	for _, ns := range namespaces {
		host := StealthHostForNamespace(ns, "orama-devnet.network")
		if allowlist.MatchString(host) {
			continue
		}
		t.Errorf("derived stealth host %q for ns %q fails the DNS-name allowlist", host, ns)
	}
}