mirror of
https://github.com/DeBrosOfficial/orama.git
synced 2026-06-16 22:54:12 +00:00
feat(gateway): implement stealth TURN discovery and configuration
- Add `turn_stealth_domain` to gateway config for stealth TURN support
- Introduce `turn_discovery` in `sni-router` to auto-discover per-namespace routes
- Add database migration to enable stealth TURN per namespace
- Document ephemeral state API in `SERVERLESS.md`
This commit is contained in:
parent
f192cd0b84
commit
b9d5f542e1
@ -74,6 +74,10 @@ func parseGatewayConfig(logger *logging.ColoredLogger) *gateway.Config {
|
||||
SFUPort int `yaml:"sfu_port"`
|
||||
TURNDomain string `yaml:"turn_domain"`
|
||||
TURNSecret string `yaml:"turn_secret"`
|
||||
// TURNStealthDomain is the neutral stealth TURNS:443 host (feat-124).
|
||||
// Maps to cfg.StealthCDNDomain so turn.credentials advertises the
|
||||
// stealth rung of the URI ladder.
|
||||
TURNStealthDomain string `yaml:"turn_stealth_domain"`
|
||||
}
|
||||
|
||||
type yamlCfg struct {
|
||||
@ -256,6 +260,9 @@ func parseGatewayConfig(logger *logging.ColoredLogger) *gateway.Config {
|
||||
if v := strings.TrimSpace(y.WebRTC.TURNSecret); v != "" {
|
||||
cfg.TURNSecret = v
|
||||
}
|
||||
if v := strings.TrimSpace(y.WebRTC.TURNStealthDomain); v != "" {
|
||||
cfg.StealthCDNDomain = v
|
||||
}
|
||||
|
||||
// Validate configuration
|
||||
if errs := cfg.ValidateConfig(); len(errs) > 0 {
|
||||
|
||||
@ -32,6 +32,18 @@
|
||||
// backend:
|
||||
// name: gateway
|
||||
// addr: "127.0.0.1:8443"
|
||||
// turn_discovery:
|
||||
// namespaces_dir: /opt/orama/.orama/data/namespaces
|
||||
// base_domain: orama-devnet.network
|
||||
// rescan_interval: 30s
|
||||
//
|
||||
// When the turn_discovery.namespaces_dir is set, the router additionally scans
|
||||
// <namespaces_dir>/*/configs/turn-*.yaml every rescan_interval and derives two
|
||||
// routes per namespace with a TURNS listener — the bland stealth host and a
|
||||
// "turn.ns-<namespace>.<base_domain>" alias — both forwarding to that
|
||||
// namespace's local TURNS port. Discovered routes are merged with the static
|
||||
// routes above (static wins on conflict); a transient scan error keeps the
|
||||
// previously-installed routes.
|
||||
package main
|
||||
|
||||
import (
|
||||
@ -69,6 +81,15 @@ type yamlRoute struct {
|
||||
Backend yamlBackend `yaml:"backend"`
|
||||
}
|
||||
|
||||
// yamlTURNDiscovery is the YAML-decoding counterpart of
// sniproxy.TURNDiscoveryConfig (the `turn_discovery` section of the router
// config). Discovery is considered configured once namespaces_dir is
// non-empty; the router then derives per-namespace stealth-TURN routes from
// <namespaces_dir>/*/configs/turn-*.yaml.
type yamlTURNDiscovery struct {
	// NamespacesDir is the directory scanned for per-namespace TURN configs.
	NamespacesDir string `yaml:"namespaces_dir"`
	// BaseDomain is the domain used to build the discovered SNI hostnames.
	BaseDomain string `yaml:"base_domain"`
	// RescanInterval is the period between scans of NamespacesDir.
	RescanInterval time.Duration `yaml:"rescan_interval"`
}
|
||||
|
||||
// yamlConfig is the on-disk configuration shape.
|
||||
type yamlConfig struct {
|
||||
Listen string `yaml:"listen"`
|
||||
@ -77,6 +98,12 @@ type yamlConfig struct {
|
||||
MaxConcurrentConns int `yaml:"max_concurrent_conns"`
|
||||
Fallback yamlBackend `yaml:"fallback"`
|
||||
Routes []yamlRoute `yaml:"routes"`
|
||||
TURNDiscovery yamlTURNDiscovery `yaml:"turn_discovery"`
|
||||
}
|
||||
|
||||
// discoveryEnabled reports whether TURN route auto-discovery is configured.
|
||||
func (y *yamlConfig) discoveryEnabled() bool {
|
||||
return y.TURNDiscovery.NamespacesDir != ""
|
||||
}
|
||||
|
||||
func main() {
|
||||
@ -94,25 +121,49 @@ func main() {
|
||||
|
||||
router := sniproxy.NewRouter(toBackend(cfg.Fallback))
|
||||
|
||||
// Hot-reload the route table from the config file so a namespace's
|
||||
// cdn/turn SNI routes can be added or removed without restarting the
|
||||
// router (Router.Replace swaps atomically under in-flight connections).
|
||||
reloader := sniproxy.NewFileRouteReloader(configPath,
|
||||
func() ([]sniproxy.Route, sniproxy.Backend, error) {
|
||||
// The static routes (and fallback) always come from the config file; this
|
||||
// closure is re-evaluated on every reload/rescan so a hand-edit to the
|
||||
// config is picked up without a restart.
|
||||
staticSource := func() ([]sniproxy.Route, sniproxy.Backend, error) {
|
||||
y, err := loadConfig(configPath)
|
||||
if err != nil {
|
||||
return nil, sniproxy.Backend{}, err
|
||||
}
|
||||
return toRoutes(y.Routes), toBackend(y.Fallback), nil
|
||||
}, router, logger.Logger)
|
||||
}
|
||||
|
||||
routeStop := make(chan struct{})
|
||||
defer close(routeStop)
|
||||
|
||||
if cfg.discoveryEnabled() {
|
||||
// Auto-discover per-namespace stealth-TURN routes by scanning the
|
||||
// namespaces directory, merged with the static config routes (static
|
||||
// wins on conflict), re-installed atomically every rescan_interval. A
|
||||
// transient scan error keeps the previously-installed routes.
|
||||
discoverer := sniproxy.NewTURNRouteDiscoverer(
|
||||
sniproxy.TURNDiscoveryConfig{
|
||||
NamespacesDir: cfg.TURNDiscovery.NamespacesDir,
|
||||
BaseDomain: cfg.TURNDiscovery.BaseDomain,
|
||||
RescanInterval: cfg.TURNDiscovery.RescanInterval,
|
||||
}, staticSource, router, logger.Logger)
|
||||
if err := discoverer.Apply(); err != nil {
|
||||
logger.ComponentError(logging.ComponentSNI, "Failed to install initial routes",
|
||||
zap.Error(err))
|
||||
os.Exit(1)
|
||||
}
|
||||
go discoverer.Run(routeStop)
|
||||
} else {
|
||||
// No discovery configured: hot-reload the static route table from the
|
||||
// config file so cdn/turn SNI routes can be added or removed without
|
||||
// restarting (Router.Replace swaps atomically under in-flight conns).
|
||||
reloader := sniproxy.NewFileRouteReloader(configPath, staticSource, router, logger.Logger)
|
||||
if err := reloader.Apply(); err != nil {
|
||||
logger.ComponentError(logging.ComponentSNI, "Failed to install initial routes",
|
||||
zap.Error(err))
|
||||
os.Exit(1)
|
||||
}
|
||||
routeStop := make(chan struct{})
|
||||
defer close(routeStop)
|
||||
go reloader.Watch(sniproxy.DefaultRouteReloadInterval, routeStop)
|
||||
}
|
||||
|
||||
srv := sniproxy.NewServer(router, sniproxy.Config{
|
||||
ClientHelloTimeout: cfg.ClientHelloTimeout,
|
||||
@ -235,6 +286,16 @@ func validateConfig(y *yamlConfig) []string {
|
||||
errs = append(errs, fmt.Sprintf("routes[%d].backend.addr: required", i))
|
||||
}
|
||||
}
|
||||
// turn_discovery is optional, but when partially set (namespaces_dir XOR
|
||||
// base_domain) it is almost certainly a misconfiguration, so validate the
|
||||
// pair together via the library's own Validate.
|
||||
if y.discoveryEnabled() || y.TURNDiscovery.BaseDomain != "" {
|
||||
dc := sniproxy.TURNDiscoveryConfig{
|
||||
NamespacesDir: y.TURNDiscovery.NamespacesDir,
|
||||
BaseDomain: y.TURNDiscovery.BaseDomain,
|
||||
}
|
||||
errs = append(errs, dc.Validate()...)
|
||||
}
|
||||
return errs
|
||||
}
|
||||
|
||||
|
||||
@ -187,6 +187,69 @@ The legacy `db_execute` is kept indefinitely so existing functions don't break.
|
||||
|----------|-------------|
|
||||
| `pubsub_publish(topic, dataJSON)` → bool | Publish message to a PubSub topic. Returns true on success. |
|
||||
|
||||
### Ephemeral State (WS-subscribe-tracked)
|
||||
|
||||
Short-lived per-subscriber state (typing indicators, presence, call ringing,
|
||||
live cursors) that the gateway **auto-clears the moment the owning WebSocket
|
||||
client disconnects** — no heartbeats, no prune crons. State also expires on a
|
||||
TTL backstop (default 60 s, max 30 min). The owning client ID and namespace
|
||||
come from the server-trusted invocation context; functions cannot spoof them.
|
||||
|
||||
| Function | Description |
|
||||
|----------|-------------|
|
||||
| `ephemeral_state_set(topic, key, payload, ttlMs)` → u32 | Record state owned by the CURRENT invocation's WS client and publish an `ephemeral.set` event on the topic. 1 = ok, 0 = failure (no WS client, empty topic/key, payload > 16 KiB, > 256 keys/client). |
|
||||
| `ephemeral_state_clear(topic, key)` → u32 | Clear state this client owns; publishes `ephemeral.clear` (reason `explicit`). Idempotent — clearing a missing/non-owned key returns 1. |
|
||||
| `ephemeral_state_list(topic)` → u64 | Reconnect catch-up read: packed `ptr<<32\|len` of a JSON envelope with the live entries on the topic. Works without a WS client (read-only). 0 on failure. |
|
||||
|
||||
Raw import signatures (pointer/length ABI — note `ttlMs` is **i64**):
|
||||
|
||||
```go
|
||||
//go:wasmimport env ephemeral_state_set
|
||||
func ephemeralStateSet(topicPtr *byte, topicLen uint32, keyPtr *byte, keyLen uint32,
|
||||
payloadPtr *byte, payloadLen uint32, ttlMs int64) uint32
|
||||
|
||||
//go:wasmimport env ephemeral_state_clear
|
||||
func ephemeralStateClear(topicPtr *byte, topicLen uint32, keyPtr *byte, keyLen uint32) uint32
|
||||
|
||||
//go:wasmimport env ephemeral_state_list
|
||||
func ephemeralStateList(topicPtr *byte, topicLen uint32) uint64 // ptr<<32|len of JSON
|
||||
```
|
||||
|
||||
Synthetic events are published **on the same topic** the state lives on, with
|
||||
the `_orama` control-frame discriminator (same dispatch pattern as the
|
||||
`auth.refresh` frame). Subscribers update their local view from the stream:
|
||||
|
||||
```json
|
||||
{"_orama":"ephemeral.set", "topic":"typing:room1", "key":"user-7", "client_id":"ws-abc", "payload":"<base64>"}
|
||||
{"_orama":"ephemeral.clear","topic":"typing:room1", "key":"user-7", "client_id":"ws-abc", "reason":"disconnect"}
|
||||
```
|
||||
|
||||
`reason` is `explicit` (function called clear), `disconnect` (owning WS client
|
||||
went away — the zero-lag path), or `expired` (TTL backstop). `payload` is
|
||||
base64 (Go `[]byte` JSON encoding) and present only on `ephemeral.set`.
|
||||
|
||||
`ephemeral_state_list` returns:
|
||||
|
||||
```json
|
||||
{"entries":[{"key":"user-7","client_id":"ws-abc","payload":"<base64>","expires_in_ms":48211}]}
|
||||
```
|
||||
|
||||
Typing-indicator shape (called from a `ws_persistent` rpc-router function):
|
||||
|
||||
```go
|
||||
// Client sends {"op":"typing.start","room":"room1","user":"user-7"} → handler:
|
||||
ephemeralStateSet(ptr("typing:"+room), len32("typing:"+room),
|
||||
ptr(userID), len32(userID), nil, 0, 30_000) // 30s TTL backstop
|
||||
|
||||
// Client sends typing.stop → handler:
|
||||
ephemeralStateClear(ptr("typing:"+room), len32("typing:"+room), ptr(userID), len32(userID))
|
||||
|
||||
// No typing.stop needed on app kill / network drop: the WS disconnect publishes
|
||||
// {"_orama":"ephemeral.clear",...,"reason":"disconnect"} to every subscriber
|
||||
// immediately. On (re)connect, call ephemeral_state_list("typing:"+room) once
|
||||
// to seed local state, then track the event stream.
|
||||
```
|
||||
|
||||
### Logging
|
||||
|
||||
| Function | Description |
|
||||
|
||||
16
core/migrations/030_webrtc_stealth.sql
Normal file
16
core/migrations/030_webrtc_stealth.sql
Normal file
@ -0,0 +1,16 @@
|
||||
-- =============================================================================
|
||||
-- 030_webrtc_stealth.sql
|
||||
--
|
||||
-- Stealth TURNS-over-443 per namespace — feat-124 (censorship-resistant
|
||||
-- calling). When stealth_enabled is true the namespace's TURN servers carry a
|
||||
-- second TLS certificate for the neutral stealth hostname
|
||||
-- (cdn-<hash>.<base-domain>, derived via turn.StealthHostForNamespace), the
|
||||
-- SNI router forwards :443 ClientHellos for that hostname to the TURN TLS
|
||||
-- listener, and turn.credentials advertises `turns:<stealth-host>:443` as the
|
||||
-- final rung of the ICE URI ladder.
|
||||
--
|
||||
-- Default false → backward compatible: existing WebRTC namespaces keep the
|
||||
-- baseline udp:3478 / tcp:3478 / turns:5349 URIs unchanged.
|
||||
-- =============================================================================
|
||||
|
||||
ALTER TABLE namespace_webrtc_config ADD COLUMN stealth_enabled BOOLEAN DEFAULT FALSE;
|
||||
@ -79,6 +79,8 @@ func showNamespaceHelp() {
|
||||
fmt.Printf(" repair <namespace> - Repair an under-provisioned namespace cluster\n")
|
||||
fmt.Printf(" enable webrtc --namespace NS - Enable WebRTC (SFU + TURN) for a namespace\n")
|
||||
fmt.Printf(" disable webrtc --namespace NS - Disable WebRTC for a namespace\n")
|
||||
fmt.Printf(" enable webrtc-stealth --namespace NS - Enable stealth TURNS over :443 (feat-124)\n")
|
||||
fmt.Printf(" disable webrtc-stealth --namespace NS - Disable stealth TURNS\n")
|
||||
fmt.Printf(" webrtc-status --namespace NS - Show WebRTC service status\n")
|
||||
fmt.Printf(" help - Show this help message\n\n")
|
||||
fmt.Printf("Flags:\n")
|
||||
@ -226,8 +228,12 @@ func handleNamespaceDelete(force bool) {
|
||||
|
||||
func handleNamespaceEnable(args []string) {
|
||||
feature := args[0]
|
||||
if feature == "webrtc-stealth" {
|
||||
handleNamespaceStealthToggle(args[1:], true)
|
||||
return
|
||||
}
|
||||
if feature != "webrtc" {
|
||||
fmt.Fprintf(os.Stderr, "Unknown feature: %s\nSupported features: webrtc\n", feature)
|
||||
fmt.Fprintf(os.Stderr, "Unknown feature: %s\nSupported features: webrtc, webrtc-stealth\n", feature)
|
||||
os.Exit(1)
|
||||
}
|
||||
|
||||
@ -283,10 +289,82 @@ func handleNamespaceEnable(args []string) {
|
||||
fmt.Printf(" TURN instances: 2 nodes (relay on public IPs)\n")
|
||||
}
|
||||
|
||||
// handleNamespaceStealthToggle drives /v1/namespace/webrtc/stealth/{enable|disable}
|
||||
// (feat-124 — censorship-resistant TURNS over :443).
|
||||
func handleNamespaceStealthToggle(args []string, enable bool) {
|
||||
verb := "disable"
|
||||
if enable {
|
||||
verb = "enable"
|
||||
}
|
||||
|
||||
var ns string
|
||||
fs := flag.NewFlagSet("namespace "+verb+" webrtc-stealth", flag.ExitOnError)
|
||||
fs.StringVar(&ns, "namespace", "", "Namespace name")
|
||||
_ = fs.Parse(args)
|
||||
|
||||
if ns == "" {
|
||||
fmt.Fprintf(os.Stderr, "Usage: orama namespace %s webrtc-stealth --namespace <name>\n", verb)
|
||||
os.Exit(1)
|
||||
}
|
||||
|
||||
gatewayURL, apiKey := loadAuthForNamespace(ns)
|
||||
|
||||
if enable {
|
||||
fmt.Printf("Enabling WebRTC stealth (TURNS over :443) for namespace '%s'...\n", ns)
|
||||
fmt.Printf("This provisions a Let's Encrypt cert for the neutral stealth host and may take up to ~2 minutes.\n")
|
||||
} else {
|
||||
fmt.Printf("Disabling WebRTC stealth for namespace '%s'...\n", ns)
|
||||
}
|
||||
|
||||
url := fmt.Sprintf("%s/v1/namespace/webrtc/stealth/%s", gatewayURL, verb)
|
||||
req, err := http.NewRequest(http.MethodPost, url, nil)
|
||||
if err != nil {
|
||||
fmt.Fprintf(os.Stderr, "Failed to create request: %v\n", err)
|
||||
os.Exit(1)
|
||||
}
|
||||
req.Header.Set("Authorization", "Bearer "+apiKey)
|
||||
|
||||
client := &http.Client{
|
||||
Transport: &http.Transport{
|
||||
TLSClientConfig: &tls.Config{InsecureSkipVerify: true},
|
||||
},
|
||||
}
|
||||
resp, err := client.Do(req)
|
||||
if err != nil {
|
||||
fmt.Fprintf(os.Stderr, "Failed to connect to gateway: %v\n", err)
|
||||
os.Exit(1)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
var result map[string]interface{}
|
||||
json.NewDecoder(resp.Body).Decode(&result)
|
||||
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
errMsg := "unknown error"
|
||||
if e, ok := result["error"].(string); ok {
|
||||
errMsg = e
|
||||
}
|
||||
fmt.Fprintf(os.Stderr, "Failed to %s WebRTC stealth: %s\n", verb, errMsg)
|
||||
os.Exit(1)
|
||||
}
|
||||
|
||||
if enable {
|
||||
fmt.Printf("WebRTC stealth enabled for namespace '%s'.\n", ns)
|
||||
fmt.Printf(" turn.credentials now advertises the full URI ladder including turns:<stealth-host>:443.\n")
|
||||
fmt.Printf(" Make sure the SNI router is enabled on the TURN nodes (node.yaml sni_router.enabled).\n")
|
||||
} else {
|
||||
fmt.Printf("WebRTC stealth disabled for namespace '%s'.\n", ns)
|
||||
}
|
||||
}
|
||||
|
||||
func handleNamespaceDisable(args []string) {
|
||||
feature := args[0]
|
||||
if feature == "webrtc-stealth" {
|
||||
handleNamespaceStealthToggle(args[1:], false)
|
||||
return
|
||||
}
|
||||
if feature != "webrtc" {
|
||||
fmt.Fprintf(os.Stderr, "Unknown feature: %s\nSupported features: webrtc\n", feature)
|
||||
fmt.Fprintf(os.Stderr, "Unknown feature: %s\nSupported features: webrtc, webrtc-stealth\n", feature)
|
||||
os.Exit(1)
|
||||
}
|
||||
|
||||
|
||||
@ -230,9 +230,54 @@ func (cg *ConfigGenerator) GenerateNodeConfig(peerAddresses []string, vpsIP stri
|
||||
return "", fmt.Errorf("failed to populate webrtc config: %w", err)
|
||||
}
|
||||
|
||||
// Stealth TURN SNI router (feat-124). Like the webrtc block, sni_router is
|
||||
// an operator opt-in that only exists in the previous node.yaml, so carry
|
||||
// it forward across regeneration. Without this, a Phase4 regen would reset
|
||||
// sni_router.enabled to false, stop the :443 router and break stealth TURN
|
||||
// for every region that relies on it (the same regen-wipe class of outage
|
||||
// as bugboard #259/#846).
|
||||
cg.populateSNIRouterConfig(&data)
|
||||
|
||||
return templates.RenderNodeConfig(data)
|
||||
}
|
||||
|
||||
// populateSNIRouterConfig carries forward the operator-set sni_router.enabled
|
||||
// flag from the existing node.yaml so a config regeneration never silently
|
||||
// disables the stealth TURN-over-443 router. Absence of the file or block
|
||||
// leaves the flag at its default (false).
|
||||
func (cg *ConfigGenerator) populateSNIRouterConfig(data *templates.NodeConfigData) {
|
||||
data.SNIRouterEnabled = cg.readExistingSNIRouterEnabled()
|
||||
}
|
||||
|
||||
// SNIRouterEnabled reports whether the node's on-disk node.yaml has opted in to
|
||||
// the stealth TURN-over-443 SNI router. The orchestrator reads this AFTER
|
||||
// Phase4 has written node.yaml to decide whether to move Caddy to :8443 and
|
||||
// start the router unit. Returns false when the config or block is absent.
|
||||
func (cg *ConfigGenerator) SNIRouterEnabled() bool {
|
||||
return cg.readExistingSNIRouterEnabled()
|
||||
}
|
||||
|
||||
// readExistingSNIRouterEnabled parses just the top-level sni_router.enabled
|
||||
// flag out of the existing node.yaml. Returns false when the file is missing,
|
||||
// malformed, or has no sni_router block (fresh install / not opted in).
|
||||
func (cg *ConfigGenerator) readExistingSNIRouterEnabled() bool {
|
||||
configPath := filepath.Join(cg.oramaDir, "configs", "node.yaml")
|
||||
raw, err := os.ReadFile(configPath)
|
||||
if err != nil {
|
||||
return false // No existing config (fresh install) — default off.
|
||||
}
|
||||
|
||||
var parsed struct {
|
||||
SNIRouter struct {
|
||||
Enabled bool `yaml:"enabled"`
|
||||
} `yaml:"sni_router"`
|
||||
}
|
||||
if err := yaml.Unmarshal(raw, &parsed); err != nil {
|
||||
return false // Malformed/old config — don't fail regen; default off.
|
||||
}
|
||||
return parsed.SNIRouter.Enabled
|
||||
}
|
||||
|
||||
// existingWebRTC is the minimal shape parsed out of an existing node.yaml to
|
||||
// carry forward operator-set WebRTC fields across a config regeneration.
|
||||
type existingWebRTC struct {
|
||||
|
||||
@ -24,6 +24,7 @@ type BinaryInstaller struct {
|
||||
coredns *installers.CoreDNSInstaller
|
||||
caddy *installers.CaddyInstaller
|
||||
ntfy *installers.NtfyInstaller // feature #72; installed only when EnableNtfy is set
|
||||
sniRouter *installers.SNIRouterInstaller // feat-124; configured only when sni_router.enabled
|
||||
}
|
||||
|
||||
// NewBinaryInstaller creates a new binary installer
|
||||
@ -41,6 +42,7 @@ func NewBinaryInstaller(arch string, logWriter io.Writer) *BinaryInstaller {
|
||||
coredns: installers.NewCoreDNSInstaller(arch, logWriter, oramaHome),
|
||||
caddy: installers.NewCaddyInstaller(arch, logWriter, oramaHome),
|
||||
ntfy: installers.NewNtfyInstaller(arch, logWriter),
|
||||
sniRouter: installers.NewSNIRouterInstaller(arch, logWriter, OramaDir),
|
||||
}
|
||||
}
|
||||
|
||||
@ -158,6 +160,29 @@ func (bi *BinaryInstaller) EnableCaddyNtfyProxy(hostname string) {
|
||||
bi.caddy.EnableNtfyProxy(hostname)
|
||||
}
|
||||
|
||||
// EnableCaddySNIRouterMode moves Caddy's HTTPS listener off :443 to :8443 on
|
||||
// the next ConfigureCaddy() call, freeing :443 for the orama-sni-router
|
||||
// (feat-124). Must be called BEFORE ConfigureCaddy.
|
||||
func (bi *BinaryInstaller) EnableCaddySNIRouterMode() {
|
||||
bi.caddy.EnableSNIRouterMode()
|
||||
}
|
||||
|
||||
// ConfigureSNIRouter writes the orama-sni-router YAML config (listen :443,
|
||||
// fallback Caddy on :8443, turn_discovery for baseDomain). Feat-124.
|
||||
func (bi *BinaryInstaller) ConfigureSNIRouter(baseDomain string) error {
|
||||
return bi.sniRouter.Configure(baseDomain)
|
||||
}
|
||||
|
||||
// WriteSNIRouterUnit writes /etc/systemd/system/orama-sni-router.service.
|
||||
func (bi *BinaryInstaller) WriteSNIRouterUnit() error {
|
||||
return bi.sniRouter.WriteSystemdUnit()
|
||||
}
|
||||
|
||||
// SNIRouterServiceName returns the systemd unit name for lifecycle calls.
|
||||
func (bi *BinaryInstaller) SNIRouterServiceName() string {
|
||||
return installers.SNIRouterServiceName
|
||||
}
|
||||
|
||||
// InstallNtfy installs the self-hosted ntfy server (binary, user,
|
||||
// systemd unit, data directory). Feature #72. Idempotent.
|
||||
func (bi *BinaryInstaller) InstallNtfy() error {
|
||||
|
||||
@ -27,8 +27,20 @@ type CaddyInstaller struct {
|
||||
// Enabled per-node via EnableNtfyProxy. Feature #72.
|
||||
withNtfy bool
|
||||
ntfyHostname string // e.g. "push.dbrs.space" — fully-qualified public host
|
||||
|
||||
// behindSNIRouter, when set, moves Caddy's HTTPS listener off :443 to
|
||||
// CaddyHTTPSPortBehindSNI so the orama-sni-router can own :443 and forward
|
||||
// TLS by SNI (feat-124, stealth TURN). Enabled per-node via
|
||||
// EnableSNIRouterMode. Plain HTTP (:80) is unaffected. When false the
|
||||
// generated Caddyfile is byte-identical to the pre-feature output.
|
||||
behindSNIRouter bool
|
||||
}
|
||||
|
||||
// CaddyHTTPSPortBehindSNI is the HTTPS port Caddy listens on when the SNI
// router owns :443 on the node. The value 8443 is the port of the
// sni-router's caddy fallback backend (127.0.0.1:8443), as in the plan doc.
const CaddyHTTPSPortBehindSNI = 8443
|
||||
|
||||
// NewCaddyInstaller creates a new Caddy installer
|
||||
func NewCaddyInstaller(arch string, logWriter io.Writer, oramaHome string) *CaddyInstaller {
|
||||
return &CaddyInstaller{
|
||||
@ -52,6 +64,16 @@ func (ci *CaddyInstaller) EnableNtfyProxy(hostname string) {
|
||||
ci.ntfyHostname = hostname
|
||||
}
|
||||
|
||||
// EnableSNIRouterMode tells the Caddy installer to bind HTTPS on
|
||||
// CaddyHTTPSPortBehindSNI (8443) instead of :443, freeing :443 for the
|
||||
// orama-sni-router (feat-124). Plain HTTP on :80 is left untouched. Must be
|
||||
// called BEFORE Configure so the generated Caddyfile picks up the global
|
||||
// `https_port` option. A no-op when never called: the default Caddyfile keeps
|
||||
// HTTPS on :443.
|
||||
func (ci *CaddyInstaller) EnableSNIRouterMode() {
|
||||
ci.behindSNIRouter = true
|
||||
}
|
||||
|
||||
// IsInstalled checks if Caddy with orama DNS module is already installed
|
||||
func (ci *CaddyInstaller) IsInstalled() bool {
|
||||
caddyPath := "/usr/bin/caddy"
|
||||
@ -417,7 +439,17 @@ func (ci *CaddyInstaller) generateCaddyfile(domain, email, acmeEndpoint, baseDom
|
||||
// workload is REST + WebSocket (neither benefits much from
|
||||
// h2 stream multiplexing — REST is keep-alive over h1, and
|
||||
// WS is single-connection by design).
|
||||
sb.WriteString(fmt.Sprintf("{\n email %s\n servers {\n protocols h1\n }\n}\n", email))
|
||||
// When this node runs behind the SNI router (feat-124), move Caddy's HTTPS
|
||||
// listener off :443 to CaddyHTTPSPortBehindSNI via the `https_port` global
|
||||
// option. The sni-router owns :443 and forwards TLS by SNI to either a
|
||||
// namespace's TURNS listener or here (127.0.0.1:8443). Plain HTTP (:80) is
|
||||
// unchanged. When behindSNIRouter is false, no `https_port` line is emitted
|
||||
// and the Caddyfile is byte-identical to the pre-feature output.
|
||||
httpsPortOption := ""
|
||||
if ci.behindSNIRouter {
|
||||
httpsPortOption = fmt.Sprintf(" https_port %d\n", CaddyHTTPSPortBehindSNI)
|
||||
}
|
||||
sb.WriteString(fmt.Sprintf("{\n email %s\n%s servers {\n protocols h1\n }\n}\n", email, httpsPortOption))
|
||||
|
||||
// Node domain blocks (e.g., node1.dbrs.space, *.node1.dbrs.space)
|
||||
sb.WriteString(fmt.Sprintf("\n*.%s {\n%s\n reverse_proxy localhost:6001\n}\n", domain, tlsBlock))
|
||||
|
||||
@ -1,6 +1,7 @@
|
||||
package installers
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io"
|
||||
"strings"
|
||||
"testing"
|
||||
@ -97,3 +98,50 @@ func TestGenerateCaddyfile_BaseDomainSameAsDomainOmitsDuplicates(t *testing.T) {
|
||||
t.Errorf("expected exactly 2 `*.dbrs.space {` occurrences (1 TLS + 1 HTTP), got %d in:\n%s", got, cf)
|
||||
}
|
||||
}
|
||||
|
||||
// TestGenerateCaddyfile_SNIRouterDisabledByteIdentical is the safety guard for
|
||||
// feat-124: when EnableSNIRouterMode has NOT been called, the generated
|
||||
// Caddyfile must be byte-identical to the pre-feature output (HTTPS stays on
|
||||
// :443, no `https_port` global option). This is the default for every existing
|
||||
// node — any drift here is a silent production change.
|
||||
func TestGenerateCaddyfile_SNIRouterDisabledByteIdentical(t *testing.T) {
|
||||
ci := newTestCaddyInstaller()
|
||||
cf := ci.generateCaddyfile("node1.dbrs.space", "admin@dbrs.space",
|
||||
"http://localhost:6001/v1/internal/acme", "dbrs.space")
|
||||
|
||||
if strings.Contains(cf, "https_port") {
|
||||
t.Errorf("default Caddyfile must NOT contain `https_port` (SNI router off); got:\n%s", cf)
|
||||
}
|
||||
if strings.Contains(cf, "8443") {
|
||||
t.Errorf("default Caddyfile must NOT reference :8443 (SNI router off); got:\n%s", cf)
|
||||
}
|
||||
// The global options block must be exactly the pre-feature shape.
|
||||
if !strings.Contains(cf, "{\n email admin@dbrs.space\n servers {\n protocols h1\n }\n}\n") {
|
||||
t.Errorf("default global options block drifted from pre-feature output; got:\n%s", cf)
|
||||
}
|
||||
}
|
||||
|
||||
// TestGenerateCaddyfile_SNIRouterEnabledMovesHTTPSTo8443 verifies that after
|
||||
// EnableSNIRouterMode, Caddy's HTTPS listener is moved to :8443 via the
|
||||
// `https_port` global option, while plain HTTP (:80) is unchanged so ACME
|
||||
// HTTP-01 and the HTTP catch-all still work.
|
||||
func TestGenerateCaddyfile_SNIRouterEnabledMovesHTTPSTo8443(t *testing.T) {
|
||||
ci := newTestCaddyInstaller()
|
||||
ci.EnableSNIRouterMode()
|
||||
cf := ci.generateCaddyfile("node1.dbrs.space", "admin@dbrs.space",
|
||||
"http://localhost:6001/v1/internal/acme", "dbrs.space")
|
||||
|
||||
want := fmt.Sprintf("https_port %d", CaddyHTTPSPortBehindSNI)
|
||||
if !strings.Contains(cf, want) {
|
||||
t.Errorf("SNI-router Caddyfile must contain %q; got:\n%s", want, cf)
|
||||
}
|
||||
// The global option belongs inside the top-level options block, before the
|
||||
// servers stanza.
|
||||
if !strings.Contains(cf, "{\n email admin@dbrs.space\n https_port 8443\n servers {\n protocols h1\n }\n}\n") {
|
||||
t.Errorf("https_port not placed correctly in global options block; got:\n%s", cf)
|
||||
}
|
||||
// Plain HTTP :80 catch-all must be unchanged.
|
||||
if !strings.Contains(cf, ":80 {") {
|
||||
t.Errorf("HTTP :80 block must remain when SNI router enabled; got:\n%s", cf)
|
||||
}
|
||||
}
|
||||
|
||||
203
core/pkg/environments/production/installers/sni_router.go
Normal file
203
core/pkg/environments/production/installers/sni_router.go
Normal file
@ -0,0 +1,203 @@
|
||||
package installers
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
"path/filepath"
|
||||
)
|
||||
|
||||
// SNI router installer (feat-124, stealth TURN-over-443).
|
||||
//
|
||||
// Unlike the binary installers (Caddy, ntfy), the orama-sni-router binary is
|
||||
// built and shipped to the node by `orama build` / the install tarball — this
|
||||
// installer only writes the router's YAML config and the systemd unit, and
|
||||
// drives the unit's lifecycle (install+enable+start when enabled,
|
||||
// stop+disable when not).
|
||||
|
||||
// Exported identifiers shared with callers that manage the router.
const (
	// SNIRouterListenAddr is the public address the router binds. Because the
	// router owns :443, Caddy is moved to CaddyHTTPSPortBehindSNI (caddy.go).
	SNIRouterListenAddr = ":443"

	// SNIRouterServiceName is the name of the router's systemd unit.
	SNIRouterServiceName = "orama-sni-router.service"

	// SNIRouterConfigName is the config filename; the binary resolves it
	// under <oramaDir>/configs via its config.DefaultPath lookup.
	SNIRouterConfigName = "sni-router.yaml"
)

// Values rendered into the generated config and unit file. They are kept as
// literals (mirroring the sniproxy defaults) so the installer does not have
// to import the runtime package.
const (
	// sniRouterRescanInterval is the period between scans of the namespaces
	// directory for per-namespace TURNS listeners; it matches
	// sniproxy.DefaultDiscoveryRescanInterval.
	sniRouterRescanInterval = "30s"

	// sniRouterClientHelloTimeout and sniRouterBackendDialTimeout bound the
	// per-connection ClientHello peek and the backend dial respectively
	// (slowloris / dead-backend protection).
	sniRouterClientHelloTimeout = "5s"
	sniRouterBackendDialTimeout = "5s"

	// sniRouterMaxConcurrentConns limits in-flight connections on the public
	// :443 listener (DoS guard).
	sniRouterMaxConcurrentConns = 10000

	// sniRouterSystemdUnitPath is the location of the written unit file.
	sniRouterSystemdUnitPath = "/etc/systemd/system/" + SNIRouterServiceName

	// sniRouterBinaryPath is where the router binary is installed on the node.
	sniRouterBinaryPath = "/opt/orama/bin/orama-sni-router"
)
|
||||
|
||||
// SNIRouterInstaller writes the orama-sni-router config + systemd unit and
|
||||
// manages the unit lifecycle. The caddy fallback port matches
|
||||
// CaddyHTTPSPortBehindSNI so unmatched SNIs (regular HTTPS) reach the moved
|
||||
// Caddy listener.
|
||||
type SNIRouterInstaller struct {
|
||||
*BaseInstaller
|
||||
oramaDir string // e.g. "/opt/orama/.orama"
|
||||
}
|
||||
|
||||
// NewSNIRouterInstaller creates an installer. oramaDir is the node's .orama
|
||||
// data root (where configs/ and data/namespaces live).
|
||||
func NewSNIRouterInstaller(arch string, logWriter io.Writer, oramaDir string) *SNIRouterInstaller {
|
||||
return &SNIRouterInstaller{
|
||||
BaseInstaller: NewBaseInstaller(arch, logWriter),
|
||||
oramaDir: oramaDir,
|
||||
}
|
||||
}
|
||||
|
||||
// configPath returns the absolute path the router config is written to and the
|
||||
// binary resolves to via its DefaultPath lookup (<oramaDir>/configs/<name>).
|
||||
func (si *SNIRouterInstaller) configPath() string {
|
||||
return filepath.Join(si.oramaDir, "configs", SNIRouterConfigName)
|
||||
}
|
||||
|
||||
// namespacesDir returns the per-namespace config root the router scans for
|
||||
// TURNS listeners.
|
||||
func (si *SNIRouterInstaller) namespacesDir() string {
|
||||
return filepath.Join(si.oramaDir, "data", "namespaces")
|
||||
}
|
||||
|
||||
// Configure writes the router YAML config. baseDomain drives the stealth and
|
||||
// "turn.ns-*" SNI hostnames the router derives during discovery. Idempotent.
|
||||
func (si *SNIRouterInstaller) Configure(baseDomain string) error {
|
||||
if baseDomain == "" {
|
||||
return fmt.Errorf("sni-router: base domain must not be empty")
|
||||
}
|
||||
|
||||
configDir := filepath.Dir(si.configPath())
|
||||
if err := os.MkdirAll(configDir, 0755); err != nil {
|
||||
return fmt.Errorf("sni-router: create config dir %s: %w", configDir, err)
|
||||
}
|
||||
|
||||
content := si.generateConfig(baseDomain)
|
||||
if err := os.WriteFile(si.configPath(), []byte(content), 0644); err != nil {
|
||||
return fmt.Errorf("sni-router: write config %s: %w", si.configPath(), err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// generateConfig renders the sni-router.yaml. The fallback is Caddy on
|
||||
// CaddyHTTPSPortBehindSNI; turn_discovery scans the node's namespaces dir so
|
||||
// per-namespace TURNS routes appear without a router restart. No static routes
|
||||
// are emitted — every TURNS route is auto-discovered.
|
||||
func (si *SNIRouterInstaller) generateConfig(baseDomain string) string {
|
||||
return fmt.Sprintf(`# Orama SNI router config (feat-124, stealth TURN-over-443).
|
||||
# Generated by the installer — re-running install/upgrade overwrites this file.
|
||||
#
|
||||
# The router owns :443, peeks each connection's TLS ClientHello SNI, and
|
||||
# forwards the raw (still-encrypted) stream to a backend. TLS is NOT terminated
|
||||
# here. Unmatched SNIs (regular HTTPS) go to the fallback (Caddy on :%[2]d).
|
||||
listen: "%[1]s"
|
||||
client_hello_timeout: %[3]s
|
||||
backend_dial_timeout: %[4]s
|
||||
max_concurrent_conns: %[5]d
|
||||
|
||||
fallback:
|
||||
name: caddy
|
||||
addr: "127.0.0.1:%[2]d"
|
||||
|
||||
# Per-namespace stealth-TURN routes are auto-discovered by scanning
|
||||
# <namespaces_dir>/*/configs/turn-*.yaml every rescan_interval. Each namespace
|
||||
# with a TURNS listener gets two routes (the bland stealth host and a
|
||||
# turn.ns-<namespace>.<base_domain> alias) forwarding to its local TURNS port.
|
||||
turn_discovery:
|
||||
namespaces_dir: %[6]q
|
||||
base_domain: %[7]q
|
||||
rescan_interval: %[8]s
|
||||
|
||||
# No static routes: every TURNS route comes from turn_discovery above.
|
||||
routes: []
|
||||
`,
|
||||
SNIRouterListenAddr,
|
||||
CaddyHTTPSPortBehindSNI,
|
||||
sniRouterClientHelloTimeout,
|
||||
sniRouterBackendDialTimeout,
|
||||
sniRouterMaxConcurrentConns,
|
||||
si.namespacesDir(),
|
||||
baseDomain,
|
||||
sniRouterRescanInterval,
|
||||
)
|
||||
}
|
||||
|
||||
// generateSystemdUnit renders /etc/systemd/system/orama-sni-router.service.
|
||||
// Runs as the orama user with CAP_NET_BIND_SERVICE so it can bind :443 without
|
||||
// root. Ordered Before=caddy.service so the router is ready before Caddy
|
||||
// switches to :8443. Restart=on-failure.
|
||||
func (si *SNIRouterInstaller) generateSystemdUnit() string {
|
||||
return fmt.Sprintf(`[Unit]
|
||||
Description=Orama SNI Router (TLS-level :443 → backend forwarder)
|
||||
Documentation=https://github.com/DeBrosOfficial/network
|
||||
After=network.target
|
||||
Before=caddy.service
|
||||
PartOf=orama-node.service
|
||||
|
||||
[Service]
|
||||
Type=simple
|
||||
WorkingDirectory=/opt/orama
|
||||
EnvironmentFile=-/opt/orama/.orama/data/sni-router.env
|
||||
ExecStart=%s --config %s
|
||||
|
||||
# Bind privileged ports (:80, :443) without running as root.
|
||||
AmbientCapabilities=CAP_NET_BIND_SERVICE
|
||||
CapabilityBoundingSet=CAP_NET_BIND_SERVICE
|
||||
|
||||
User=orama
|
||||
Group=orama
|
||||
NoNewPrivileges=yes
|
||||
ProtectSystem=strict
|
||||
ProtectHome=yes
|
||||
PrivateTmp=yes
|
||||
LimitNOFILE=65536
|
||||
|
||||
TimeoutStopSec=15s
|
||||
KillMode=mixed
|
||||
KillSignal=SIGTERM
|
||||
|
||||
Restart=on-failure
|
||||
RestartSec=5s
|
||||
|
||||
StandardOutput=journal
|
||||
StandardError=journal
|
||||
SyslogIdentifier=orama-sni-router
|
||||
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
`, sniRouterBinaryPath, si.configPath())
|
||||
}
|
||||
|
||||
// WriteSystemdUnit writes the unit file. Idempotent.
|
||||
func (si *SNIRouterInstaller) WriteSystemdUnit() error {
|
||||
if err := os.WriteFile(sniRouterSystemdUnitPath, []byte(si.generateSystemdUnit()), 0644); err != nil {
|
||||
return fmt.Errorf("sni-router: write systemd unit %s: %w", sniRouterSystemdUnitPath, err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// IsInstalled reports whether the router binary is present on the node.
|
||||
func (si *SNIRouterInstaller) IsInstalled() bool {
|
||||
_, err := os.Stat(sniRouterBinaryPath)
|
||||
return err == nil
|
||||
}
|
||||
102
core/pkg/environments/production/installers/sni_router_test.go
Normal file
102
core/pkg/environments/production/installers/sni_router_test.go
Normal file
@ -0,0 +1,102 @@
|
||||
package installers
|
||||
|
||||
import (
|
||||
"io"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
// newTestSNIRouterInstaller returns an installer rooted at a temp oramaDir so
|
||||
// Configure writes to an isolated location.
|
||||
func newTestSNIRouterInstaller(oramaDir string) *SNIRouterInstaller {
|
||||
return NewSNIRouterInstaller("amd64", io.Discard, oramaDir)
|
||||
}
|
||||
|
||||
// TestGenerateConfig_includesDiscoveryAndFallback verifies the rendered
|
||||
// sni-router.yaml binds :443, falls back to Caddy on the moved HTTPS port, and
|
||||
// emits a turn_discovery block pointing at the node's namespaces dir + base
|
||||
// domain.
|
||||
func TestGenerateConfig_includesDiscoveryAndFallback(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
si := newTestSNIRouterInstaller(dir)
|
||||
|
||||
cfg := si.generateConfig("orama-devnet.network")
|
||||
|
||||
for _, want := range []string{
|
||||
`listen: ":443"`,
|
||||
"fallback:",
|
||||
`addr: "127.0.0.1:8443"`,
|
||||
"turn_discovery:",
|
||||
"base_domain: \"orama-devnet.network\"",
|
||||
"rescan_interval: 30s",
|
||||
"routes: []",
|
||||
} {
|
||||
if !strings.Contains(cfg, want) {
|
||||
t.Errorf("generated sni-router config missing %q\n---\n%s", want, cfg)
|
||||
}
|
||||
}
|
||||
|
||||
// namespaces_dir must be the node's data/namespaces path.
|
||||
wantNS := filepath.Join(dir, "data", "namespaces")
|
||||
if !strings.Contains(cfg, wantNS) {
|
||||
t.Errorf("config missing namespaces_dir %q\n---\n%s", wantNS, cfg)
|
||||
}
|
||||
}
|
||||
|
||||
// TestConfigure_writesFileToConfigsDir verifies Configure persists the YAML to
|
||||
// <oramaDir>/configs/sni-router.yaml.
|
||||
func TestConfigure_writesFileToConfigsDir(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
si := newTestSNIRouterInstaller(dir)
|
||||
|
||||
if err := si.Configure("example.com"); err != nil {
|
||||
t.Fatalf("Configure failed: %v", err)
|
||||
}
|
||||
|
||||
path := filepath.Join(dir, "configs", "sni-router.yaml")
|
||||
data, err := os.ReadFile(path)
|
||||
if err != nil {
|
||||
t.Fatalf("expected config at %s: %v", path, err)
|
||||
}
|
||||
if !strings.Contains(string(data), "base_domain: \"example.com\"") {
|
||||
t.Errorf("written config missing base_domain; got:\n%s", string(data))
|
||||
}
|
||||
}
|
||||
|
||||
// TestConfigure_rejectsEmptyBaseDomain verifies the installer refuses an empty
|
||||
// base domain rather than emitting a config that would derive bogus hostnames.
|
||||
func TestConfigure_rejectsEmptyBaseDomain(t *testing.T) {
|
||||
si := newTestSNIRouterInstaller(t.TempDir())
|
||||
if err := si.Configure(""); err == nil {
|
||||
t.Errorf("expected error for empty base domain")
|
||||
}
|
||||
}
|
||||
|
||||
// TestGenerateSystemdUnit_shape verifies the unit grants CAP_NET_BIND_SERVICE,
|
||||
// runs as orama, restarts on failure, and points ExecStart at the installed
|
||||
// binary + config.
|
||||
func TestGenerateSystemdUnit_shape(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
si := newTestSNIRouterInstaller(dir)
|
||||
unit := si.generateSystemdUnit()
|
||||
|
||||
for _, want := range []string{
|
||||
"AmbientCapabilities=CAP_NET_BIND_SERVICE",
|
||||
"User=orama",
|
||||
"Restart=on-failure",
|
||||
"EnvironmentFile=-/opt/orama/.orama/data/sni-router.env",
|
||||
// ExecStart must point at the ABSOLUTE config path so it doesn't
|
||||
// depend on WorkingDirectory/$HOME resolution at runtime.
|
||||
"ExecStart=/opt/orama/bin/orama-sni-router --config " + si.configPath(),
|
||||
"Before=caddy.service",
|
||||
} {
|
||||
if !strings.Contains(unit, want) {
|
||||
t.Errorf("systemd unit missing %q\n---\n%s", want, unit)
|
||||
}
|
||||
}
|
||||
if !strings.Contains(si.configPath(), dir) {
|
||||
t.Errorf("configPath %q not rooted at the oramaDir %q", si.configPath(), dir)
|
||||
}
|
||||
}
|
||||
@ -741,11 +741,35 @@ func (ps *ProductionSetup) Phase4GenerateConfigs(peerAddresses []string, vpsIP s
|
||||
ps.logf(" ✓ ntfy config generated (base_url: %s)", ntfyBaseURL)
|
||||
}
|
||||
|
||||
// Stealth TURN-over-443 (feat-124): when the node opted in
|
||||
// (sni_router.enabled in the node.yaml just written above), Caddy
|
||||
// must vacate :443 so the orama-sni-router can own it. Move Caddy's
|
||||
// HTTPS listener to :8443 BEFORE ConfigureCaddy renders the Caddyfile.
|
||||
// When not opted in, the Caddyfile is byte-identical to before.
|
||||
if ps.configGenerator.SNIRouterEnabled() {
|
||||
ps.binaryInstaller.EnableCaddySNIRouterMode()
|
||||
ps.logf(" ✓ SNI router enabled — Caddy HTTPS will bind :8443")
|
||||
}
|
||||
|
||||
if err := ps.binaryInstaller.ConfigureCaddy(caddyDomain, email, acmeEndpoint, baseDomain); err != nil {
|
||||
ps.logf(" ⚠️ Caddy config warning: %v", err)
|
||||
} else {
|
||||
ps.logf(" ✓ Caddy config generated")
|
||||
}
|
||||
|
||||
// Stealth TURN-over-443 (feat-124): when opted in, write the
|
||||
// orama-sni-router config (listen :443, fallback Caddy :8443,
|
||||
// turn_discovery scanning this node's namespaces dir for the cluster's
|
||||
// base domain). The unit lifecycle is driven in Phase5 after Caddy has
|
||||
// moved to :8443. The router uses the base domain as the zone for
|
||||
// stealth/turn.ns-* hostnames.
|
||||
if ps.configGenerator.SNIRouterEnabled() {
|
||||
if err := ps.binaryInstaller.ConfigureSNIRouter(dnsZone); err != nil {
|
||||
ps.logf(" ⚠️ SNI router config warning: %v", err)
|
||||
} else {
|
||||
ps.logf(" ✓ SNI router config generated (zone: %s)", dnsZone)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
@ -871,6 +895,14 @@ func (ps *ProductionSetup) Phase5CreateSystemdServices(enableHTTPS bool) error {
|
||||
}
|
||||
}
|
||||
|
||||
// SNI router unit (feat-124). Write the unit whenever the binary is present
|
||||
// so the daemon-reload below picks it up; the enable/start vs stop/disable
|
||||
// decision (based on sni_router.enabled) happens after Caddy has moved to
|
||||
// :8443, in the start section.
|
||||
if ps.binaryInstaller.WriteSNIRouterUnit() == nil {
|
||||
ps.logf(" ✓ SNI router service unit created: %s", ps.binaryInstaller.SNIRouterServiceName())
|
||||
}
|
||||
|
||||
// Reload systemd daemon
|
||||
if err := ps.serviceController.DaemonReload(); err != nil {
|
||||
return fmt.Errorf("failed to reload systemd: %w", err)
|
||||
@ -980,6 +1012,31 @@ func (ps *ProductionSetup) Phase5CreateSystemdServices(enableHTTPS bool) error {
|
||||
}
|
||||
}
|
||||
|
||||
// Stealth TURN-over-443 (feat-124) cutover. Caddy has just been
|
||||
// reconfigured to :8443 and restarted above, so :443 is now free for the
|
||||
// SNI router. When opted in, enable+start the router; when not, stop+disable
|
||||
// it so a node that flipped the flag off cleanly returns :443 to Caddy.
|
||||
sniSvc := ps.binaryInstaller.SNIRouterServiceName()
|
||||
if ps.configGenerator.SNIRouterEnabled() {
|
||||
if err := ps.serviceController.EnableService(sniSvc); err != nil {
|
||||
ps.logf(" ⚠️ Failed to enable %s: %v", sniSvc, err)
|
||||
}
|
||||
if err := ps.serviceController.RestartService(sniSvc); err != nil {
|
||||
ps.logf(" ⚠️ Failed to start %s: %v", sniSvc, err)
|
||||
} else {
|
||||
ps.logf(" - %s started (owns :443)", sniSvc)
|
||||
}
|
||||
} else {
|
||||
// Not opted in: ensure the router is not holding :443. Errors are
|
||||
// non-fatal — the unit may simply not be loaded on this node.
|
||||
if err := ps.serviceController.StopService(sniSvc); err != nil {
|
||||
ps.logf(" ℹ️ %s not running (expected when disabled): %v", sniSvc, err)
|
||||
}
|
||||
if err := ps.serviceController.DisableService(sniSvc); err != nil {
|
||||
ps.logf(" ℹ️ %s not enabled (expected when disabled): %v", sniSvc, err)
|
||||
}
|
||||
}
|
||||
|
||||
// Start ntfy on every node (#72). Caddy must already be up (it
|
||||
// terminates TLS for push.<dnsZone>), which the order above
|
||||
// guarantees.
|
||||
|
||||
72
core/pkg/environments/production/sni_router_test.go
Normal file
72
core/pkg/environments/production/sni_router_test.go
Normal file
@ -0,0 +1,72 @@
|
||||
package production
|
||||
|
||||
import (
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
// TestGenerateNodeConfig_preservesSNIRouterEnabled is the regression test for
|
||||
// the feat-124 regen-wipe class of outage (cf. bugboard #259/#846 for webrtc):
|
||||
// a config regeneration must NOT silently reset an operator's
|
||||
// sni_router.enabled: true back to false, which would stop the :443 router and
|
||||
// break stealth TURN. We write a node.yaml with the flag set, regenerate, and
|
||||
// assert it survives.
|
||||
func TestGenerateNodeConfig_preservesSNIRouterEnabled(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
writeNodeYAML(t, dir, `sni_router:
|
||||
enabled: true
|
||||
|
||||
http_gateway:
|
||||
enabled: true
|
||||
`)
|
||||
|
||||
cg := NewConfigGenerator(dir)
|
||||
out, err := cg.GenerateNodeConfig(nil, "10.0.0.5", "", "node-1.dbrs.space", "dbrs.space", false)
|
||||
if err != nil {
|
||||
t.Fatalf("GenerateNodeConfig failed: %v", err)
|
||||
}
|
||||
|
||||
if !strings.Contains(out, "sni_router:") {
|
||||
t.Fatalf("regenerated node.yaml missing sni_router block\n---\n%s", out)
|
||||
}
|
||||
if !strings.Contains(out, "enabled: true") {
|
||||
t.Errorf("regenerated node.yaml did not preserve sni_router.enabled: true\n---\n%s", out)
|
||||
}
|
||||
}
|
||||
|
||||
// TestGenerateNodeConfig_sniRouterDefaultsFalse verifies a fresh install (no
|
||||
// existing node.yaml) renders sni_router.enabled: false — default OFF.
|
||||
func TestGenerateNodeConfig_sniRouterDefaultsFalse(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
cg := NewConfigGenerator(dir)
|
||||
|
||||
out, err := cg.GenerateNodeConfig(nil, "10.0.0.5", "", "node-1.dbrs.space", "dbrs.space", false)
|
||||
if err != nil {
|
||||
t.Fatalf("GenerateNodeConfig failed: %v", err)
|
||||
}
|
||||
if !strings.Contains(out, "sni_router:") {
|
||||
t.Fatalf("node.yaml missing sni_router block\n---\n%s", out)
|
||||
}
|
||||
if !strings.Contains(out, "enabled: false") {
|
||||
t.Errorf("fresh node.yaml should render sni_router.enabled: false\n---\n%s", out)
|
||||
}
|
||||
if cg.SNIRouterEnabled() {
|
||||
t.Errorf("SNIRouterEnabled() should be false on a fresh install")
|
||||
}
|
||||
}
|
||||
|
||||
// TestGenerateNodeConfig_sniRouterDisabledStaysFalse verifies an existing
|
||||
// node.yaml that explicitly disabled the router does not flip on during regen.
|
||||
func TestGenerateNodeConfig_sniRouterDisabledStaysFalse(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
writeNodeYAML(t, dir, "sni_router:\n enabled: false\nhttp_gateway:\n enabled: true\n")
|
||||
|
||||
cg := NewConfigGenerator(dir)
|
||||
out, err := cg.GenerateNodeConfig(nil, "10.0.0.5", "", "node-1.dbrs.space", "dbrs.space", false)
|
||||
if err != nil {
|
||||
t.Fatalf("GenerateNodeConfig failed: %v", err)
|
||||
}
|
||||
if !strings.Contains(out, "enabled: false") {
|
||||
t.Errorf("disabled sni_router should stay false on regen\n---\n%s", out)
|
||||
}
|
||||
}
|
||||
@ -15,6 +15,14 @@ node:
|
||||
operator_wallet: "{{.OperatorWallet}}"
|
||||
{{- end}}
|
||||
|
||||
# Stealth TURN-over-443 SNI router (feat-124). When enabled, the node runs
|
||||
# orama-sni-router on :443 and Caddy is moved to :8443; default-OFF so existing
|
||||
# nodes are byte-identical until an operator opts in. This block is preserved
|
||||
# across config regeneration (GenerateNodeConfig carries forward an existing
|
||||
# sni_router.enabled: true).
|
||||
sni_router:
|
||||
enabled: {{if .SNIRouterEnabled}}true{{else}}false{{end}}
|
||||
|
||||
database:
|
||||
data_dir: "{{.DataDir}}/rqlite"
|
||||
replication_factor: 3
|
||||
|
||||
@ -66,6 +66,16 @@ type NodeConfigData struct {
|
||||
SFUPort int // Local SFU signaling port the gateway proxies to
|
||||
TURNDomain string // TURN domain (e.g., "turn.ns-myapp.dbrs.space")
|
||||
TURNSecret string // HMAC-SHA1 shared secret for TURN credential generation
|
||||
|
||||
// SNIRouterEnabled gates the stealth TURN-over-443 SNI router (feat-124).
|
||||
// Rendered as the top-level sni_router.enabled flag. Default false keeps
|
||||
// existing nodes byte-identical (Caddy stays on :443); when true the node
|
||||
// runs orama-sni-router on :443 and Caddy moves to :8443. This value is
|
||||
// carried forward across config regeneration from the existing node.yaml
|
||||
// (see production/config.go populateSNIRouterConfig) so a regen never wipes
|
||||
// an operator's opt-in (the same preserve-from-existing discipline as the
|
||||
// webrtc block, bugboard #259/#846).
|
||||
SNIRouterEnabled bool
|
||||
}
|
||||
|
||||
// GatewayConfigData holds parameters for gateway.yaml rendering
|
||||
|
||||
@ -103,6 +103,36 @@ func TestRenderNodeConfig_webRTC(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestRenderNodeConfig_sniRouter(t *testing.T) {
|
||||
// Enabled: top-level sni_router block renders enabled: true.
|
||||
enabled, err := RenderNodeConfig(NodeConfigData{
|
||||
NodeID: "node1",
|
||||
SNIRouterEnabled: true,
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("RenderNodeConfig failed: %v", err)
|
||||
}
|
||||
if !strings.Contains(enabled, "sni_router:") {
|
||||
t.Errorf("rendered node config missing sni_router block\n---\n%s", enabled)
|
||||
}
|
||||
if !strings.Contains(enabled, "enabled: true") {
|
||||
t.Errorf("sni_router should render enabled: true\n---\n%s", enabled)
|
||||
}
|
||||
|
||||
// Default: the block is always present, defaulting to false (so the flag is
|
||||
// discoverable to operators and round-trips through regen).
|
||||
disabled, err := RenderNodeConfig(NodeConfigData{NodeID: "node1"})
|
||||
if err != nil {
|
||||
t.Fatalf("RenderNodeConfig failed: %v", err)
|
||||
}
|
||||
if !strings.Contains(disabled, "sni_router:") {
|
||||
t.Errorf("sni_router block should always be present\n---\n%s", disabled)
|
||||
}
|
||||
if !strings.Contains(disabled, "enabled: false") {
|
||||
t.Errorf("default sni_router should render enabled: false\n---\n%s", disabled)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRenderGatewayConfig(t *testing.T) {
|
||||
bootstrapMultiaddr := "/ip4/127.0.0.1/tcp/4001/p2p/Qm1234567890"
|
||||
data := GatewayConfigData{
|
||||
|
||||
@ -1114,6 +1114,48 @@ func (g *Gateway) namespaceWebRTCDisablePublicHandler(w http.ResponseWriter, r *
|
||||
})
|
||||
}
|
||||
|
||||
// namespaceWebRTCStealthPublicHandler handles POST /v1/namespace/webrtc/stealth/{enable|disable}
|
||||
// (feat-124). Public: authenticated by JWT/API key via auth middleware;
|
||||
// namespace from context. `enable` is true for the enable route.
|
||||
func (g *Gateway) namespaceWebRTCStealthPublicHandler(w http.ResponseWriter, r *http.Request, enable bool) {
|
||||
if r.Method != http.MethodPost {
|
||||
writeError(w, http.StatusMethodNotAllowed, "method not allowed")
|
||||
return
|
||||
}
|
||||
|
||||
namespaceName, _ := r.Context().Value(CtxKeyNamespaceOverride).(string)
|
||||
if namespaceName == "" {
|
||||
writeError(w, http.StatusForbidden, "namespace not resolved")
|
||||
return
|
||||
}
|
||||
|
||||
if g.webrtcManager == nil {
|
||||
writeError(w, http.StatusServiceUnavailable, "WebRTC management not enabled")
|
||||
return
|
||||
}
|
||||
|
||||
var err error
|
||||
action := "disabled"
|
||||
if enable {
|
||||
action = "enabled"
|
||||
err = g.webrtcManager.EnableWebRTCStealth(r.Context(), namespaceName)
|
||||
} else {
|
||||
err = g.webrtcManager.DisableWebRTCStealth(r.Context(), namespaceName)
|
||||
}
|
||||
if err != nil {
|
||||
writeError(w, http.StatusInternalServerError, err.Error())
|
||||
return
|
||||
}
|
||||
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
w.WriteHeader(http.StatusOK)
|
||||
json.NewEncoder(w).Encode(map[string]interface{}{
|
||||
"status": "ok",
|
||||
"namespace": namespaceName,
|
||||
"message": "WebRTC stealth " + action + " successfully",
|
||||
})
|
||||
}
|
||||
|
||||
// namespaceWebRTCStatusPublicHandler handles GET /v1/namespace/webrtc/status
|
||||
// Public: authenticated by JWT/API key via auth middleware. Namespace from context.
|
||||
func (g *Gateway) namespaceWebRTCStatusPublicHandler(w http.ResponseWriter, r *http.Request) {
|
||||
|
||||
@ -64,6 +64,12 @@ type WebRTCManager interface {
|
||||
DisableWebRTC(ctx context.Context, namespaceName string) error
|
||||
// GetWebRTCStatus returns the WebRTC config for a namespace, or nil if not enabled.
|
||||
GetWebRTCStatus(ctx context.Context, namespaceName string) (interface{}, error)
|
||||
// EnableWebRTCStealth / DisableWebRTCStealth toggle the censorship-
|
||||
// resistant TURNS:443 path (feat-124): stealth cert on the TURN servers,
|
||||
// stealth DNS records, and the turns:<stealth-host>:443 rung in the
|
||||
// turn.credentials URI ladder. Requires WebRTC to already be enabled.
|
||||
EnableWebRTCStealth(ctx context.Context, namespaceName string) error
|
||||
DisableWebRTCStealth(ctx context.Context, namespaceName string) error
|
||||
}
|
||||
|
||||
// Handlers holds dependencies for authentication HTTP handlers
|
||||
|
||||
@ -53,6 +53,8 @@ type SpawnRequest struct {
|
||||
GatewaySFUPort int `json:"gateway_sfu_port,omitempty"`
|
||||
GatewayTURNDomain string `json:"gateway_turn_domain,omitempty"`
|
||||
GatewayTURNSecret string `json:"gateway_turn_secret,omitempty"`
|
||||
// Stealth TURNS:443 host (feat-124); empty when stealth is disabled.
|
||||
GatewayTURNStealthDomain string `json:"gateway_turn_stealth_domain,omitempty"`
|
||||
// Host serverless secrets encryption key forwarded to the spawned
|
||||
// namespace gateway (bugboard #837 follow-up). Same value on every node.
|
||||
GatewaySecretsEncryptionKey string `json:"gateway_secrets_encryption_key,omitempty"`
|
||||
@ -75,6 +77,7 @@ type SpawnRequest struct {
|
||||
TURNRelayStart int `json:"turn_relay_start,omitempty"`
|
||||
TURNRelayEnd int `json:"turn_relay_end,omitempty"`
|
||||
TURNDomain string `json:"turn_domain,omitempty"`
|
||||
TURNStealthDomain string `json:"turn_stealth_domain,omitempty"`
|
||||
|
||||
// Cluster state (when action = "save-cluster-state")
|
||||
ClusterState json.RawMessage `json:"cluster_state,omitempty"`
|
||||
@ -237,6 +240,7 @@ func (h *SpawnHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
|
||||
WebRTCEnabled: req.GatewayWebRTCEnabled,
|
||||
SFUPort: req.GatewaySFUPort,
|
||||
TURNDomain: req.GatewayTURNDomain,
|
||||
TURNStealthDomain: req.GatewayTURNStealthDomain,
|
||||
TURNSecret: req.GatewayTURNSecret,
|
||||
SecretsEncryptionKey: req.GatewaySecretsEncryptionKey,
|
||||
}
|
||||
@ -291,6 +295,7 @@ func (h *SpawnHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
|
||||
WebRTCEnabled: req.GatewayWebRTCEnabled,
|
||||
SFUPort: req.GatewaySFUPort,
|
||||
TURNDomain: req.GatewayTURNDomain,
|
||||
TURNStealthDomain: req.GatewayTURNStealthDomain,
|
||||
TURNSecret: req.GatewayTURNSecret,
|
||||
SecretsEncryptionKey: req.GatewaySecretsEncryptionKey,
|
||||
}
|
||||
@ -360,6 +365,7 @@ func (h *SpawnHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
|
||||
RelayPortStart: req.TURNRelayStart,
|
||||
RelayPortEnd: req.TURNRelayEnd,
|
||||
TURNDomain: req.TURNDomain,
|
||||
StealthDomain: req.TURNStealthDomain,
|
||||
}
|
||||
if err := h.systemdSpawner.SpawnTURN(ctx, req.Namespace, req.NodeID, cfg); err != nil {
|
||||
h.logger.Error("Failed to spawn TURN instance", zap.Error(err))
|
||||
|
||||
@ -95,6 +95,11 @@ type InstanceConfig struct {
|
||||
SFUPort int // SFU signaling port on this node
|
||||
TURNDomain string // TURN server domain (e.g., "turn.ns-alice.orama-devnet.network")
|
||||
TURNSecret string // TURN shared secret for credential generation
|
||||
// TURNStealthDomain is the neutral stealth TURNS host (feat-124,
|
||||
// cdn-<hash>.<base-domain>). Non-empty only when webrtc stealth is
|
||||
// enabled for the namespace; turn.credentials then advertises
|
||||
// `turns:<TURNStealthDomain>:443` as the final URI-ladder rung.
|
||||
TURNStealthDomain string
|
||||
// SecretsEncryptionKey is the host-wide AES-256 serverless secrets
|
||||
// encryption key (hex-encoded). Bugboard #837 follow-up: the host gateway
|
||||
// receives this via gateway.Config but spawned namespace gateways never
|
||||
@ -113,6 +118,7 @@ type GatewayYAMLWebRTC struct {
|
||||
SFUPort int `yaml:"sfu_port,omitempty"`
|
||||
TURNDomain string `yaml:"turn_domain,omitempty"`
|
||||
TURNSecret string `yaml:"turn_secret,omitempty"`
|
||||
TURNStealthDomain string `yaml:"turn_stealth_domain,omitempty"`
|
||||
}
|
||||
|
||||
// GatewayYAMLConfig represents the gateway YAML configuration structure
|
||||
@ -338,6 +344,7 @@ func (is *InstanceSpawner) generateConfig(configPath string, cfg InstanceConfig,
|
||||
SFUPort: cfg.SFUPort,
|
||||
TURNDomain: cfg.TURNDomain,
|
||||
TURNSecret: cfg.TURNSecret,
|
||||
TURNStealthDomain: cfg.TURNStealthDomain,
|
||||
},
|
||||
SecretsEncryptionKey: cfg.SecretsEncryptionKey,
|
||||
}
|
||||
|
||||
@ -67,6 +67,12 @@ func (g *Gateway) Routes() http.Handler {
|
||||
// Namespace WebRTC enable/disable/status (public, JWT/API key auth via middleware)
|
||||
mux.HandleFunc("/v1/namespace/webrtc/enable", g.namespaceWebRTCEnablePublicHandler)
|
||||
mux.HandleFunc("/v1/namespace/webrtc/disable", g.namespaceWebRTCDisablePublicHandler)
|
||||
mux.HandleFunc("/v1/namespace/webrtc/stealth/enable", func(w http.ResponseWriter, r *http.Request) {
|
||||
g.namespaceWebRTCStealthPublicHandler(w, r, true)
|
||||
})
|
||||
mux.HandleFunc("/v1/namespace/webrtc/stealth/disable", func(w http.ResponseWriter, r *http.Request) {
|
||||
g.namespaceWebRTCStealthPublicHandler(w, r, false)
|
||||
})
|
||||
mux.HandleFunc("/v1/namespace/webrtc/status", g.namespaceWebRTCStatusPublicHandler)
|
||||
|
||||
// auth endpoints
|
||||
|
||||
@ -695,6 +695,7 @@ func (cm *ClusterManager) spawnGatewayRemote(ctx context.Context, nodeIP string,
|
||||
"gateway_sfu_port": cfg.SFUPort,
|
||||
"gateway_turn_domain": cfg.TURNDomain,
|
||||
"gateway_turn_secret": cfg.TURNSecret,
|
||||
"gateway_turn_stealth_domain": cfg.TURNStealthDomain,
|
||||
// Bugboard #837 follow-up: carry the host secrets encryption key to
|
||||
// the remote node so its spawned namespace gateway can manage secrets.
|
||||
"gateway_secrets_encryption_key": cfg.SecretsEncryptionKey,
|
||||
@ -1614,6 +1615,7 @@ func (cm *ClusterManager) restoreClusterOnNode(ctx context.Context, clusterID, n
|
||||
gwCfg.SFUPort = sfuBlock.SFUSignalingPort
|
||||
gwCfg.TURNDomain = fmt.Sprintf("turn.ns-%s.%s", namespaceName, cm.baseDomain)
|
||||
gwCfg.TURNSecret = webrtcCfg.TURNSharedSecret
|
||||
gwCfg.TURNStealthDomain = cm.stealthDomainFor(namespaceName, webrtcCfg)
|
||||
}
|
||||
}
|
||||
|
||||
@ -1681,6 +1683,7 @@ type ClusterLocalState struct {
|
||||
HasTURN bool `json:"has_turn,omitempty"`
|
||||
TURNSharedSecret string `json:"turn_shared_secret,omitempty"` // Needed for gateway to generate TURN credentials on cold start
|
||||
TURNDomain string `json:"turn_domain,omitempty"` // TURN server domain for gateway config
|
||||
TURNStealthDomain string `json:"turn_stealth_domain,omitempty"` // Stealth TURNS:443 host (feat-124); empty when stealth disabled
|
||||
TURNCredentialTTL int `json:"turn_credential_ttl,omitempty"`
|
||||
SFUSignalingPort int `json:"sfu_signaling_port,omitempty"`
|
||||
SFUMediaPortStart int `json:"sfu_media_port_start,omitempty"`
|
||||
@ -1840,6 +1843,7 @@ type restoreWebRTC struct {
|
||||
sfuPort int
|
||||
turnDomain string
|
||||
turnSecret string
|
||||
stealthDomain string // feat-124: empty when webrtc stealth is disabled
|
||||
}
|
||||
|
||||
// chooseRestoreWebRTC resolves a restored gateway's WebRTC config. TWO
|
||||
@ -1864,11 +1868,12 @@ type restoreWebRTC struct {
|
||||
// Extracted as a pure function so the precedence is unit-testable without
|
||||
// standing up the full restore path (systemd spawner + DB + port store).
|
||||
func chooseRestoreWebRTC(
|
||||
stateHasSFU bool, stateSFUPort int, stateTURNDomain, stateTURNSecret string,
|
||||
dbFetch func() (turnSecret, turnDomain string, sfuPort int),
|
||||
stateHasSFU bool, stateSFUPort int, stateTURNDomain, stateTURNSecret, stateStealthDomain string,
|
||||
dbFetch func() (turnSecret, turnDomain, stealthDomain string, sfuPort int),
|
||||
) restoreWebRTC {
|
||||
turnSecret := stateTURNSecret
|
||||
turnDomain := stateTURNDomain
|
||||
stealthDomain := stateStealthDomain
|
||||
sfuPort := 0
|
||||
if stateHasSFU && stateSFUPort > 0 {
|
||||
sfuPort = stateSFUPort
|
||||
@ -1878,12 +1883,17 @@ func chooseRestoreWebRTC(
|
||||
// the marker that the namespace has WebRTC enabled at all. The state
|
||||
// file is not updated by EnableWebRTC, so a namespace enabled after
|
||||
// the state file was written reaches here with an empty secret.
|
||||
// (Stealth toggles DO rewrite cluster state on every node, so the
|
||||
// state-first read stays fresh for stealthDomain too.)
|
||||
if turnSecret == "" {
|
||||
if dbSecret, dbDomain, dbSFU := dbFetch(); dbSecret != "" {
|
||||
if dbSecret, dbDomain, dbStealth, dbSFU := dbFetch(); dbSecret != "" {
|
||||
turnSecret = dbSecret
|
||||
if turnDomain == "" {
|
||||
turnDomain = dbDomain
|
||||
}
|
||||
if stealthDomain == "" {
|
||||
stealthDomain = dbStealth
|
||||
}
|
||||
if sfuPort == 0 {
|
||||
sfuPort = dbSFU
|
||||
}
|
||||
@ -1895,6 +1905,7 @@ func chooseRestoreWebRTC(
|
||||
sfuPort: sfuPort,
|
||||
turnDomain: turnDomain,
|
||||
turnSecret: turnSecret,
|
||||
stealthDomain: stealthDomain,
|
||||
}
|
||||
}
|
||||
|
||||
@ -2050,11 +2061,11 @@ func (cm *ClusterManager) restoreClusterFromState(ctx context.Context, state *Cl
|
||||
// fields here. The lazy dbFetch only hits the DB when the state
|
||||
// file is incomplete.
|
||||
wr := chooseRestoreWebRTC(
|
||||
state.HasSFU, state.SFUSignalingPort, state.TURNDomain, state.TURNSharedSecret,
|
||||
func() (turnSecret, turnDomain string, sfuPort int) {
|
||||
state.HasSFU, state.SFUSignalingPort, state.TURNDomain, state.TURNSharedSecret, state.TURNStealthDomain,
|
||||
func() (turnSecret, turnDomain, stealthDomain string, sfuPort int) {
|
||||
webrtcCfg, err := cm.GetWebRTCConfig(ctx, state.NamespaceName)
|
||||
if err != nil || webrtcCfg == nil {
|
||||
return "", "", 0
|
||||
return "", "", "", 0
|
||||
}
|
||||
// TURN is namespace-wide; SFU port is per-node and may be
|
||||
// absent on a gateway-only (non-SFU) node — that's fine,
|
||||
@ -2065,6 +2076,7 @@ func (cm *ClusterManager) restoreClusterFromState(ctx context.Context, state *Cl
|
||||
}
|
||||
return webrtcCfg.TURNSharedSecret,
|
||||
fmt.Sprintf("turn.ns-%s.%s", state.NamespaceName, cm.baseDomain),
|
||||
cm.stealthDomainFor(state.NamespaceName, webrtcCfg),
|
||||
sfu
|
||||
},
|
||||
)
|
||||
@ -2076,6 +2088,7 @@ func (cm *ClusterManager) restoreClusterFromState(ctx context.Context, state *Cl
|
||||
gwCfg.SFUPort = wr.sfuPort
|
||||
gwCfg.TURNDomain = wr.turnDomain
|
||||
gwCfg.TURNSecret = wr.turnSecret
|
||||
gwCfg.TURNStealthDomain = wr.stealthDomain
|
||||
}
|
||||
|
||||
resp, err := http.Get(fmt.Sprintf("http://localhost:%d/v1/health", pb.GatewayHTTPPort))
|
||||
@ -2126,6 +2139,7 @@ func (cm *ClusterManager) restoreClusterFromState(ctx context.Context, state *Cl
|
||||
RelayPortStart: state.TURNRelayPortStart,
|
||||
RelayPortEnd: state.TURNRelayPortEnd,
|
||||
TURNDomain: fmt.Sprintf("turn.ns-%s.%s", state.NamespaceName, cm.baseDomain),
|
||||
StealthDomain: cm.stealthDomainFor(state.NamespaceName, webrtcCfg),
|
||||
}
|
||||
if err := cm.systemdSpawner.SpawnTURN(ctx, state.NamespaceName, cm.localNodeID, turnCfg); err != nil {
|
||||
cm.logger.Error("Failed to restore TURN from state", zap.String("namespace", state.NamespaceName), zap.Error(err))
|
||||
|
||||
263
core/pkg/namespace/cluster_manager_stealth.go
Normal file
263
core/pkg/namespace/cluster_manager_stealth.go
Normal file
@ -0,0 +1,263 @@
|
||||
package namespace
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
|
||||
"github.com/DeBrosOfficial/network/pkg/client"
|
||||
"github.com/DeBrosOfficial/network/pkg/turn"
|
||||
"go.uber.org/zap"
|
||||
)
|
||||
|
||||
// Stealth TURNS-over-443 lifecycle (feat-124, censorship-resistant calling).
|
||||
//
|
||||
// Enabling stealth for a namespace whose WebRTC is already running:
|
||||
// 1. creates DNS A records for the neutral stealth host -> the TURN nodes,
|
||||
// 2. flips namespace_webrtc_config.stealth_enabled,
|
||||
// 3. re-spawns the namespace's TURN servers with the stealth domain (the
|
||||
// spawner provisions a Let's Encrypt cert for it — hard-fail, never
|
||||
// self-signed),
|
||||
// 4. rewrites cluster-state.json on every node (so DB-less restores keep
|
||||
// the stealth domain), and
|
||||
// 5. restarts the namespace gateways so turn.credentials advertises
|
||||
// `turns:<stealth-host>:443` as the final URI-ladder rung.
|
||||
//
|
||||
// The SNI router on :443 discovers the route (stealth host -> local TURN TLS
|
||||
// port) from the TURN config files on disk — no extra registration step.
|
||||
|
||||
// stealthDomainFor returns the namespace's stealth TURNS host when stealth is
|
||||
// enabled in its WebRTC config, else "" (callers treat empty as disabled).
|
||||
func (cm *ClusterManager) stealthDomainFor(namespaceName string, webrtcCfg *WebRTCConfig) string {
|
||||
if webrtcCfg == nil || !webrtcCfg.StealthEnabled {
|
||||
return ""
|
||||
}
|
||||
return turn.StealthHostForNamespace(namespaceName, cm.baseDomain)
|
||||
}
|
||||
|
||||
// EnableWebRTCStealth enables the stealth TURNS:443 path for a namespace.
|
||||
// Requires WebRTC to already be enabled.
|
||||
func (cm *ClusterManager) EnableWebRTCStealth(ctx context.Context, namespaceName string) error {
|
||||
cluster, webrtcCfg, err := cm.getStealthPrereqs(ctx, namespaceName)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if webrtcCfg.StealthEnabled {
|
||||
return ErrWebRTCStealthAlreadyEnabled
|
||||
}
|
||||
|
||||
stealthDomain := turn.StealthHostForNamespace(namespaceName, cm.baseDomain)
|
||||
cm.logger.Info("Enabling WebRTC stealth for namespace",
|
||||
zap.String("namespace", namespaceName),
|
||||
zap.String("stealth_domain", stealthDomain))
|
||||
|
||||
clusterNodes, err := cm.getClusterNodesWithIPs(ctx, cluster.ID)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to get cluster nodes: %w", err)
|
||||
}
|
||||
turnBlocks, err := cm.getWebRTCBlocksByType(ctx, cluster.ID, "turn")
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to get TURN allocations for namespace %s: %w", namespaceName, err)
|
||||
}
|
||||
if len(turnBlocks) == 0 {
|
||||
return fmt.Errorf("no TURN allocations found for namespace %s (is WebRTC fully enabled?)", namespaceName)
|
||||
}
|
||||
|
||||
// DNS first — cert provisioning and clients both need the name to resolve.
|
||||
var turnIPs []string
|
||||
for _, block := range turnBlocks {
|
||||
for _, n := range clusterNodes {
|
||||
if n.NodeID == block.NodeID {
|
||||
turnIPs = append(turnIPs, n.PublicIP)
|
||||
}
|
||||
}
|
||||
}
|
||||
if err := cm.dnsManager.CreateStealthTURNRecords(ctx, namespaceName, stealthDomain, turnIPs); err != nil {
|
||||
return fmt.Errorf("failed to create stealth DNS records: %w", err)
|
||||
}
|
||||
|
||||
if err := cm.setStealthEnabled(ctx, cluster.ID, true); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Re-spawn TURN with the stealth domain; roll back on failure so the
|
||||
// board never claims a stealth endpoint that doesn't terminate TLS.
|
||||
if err := cm.respawnTURNWithStealth(ctx, cluster, clusterNodes, turnBlocks, webrtcCfg.TURNSharedSecret, stealthDomain); err != nil {
|
||||
cm.rollbackStealthEnable(ctx, cluster.ID, namespaceName)
|
||||
return fmt.Errorf("failed to re-spawn TURN with stealth cert (stealth rolled back): %w", err)
|
||||
}
|
||||
|
||||
cm.refreshStateAndGateways(ctx, cluster, clusterNodes, stealthDomain, webrtcCfg.TURNSharedSecret)
|
||||
cm.logEvent(ctx, cluster.ID, EventWebRTCEnabled, "",
|
||||
fmt.Sprintf("WebRTC stealth enabled (%s)", stealthDomain), nil)
|
||||
return nil
|
||||
}
|
||||
|
||||
// DisableWebRTCStealth turns the stealth TURNS:443 path off again. TURN and
|
||||
// the baseline ladder (udp/tcp 3478, turns:5349) keep running.
|
||||
func (cm *ClusterManager) DisableWebRTCStealth(ctx context.Context, namespaceName string) error {
|
||||
cluster, webrtcCfg, err := cm.getStealthPrereqs(ctx, namespaceName)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if !webrtcCfg.StealthEnabled {
|
||||
return ErrWebRTCStealthNotEnabled
|
||||
}
|
||||
|
||||
cm.logger.Info("Disabling WebRTC stealth for namespace", zap.String("namespace", namespaceName))
|
||||
|
||||
clusterNodes, err := cm.getClusterNodesWithIPs(ctx, cluster.ID)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to get cluster nodes: %w", err)
|
||||
}
|
||||
turnBlocks, err := cm.getWebRTCBlocksByType(ctx, cluster.ID, "turn")
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to get TURN allocations: %w", err)
|
||||
}
|
||||
|
||||
if err := cm.setStealthEnabled(ctx, cluster.ID, false); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := cm.respawnTURNWithStealth(ctx, cluster, clusterNodes, turnBlocks, webrtcCfg.TURNSharedSecret, ""); err != nil {
|
||||
return fmt.Errorf("failed to re-spawn TURN without stealth: %w", err)
|
||||
}
|
||||
if err := cm.dnsManager.DeleteStealthTURNRecords(ctx, namespaceName); err != nil {
|
||||
cm.logger.Warn("Failed to delete stealth DNS records", zap.Error(err))
|
||||
}
|
||||
cm.refreshStateAndGateways(ctx, cluster, clusterNodes, "", webrtcCfg.TURNSharedSecret)
|
||||
cm.logEvent(ctx, cluster.ID, EventWebRTCDisabled, "", "WebRTC stealth disabled", nil)
|
||||
return nil
|
||||
}
|
||||
|
||||
// getStealthPrereqs validates the cluster exists and WebRTC is enabled,
|
||||
// returning both records (with the TURN secret already decrypted).
|
||||
func (cm *ClusterManager) getStealthPrereqs(ctx context.Context, namespaceName string) (*NamespaceCluster, *WebRTCConfig, error) {
|
||||
cluster, err := cm.GetClusterByNamespace(ctx, namespaceName)
|
||||
if err != nil {
|
||||
return nil, nil, fmt.Errorf("failed to get cluster: %w", err)
|
||||
}
|
||||
if cluster == nil {
|
||||
return nil, nil, ErrClusterNotFound
|
||||
}
|
||||
webrtcCfg, err := cm.GetWebRTCConfig(ctx, namespaceName)
|
||||
if err != nil {
|
||||
return nil, nil, fmt.Errorf("failed to get WebRTC config: %w", err)
|
||||
}
|
||||
if webrtcCfg == nil {
|
||||
return nil, nil, ErrWebRTCNotEnabled
|
||||
}
|
||||
return cluster, webrtcCfg, nil
|
||||
}
|
||||
|
||||
// setStealthEnabled flips the stealth flag in namespace_webrtc_config.
|
||||
func (cm *ClusterManager) setStealthEnabled(ctx context.Context, clusterID string, enabled bool) error {
|
||||
internalCtx := client.WithInternalAuth(ctx)
|
||||
val := 0
|
||||
if enabled {
|
||||
val = 1
|
||||
}
|
||||
if _, err := cm.db.Exec(internalCtx,
|
||||
`UPDATE namespace_webrtc_config SET stealth_enabled = ? WHERE namespace_cluster_id = ? AND enabled = 1`,
|
||||
val, clusterID); err != nil {
|
||||
return fmt.Errorf("failed to update stealth_enabled: %w", err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// respawnTURNWithStealth stops and re-spawns every TURN instance of the
|
||||
// cluster with the given stealth domain ("" = stealth off). The spawner
|
||||
// provisions the stealth cert and writes the new TURN config; the SNI
|
||||
// router's discovery picks the route change up from disk.
|
||||
func (cm *ClusterManager) respawnTURNWithStealth(
|
||||
ctx context.Context,
|
||||
cluster *NamespaceCluster,
|
||||
clusterNodes []clusterNodeInfo,
|
||||
turnBlocks []WebRTCPortBlock,
|
||||
turnSecret, stealthDomain string,
|
||||
) error {
|
||||
turnDomain := fmt.Sprintf("turn.ns-%s.%s", cluster.NamespaceName, cm.baseDomain)
|
||||
for _, block := range turnBlocks {
|
||||
var node *clusterNodeInfo
|
||||
for i := range clusterNodes {
|
||||
if clusterNodes[i].NodeID == block.NodeID {
|
||||
node = &clusterNodes[i]
|
||||
break
|
||||
}
|
||||
}
|
||||
if node == nil {
|
||||
return fmt.Errorf("TURN node %s not found in cluster nodes", block.NodeID)
|
||||
}
|
||||
|
||||
cm.stopTURNOnNode(ctx, node.NodeID, node.InternalIP, cluster.NamespaceName)
|
||||
turnCfg := TURNInstanceConfig{
|
||||
Namespace: cluster.NamespaceName,
|
||||
NodeID: node.NodeID,
|
||||
ListenAddr: fmt.Sprintf("0.0.0.0:%d", block.TURNListenPort),
|
||||
TURNSListenAddr: fmt.Sprintf("0.0.0.0:%d", block.TURNTLSPort),
|
||||
PublicIP: node.PublicIP,
|
||||
Realm: cm.baseDomain,
|
||||
AuthSecret: turnSecret,
|
||||
RelayPortStart: block.TURNRelayPortStart,
|
||||
RelayPortEnd: block.TURNRelayPortEnd,
|
||||
TURNDomain: turnDomain,
|
||||
StealthDomain: stealthDomain,
|
||||
}
|
||||
if err := cm.spawnTURNOnNode(ctx, *node, cluster.NamespaceName, turnCfg); err != nil {
|
||||
return fmt.Errorf("failed to re-spawn TURN on node %s: %w", node.NodeID, err)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// rollbackStealthEnable best-effort reverts the DB flag + DNS records after a
|
||||
// failed stealth enable, so the system never advertises a half-built path.
|
||||
func (cm *ClusterManager) rollbackStealthEnable(ctx context.Context, clusterID, namespaceName string) {
|
||||
if err := cm.setStealthEnabled(ctx, clusterID, false); err != nil {
|
||||
cm.logger.Warn("Stealth rollback: failed to clear stealth_enabled", zap.Error(err))
|
||||
}
|
||||
if err := cm.dnsManager.DeleteStealthTURNRecords(ctx, namespaceName); err != nil {
|
||||
cm.logger.Warn("Stealth rollback: failed to delete DNS records", zap.Error(err))
|
||||
}
|
||||
}
|
||||
|
||||
// refreshStateAndGateways rewrites cluster-state.json on all nodes with the
|
||||
// new stealth domain and restarts the namespace gateways so turn.credentials
|
||||
// reflects the change. Failures are logged per node (the reconciler converges
|
||||
// stragglers later via the gatewayConfigInSync drift check).
|
||||
func (cm *ClusterManager) refreshStateAndGateways(
|
||||
ctx context.Context,
|
||||
cluster *NamespaceCluster,
|
||||
clusterNodes []clusterNodeInfo,
|
||||
stealthDomain, turnSecret string,
|
||||
) {
|
||||
turnDomain := fmt.Sprintf("turn.ns-%s.%s", cluster.NamespaceName, cm.baseDomain)
|
||||
|
||||
sfuBlockList, err := cm.getWebRTCBlocksByType(ctx, cluster.ID, "sfu")
|
||||
if err != nil {
|
||||
cm.logger.Warn("Failed to get SFU allocations for state refresh", zap.Error(err))
|
||||
}
|
||||
turnBlockList, err := cm.getWebRTCBlocksByType(ctx, cluster.ID, "turn")
|
||||
if err != nil {
|
||||
cm.logger.Warn("Failed to get TURN allocations for state refresh", zap.Error(err))
|
||||
}
|
||||
sfuBlocks := make(map[string]*WebRTCPortBlock)
|
||||
for i := range sfuBlockList {
|
||||
sfuBlocks[sfuBlockList[i].NodeID] = &sfuBlockList[i]
|
||||
}
|
||||
turnBlocks := make(map[string]*WebRTCPortBlock)
|
||||
for i := range turnBlockList {
|
||||
turnBlocks[turnBlockList[i].NodeID] = &turnBlockList[i]
|
||||
}
|
||||
|
||||
cm.updateClusterStateWithWebRTC(ctx, cluster, clusterNodes, sfuBlocks, turnBlocks, turnDomain, stealthDomain, turnSecret)
|
||||
|
||||
portBlocks, err := cm.portAllocator.GetAllPortBlocks(ctx, cluster.ID)
|
||||
if err != nil {
|
||||
cm.logger.Warn("Failed to get port blocks for gateway restart after stealth toggle", zap.Error(err))
|
||||
return
|
||||
}
|
||||
nodePortBlocks := make(map[string]*PortBlock)
|
||||
for i := range portBlocks {
|
||||
nodePortBlocks[portBlocks[i].NodeID] = &portBlocks[i]
|
||||
}
|
||||
cm.restartGatewaysWithWebRTC(ctx, cluster, clusterNodes, nodePortBlocks, sfuBlocks, turnDomain, stealthDomain, turnSecret)
|
||||
}
|
||||
@ -204,10 +204,10 @@ func (cm *ClusterManager) EnableWebRTC(ctx context.Context, namespaceName, enabl
|
||||
}
|
||||
|
||||
// 14. Update cluster-state.json on all nodes with WebRTC info
|
||||
cm.updateClusterStateWithWebRTC(ctx, cluster, clusterNodes, sfuBlocks, turnBlocks, turnDomain, turnSecret)
|
||||
cm.updateClusterStateWithWebRTC(ctx, cluster, clusterNodes, sfuBlocks, turnBlocks, turnDomain, "", turnSecret)
|
||||
|
||||
// 15. Restart namespace gateways with WebRTC config so they register WebRTC routes
|
||||
cm.restartGatewaysWithWebRTC(ctx, cluster, clusterNodes, nodePortBlocks, sfuBlocks, turnDomain, turnSecret)
|
||||
cm.restartGatewaysWithWebRTC(ctx, cluster, clusterNodes, nodePortBlocks, sfuBlocks, turnDomain, "", turnSecret)
|
||||
|
||||
cm.logEvent(ctx, cluster.ID, EventWebRTCEnabled, "",
|
||||
fmt.Sprintf("WebRTC enabled: SFU on %d nodes, TURN on %d nodes", len(clusterNodes), len(turnNodes)), nil)
|
||||
@ -273,17 +273,23 @@ func (cm *ClusterManager) DisableWebRTC(ctx context.Context, namespaceName strin
|
||||
cm.logger.Warn("Failed to deallocate WebRTC ports", zap.Error(err))
|
||||
}
|
||||
|
||||
// 7. Delete TURN DNS records
|
||||
// 7. Delete TURN DNS records (both the regular and the feat-124 stealth
|
||||
// records — a full WebRTC teardown must not orphan stealth A records when
|
||||
// the namespace had stealth enabled). Delete-by-tag is a no-op when the
|
||||
// stealth records are absent, so this is safe unconditionally.
|
||||
if err := cm.dnsManager.DeleteTURNRecords(ctx, namespaceName); err != nil {
|
||||
cm.logger.Warn("Failed to delete TURN DNS records", zap.Error(err))
|
||||
}
|
||||
if err := cm.dnsManager.DeleteStealthTURNRecords(ctx, namespaceName); err != nil {
|
||||
cm.logger.Warn("Failed to delete stealth TURN DNS records", zap.Error(err))
|
||||
}
|
||||
|
||||
// 8. Clean up DB tables
|
||||
cm.db.Exec(internalCtx, `DELETE FROM webrtc_rooms WHERE namespace_cluster_id = ?`, cluster.ID)
|
||||
cm.db.Exec(internalCtx, `DELETE FROM namespace_webrtc_config WHERE namespace_cluster_id = ?`, cluster.ID)
|
||||
|
||||
// 9. Update cluster-state.json to remove WebRTC info
|
||||
cm.updateClusterStateWithWebRTC(ctx, cluster, clusterNodes, nil, nil, "", "")
|
||||
cm.updateClusterStateWithWebRTC(ctx, cluster, clusterNodes, nil, nil, "", "", "")
|
||||
|
||||
// 10. Restart namespace gateways without WebRTC config so they unregister WebRTC routes
|
||||
portBlocks, err := cm.portAllocator.GetAllPortBlocks(ctx, cluster.ID)
|
||||
@ -292,7 +298,7 @@ func (cm *ClusterManager) DisableWebRTC(ctx context.Context, namespaceName strin
|
||||
for i := range portBlocks {
|
||||
nodePortBlocks[portBlocks[i].NodeID] = &portBlocks[i]
|
||||
}
|
||||
cm.restartGatewaysWithWebRTC(ctx, cluster, clusterNodes, nodePortBlocks, nil, "", "")
|
||||
cm.restartGatewaysWithWebRTC(ctx, cluster, clusterNodes, nodePortBlocks, nil, "", "", "")
|
||||
} else {
|
||||
cm.logger.Warn("Failed to get port blocks for gateway restart after WebRTC disable", zap.Error(err))
|
||||
}
|
||||
@ -498,6 +504,7 @@ func (cm *ClusterManager) spawnTURNRemote(ctx context.Context, nodeIP string, cf
|
||||
"turn_relay_start": cfg.RelayPortStart,
|
||||
"turn_relay_end": cfg.RelayPortEnd,
|
||||
"turn_domain": cfg.TURNDomain,
|
||||
"turn_stealth_domain": cfg.StealthDomain,
|
||||
})
|
||||
return err
|
||||
}
|
||||
@ -558,7 +565,7 @@ func (cm *ClusterManager) updateClusterStateWithWebRTC(
|
||||
nodes []clusterNodeInfo,
|
||||
sfuBlocks map[string]*WebRTCPortBlock,
|
||||
turnBlocks map[string]*WebRTCPortBlock,
|
||||
turnDomain, turnSecret string,
|
||||
turnDomain, turnStealthDomain, turnSecret string,
|
||||
) {
|
||||
// Get existing port blocks for base state
|
||||
portBlocks, err := cm.portAllocator.GetAllPortBlocks(ctx, cluster.ID)
|
||||
@ -635,6 +642,7 @@ func (cm *ClusterManager) updateClusterStateWithWebRTC(
|
||||
}
|
||||
// Persist TURN domain and secret so gateways can be restored on cold start
|
||||
state.TURNDomain = turnDomain
|
||||
state.TURNStealthDomain = turnStealthDomain
|
||||
state.TURNSharedSecret = turnSecret
|
||||
|
||||
if node.NodeID == cm.localNodeID {
|
||||
@ -671,7 +679,7 @@ func (cm *ClusterManager) restartGatewaysWithWebRTC(
|
||||
nodes []clusterNodeInfo,
|
||||
portBlocks map[string]*PortBlock,
|
||||
sfuBlocks map[string]*WebRTCPortBlock,
|
||||
turnDomain, turnSecret string,
|
||||
turnDomain, turnStealthDomain, turnSecret string,
|
||||
) {
|
||||
// Build Olric server addresses from port blocks + node IPs
|
||||
var olricServers []string
|
||||
@ -715,6 +723,7 @@ func (cm *ClusterManager) restartGatewaysWithWebRTC(
|
||||
WebRTCEnabled: webrtcEnabled,
|
||||
SFUPort: sfuPort,
|
||||
TURNDomain: turnDomain,
|
||||
TURNStealthDomain: turnStealthDomain,
|
||||
TURNSecret: turnSecret,
|
||||
// Bugboard #837 follow-up: preserve the secrets key on WebRTC
|
||||
// restarts so enabling WebRTC doesn't drop secrets management.
|
||||
@ -766,6 +775,7 @@ func (cm *ClusterManager) restartGatewayRemote(ctx context.Context, nodeIP strin
|
||||
"gateway_webrtc_enabled": cfg.WebRTCEnabled,
|
||||
"gateway_sfu_port": cfg.SFUPort,
|
||||
"gateway_turn_domain": cfg.TURNDomain,
|
||||
"gateway_turn_stealth_domain": cfg.TURNStealthDomain,
|
||||
"gateway_turn_secret": cfg.TURNSecret,
|
||||
// Bugboard #837 follow-up: preserve the secrets key on WebRTC restarts.
|
||||
"gateway_secrets_encryption_key": cfg.SecretsEncryptionKey,
|
||||
|
||||
@ -537,6 +537,7 @@ func (cm *ClusterManager) ReplaceClusterNode(ctx context.Context, cluster *Names
|
||||
gwCfg.SFUPort = sfuBlock.SFUSignalingPort
|
||||
gwCfg.TURNDomain = fmt.Sprintf("turn.ns-%s.%s", cluster.NamespaceName, cm.baseDomain)
|
||||
gwCfg.TURNSecret = webrtcCfg.TURNSharedSecret
|
||||
gwCfg.TURNStealthDomain = cm.stealthDomainFor(cluster.NamespaceName, webrtcCfg)
|
||||
}
|
||||
}
|
||||
|
||||
@ -1080,6 +1081,7 @@ func (cm *ClusterManager) addNodeToCluster(
|
||||
gwCfg.SFUPort = sfuBlock.SFUSignalingPort
|
||||
gwCfg.TURNDomain = fmt.Sprintf("turn.ns-%s.%s", cluster.NamespaceName, cm.baseDomain)
|
||||
gwCfg.TURNSecret = webrtcCfg.TURNSharedSecret
|
||||
gwCfg.TURNStealthDomain = cm.stealthDomainFor(cluster.NamespaceName, webrtcCfg)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -353,6 +353,78 @@ func (drm *DNSRecordManager) DeleteTURNRecords(ctx context.Context, namespaceNam
|
||||
return nil
|
||||
}
|
||||
|
||||
// stealthDNSNamespace builds the dns_records ownership tag used for a
// namespace's stealth TURNS records: the fixed prefix
// "namespace-turn-stealth:" followed by the namespace name. The prefix
// differs from "namespace-turn:", so deleting by one tag never matches rows
// written under the other.
func stealthDNSNamespace(namespaceName string) string {
	const stealthTagPrefix = "namespace-turn-stealth:"
	return stealthTagPrefix + namespaceName
}
|
||||
|
||||
// CreateStealthTURNRecords creates DNS A records for the stealth TURNS host
|
||||
// (feat-124): <stealthHost> -> TURN node IPs. The hostname is the neutral
|
||||
// cdn-<hash>.<base-domain> label from turn.StealthHostForNamespace — it lives
|
||||
// directly under the base domain (NOT under ns-<namespace>) so the SNI string
|
||||
// never identifies the app.
|
||||
func (drm *DNSRecordManager) CreateStealthTURNRecords(ctx context.Context, namespaceName, stealthHost string, turnIPs []string) error {
|
||||
internalCtx := client.WithInternalAuth(ctx)
|
||||
|
||||
if stealthHost == "" {
|
||||
return &ClusterError{Message: "no stealth host provided for DNS records"}
|
||||
}
|
||||
if len(turnIPs) == 0 {
|
||||
return &ClusterError{Message: "no TURN IPs provided for stealth DNS records"}
|
||||
}
|
||||
|
||||
fqdn := stealthHost + "."
|
||||
|
||||
drm.logger.Info("Creating stealth TURNS DNS records",
|
||||
zap.String("namespace", namespaceName),
|
||||
zap.String("fqdn", fqdn),
|
||||
zap.Strings("turn_ips", turnIPs),
|
||||
)
|
||||
|
||||
deleteQuery := `DELETE FROM dns_records WHERE namespace = ?`
|
||||
_, _ = drm.db.Exec(internalCtx, deleteQuery, stealthDNSNamespace(namespaceName))
|
||||
|
||||
now := time.Now()
|
||||
for _, ip := range turnIPs {
|
||||
insertQuery := `
|
||||
INSERT INTO dns_records (
|
||||
fqdn, record_type, value, ttl, namespace, created_by, created_at, updated_at
|
||||
) VALUES (?, ?, ?, ?, ?, ?, ?, ?)
|
||||
`
|
||||
_, err := drm.db.Exec(internalCtx, insertQuery,
|
||||
fqdn, "A", ip, 60,
|
||||
stealthDNSNamespace(namespaceName),
|
||||
"cluster-manager",
|
||||
now, now,
|
||||
)
|
||||
if err != nil {
|
||||
return &ClusterError{
|
||||
Message: fmt.Sprintf("failed to create stealth TURNS DNS record %s -> %s", fqdn, ip),
|
||||
Cause: err,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// DeleteStealthTURNRecords deletes a namespace's stealth TURNS DNS records.
|
||||
func (drm *DNSRecordManager) DeleteStealthTURNRecords(ctx context.Context, namespaceName string) error {
|
||||
internalCtx := client.WithInternalAuth(ctx)
|
||||
|
||||
deleteQuery := `DELETE FROM dns_records WHERE namespace = ?`
|
||||
_, err := drm.db.Exec(internalCtx, deleteQuery, stealthDNSNamespace(namespaceName))
|
||||
if err != nil {
|
||||
return &ClusterError{
|
||||
Message: "failed to delete stealth TURNS DNS records",
|
||||
Cause: err,
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// EnableNamespaceRecord marks a specific IP's record as active (for recovery)
|
||||
func (drm *DNSRecordManager) EnableNamespaceRecord(ctx context.Context, namespaceName, ip string) error {
|
||||
internalCtx := client.WithInternalAuth(ctx)
|
||||
|
||||
@ -55,7 +55,7 @@ func TestGatewayWebRTCInSync_matchingBlock_returnsTrue(t *testing.T) {
|
||||
|
||||
func TestGatewayWebRTCInSync_eachFieldDriftDetected(t *testing.T) {
|
||||
// Any single drifted field must trigger a restart. Pins that the
|
||||
// comparison covers all four webrtc fields (a future refactor that
|
||||
// comparison covers all five webrtc fields (a future refactor that
|
||||
// drops one would silently let that field drift forever).
|
||||
base := gateway.GatewayYAMLWebRTC{
|
||||
Enabled: true, SFUPort: 30000,
|
||||
@ -69,6 +69,7 @@ func TestGatewayWebRTCInSync_eachFieldDriftDetected(t *testing.T) {
|
||||
{"sfu port changed", func(w *gateway.GatewayYAMLWebRTC) { w.SFUPort = 30001 }},
|
||||
{"turn domain changed", func(w *gateway.GatewayYAMLWebRTC) { w.TURNDomain = "turn.other" }},
|
||||
{"turn secret rotated", func(w *gateway.GatewayYAMLWebRTC) { w.TURNSecret = "rotated" }},
|
||||
{"stealth domain changed", func(w *gateway.GatewayYAMLWebRTC) { w.TURNStealthDomain = "cdn-deadbeef0000.orama-devnet.network" }},
|
||||
}
|
||||
for _, tc := range mutations {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
@ -190,3 +191,25 @@ func TestReconcileGateway_missingConfigReturnsErrorNotRestart(t *testing.T) {
|
||||
t.Error("missing config must return an error (don't blind-restart a healthy gateway)")
|
||||
}
|
||||
}
|
||||
|
||||
func TestGatewayWebRTCInSync_stealthEnableDetectedAsDrift(t *testing.T) {
|
||||
// feat-124: enabling stealth must drift an otherwise-matching gateway so
|
||||
// the reconciler rewrites its yaml with turn_stealth_domain and restarts
|
||||
// it — that's how turn.credentials starts advertising turns:<host>:443.
|
||||
onDisk := gateway.GatewayYAMLWebRTC{
|
||||
Enabled: true, SFUPort: 30000,
|
||||
TURNDomain: "turn.ns-anchat-test.orama-devnet.network", TURNSecret: "the-secret",
|
||||
}
|
||||
desired := desiredEnabled()
|
||||
desired.TURNStealthDomain = "cdn-abc123def456.orama-devnet.network"
|
||||
if gatewayWebRTCInSync(onDisk, desired) {
|
||||
t.Error("stealth enable not detected as drift — gateway would never advertise the stealth URI")
|
||||
}
|
||||
|
||||
// And once the yaml carries it, the same desired config is in-sync (no
|
||||
// restart loop).
|
||||
onDisk.TURNStealthDomain = desired.TURNStealthDomain
|
||||
if !gatewayWebRTCInSync(onDisk, desired) {
|
||||
t.Error("matching stealth domain reported as drift — restart loop")
|
||||
}
|
||||
}
|
||||
|
||||
@ -11,11 +11,11 @@ import "testing"
|
||||
// port is per-node (0 on a gateway-only node). Pins both the drift
|
||||
// fallback and the non-SFU-gateway case.
|
||||
|
||||
// dbFetch signature: () -> (turnSecret, turnDomain string, sfuPort int).
|
||||
func dbNone() (string, string, int) { return "", "", 0 }
|
||||
// dbFetch signature: () -> (turnSecret, turnDomain, stealthDomain string, sfuPort int).
|
||||
func dbNone() (string, string, string, int) { return "", "", "", 0 }
|
||||
|
||||
func dbFull(secret, domain string, sfuPort int) func() (string, string, int) {
|
||||
return func() (string, string, int) { return secret, domain, sfuPort }
|
||||
func dbFull(secret, domain string, sfuPort int) func() (string, string, string, int) {
|
||||
return func() (string, string, string, int) { return secret, domain, "", sfuPort }
|
||||
}
|
||||
|
||||
func TestChooseRestoreWebRTC_stateFileCompleteWins(t *testing.T) {
|
||||
@ -23,8 +23,8 @@ func TestChooseRestoreWebRTC_stateFileCompleteWins(t *testing.T) {
|
||||
// (the lazy dbFetch must not be called — saves a query on the hot
|
||||
// restart path).
|
||||
dbCalled := false
|
||||
got := chooseRestoreWebRTC(true, 7800, "turn.ns-x.dbrs.space", "state-secret",
|
||||
func() (string, string, int) { dbCalled = true; return dbNone() })
|
||||
got := chooseRestoreWebRTC(true, 7800, "turn.ns-x.dbrs.space", "state-secret", "",
|
||||
func() (string, string, string, int) { dbCalled = true; return dbNone() })
|
||||
|
||||
if dbCalled {
|
||||
t.Error("DB fetch was called even though the state file had the TURN secret (should short-circuit)")
|
||||
@ -41,7 +41,7 @@ func TestChooseRestoreWebRTC_staleStateFallsBackToDB(t *testing.T) {
|
||||
// The bug-25 drift case: state file has NO webrtc (stale — written
|
||||
// before enable), DB says enabled WITH an SFU port on this node. MUST
|
||||
// fall back to the DB and re-materialize the full block.
|
||||
got := chooseRestoreWebRTC(false, 0, "", "",
|
||||
got := chooseRestoreWebRTC(false, 0, "", "", "",
|
||||
dbFull("db-secret", "turn.ns-anchat-test.dbrs.space", 7801))
|
||||
|
||||
if !got.enabled {
|
||||
@ -65,7 +65,7 @@ func TestChooseRestoreWebRTC_nonSFUGatewayGetsTURNOnly(t *testing.T) {
|
||||
// secret (so /v1/webrtc/turn/credentials registers + works) while
|
||||
// sfuPort stays 0 (signal/rooms don't register). This is exactly node
|
||||
// 57's situation — pre-fix it resolved to disabled and 404'd.
|
||||
got := chooseRestoreWebRTC(false, 0, "", "",
|
||||
got := chooseRestoreWebRTC(false, 0, "", "", "",
|
||||
dbFull("db-secret", "turn.ns-anchat-test.dbrs.space", 0)) // sfuPort 0 = no local SFU
|
||||
|
||||
if !got.enabled {
|
||||
@ -84,8 +84,8 @@ func TestChooseRestoreWebRTC_stateHasTURNButNoSFU(t *testing.T) {
|
||||
// false / port 0. Must use the state TURN secret with sfuPort=0 and
|
||||
// NOT consult the DB (TURN secret present = complete enough).
|
||||
dbCalled := false
|
||||
got := chooseRestoreWebRTC(false, 0, "turn.ns-x.dbrs.space", "state-secret",
|
||||
func() (string, string, int) { dbCalled = true; return dbNone() })
|
||||
got := chooseRestoreWebRTC(false, 0, "turn.ns-x.dbrs.space", "state-secret", "",
|
||||
func() (string, string, string, int) { dbCalled = true; return dbNone() })
|
||||
|
||||
if dbCalled {
|
||||
t.Error("DB fetch called even though state file had the TURN secret")
|
||||
@ -98,7 +98,7 @@ func TestChooseRestoreWebRTC_stateHasTURNButNoSFU(t *testing.T) {
|
||||
func TestChooseRestoreWebRTC_bothEmptyDisabled(t *testing.T) {
|
||||
// Namespace genuinely without WebRTC: state empty, DB returns nothing.
|
||||
// Must return disabled so we don't register broken webrtc routes.
|
||||
got := chooseRestoreWebRTC(false, 0, "", "", dbNone)
|
||||
got := chooseRestoreWebRTC(false, 0, "", "", "", dbNone)
|
||||
if got.enabled {
|
||||
t.Errorf("want disabled when neither source has WebRTC; got %+v", got)
|
||||
}
|
||||
@ -109,8 +109,8 @@ func TestChooseRestoreWebRTC_dbNoSecretStaysDisabled(t *testing.T) {
|
||||
// provisioned / shouldn't happen). The TURN secret is the
|
||||
// enablement marker; without it we treat it as not-configured-for-
|
||||
// TURN, but an SFU port alone still enables SFU routes.
|
||||
got := chooseRestoreWebRTC(false, 0, "", "",
|
||||
func() (string, string, int) { return "", "turn.db", 9000 })
|
||||
got := chooseRestoreWebRTC(false, 0, "", "", "",
|
||||
func() (string, string, string, int) { return "", "turn.db", "", 9000 })
|
||||
// dbFetch only runs when state secret is empty; here it returns no
|
||||
// secret, so the `if dbSecret != ""` guard means NOTHING is taken
|
||||
// from the DB → disabled. (An SFU-only-no-TURN namespace is not a
|
||||
@ -119,3 +119,39 @@ func TestChooseRestoreWebRTC_dbNoSecretStaysDisabled(t *testing.T) {
|
||||
t.Errorf("DB returned no TURN secret: want disabled; got %+v", got)
|
||||
}
|
||||
}
|
||||
|
||||
// --- feat-124 stealth domain restore precedence ---
|
||||
|
||||
func TestChooseRestoreWebRTC_stealthFromStateFile(t *testing.T) {
|
||||
// Stealth toggles rewrite cluster state, so a fresh state file carries
|
||||
// the stealth domain and must win without a DB call.
|
||||
got := chooseRestoreWebRTC(true, 7800, "turn.ns-x.dbrs.space", "state-secret", "cdn-abc123def456.dbrs.space",
|
||||
func() (string, string, string, int) {
|
||||
t.Error("DB fetch called even though state file was complete")
|
||||
return dbNone()
|
||||
})
|
||||
if got.stealthDomain != "cdn-abc123def456.dbrs.space" {
|
||||
t.Errorf("stealthDomain = %q; want state-file value", got.stealthDomain)
|
||||
}
|
||||
}
|
||||
|
||||
func TestChooseRestoreWebRTC_stealthFromDBOnStaleState(t *testing.T) {
|
||||
// Stale state (no TURN secret) + DB has stealth enabled → stealth domain
|
||||
// re-materializes from the DB alongside the rest of the WebRTC block.
|
||||
got := chooseRestoreWebRTC(false, 0, "", "", "",
|
||||
func() (string, string, string, int) {
|
||||
return "db-secret", "turn.ns-x.dbrs.space", "cdn-abc123def456.dbrs.space", 7801
|
||||
})
|
||||
if !got.enabled || got.stealthDomain != "cdn-abc123def456.dbrs.space" {
|
||||
t.Errorf("want stealth domain from DB on stale state; got %+v", got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestChooseRestoreWebRTC_noStealthStaysEmpty(t *testing.T) {
|
||||
// Stealth disabled everywhere → empty stealthDomain (gateway advertises
|
||||
// the baseline 3-rung ladder only).
|
||||
got := chooseRestoreWebRTC(true, 7800, "turn.ns-x.dbrs.space", "state-secret", "", dbNone)
|
||||
if got.stealthDomain != "" {
|
||||
t.Errorf("stealthDomain = %q; want empty when stealth is disabled", got.stealthDomain)
|
||||
}
|
||||
}
|
||||
|
||||
@ -238,6 +238,7 @@ func (s *SystemdSpawner) SpawnGateway(ctx context.Context, namespace, nodeID str
|
||||
SFUPort: cfg.SFUPort,
|
||||
TURNDomain: cfg.TURNDomain,
|
||||
TURNSecret: cfg.TURNSecret,
|
||||
TURNStealthDomain: cfg.TURNStealthDomain,
|
||||
},
|
||||
}
|
||||
|
||||
@ -343,7 +344,8 @@ func gatewayWebRTCInSync(onDisk gateway.GatewayYAMLWebRTC, cfg gateway.InstanceC
|
||||
return onDisk.Enabled == cfg.WebRTCEnabled &&
|
||||
onDisk.SFUPort == cfg.SFUPort &&
|
||||
onDisk.TURNSecret == cfg.TURNSecret &&
|
||||
onDisk.TURNDomain == cfg.TURNDomain
|
||||
onDisk.TURNDomain == cfg.TURNDomain &&
|
||||
onDisk.TURNStealthDomain == cfg.TURNStealthDomain
|
||||
}
|
||||
|
||||
// gatewayConfigInSync reports whether the full reconcile-relevant config on
|
||||
@ -516,6 +518,68 @@ type TURNInstanceConfig struct {
|
||||
RelayPortStart int // Start of relay port range
|
||||
RelayPortEnd int // End of relay port range
|
||||
TURNDomain string // TURN domain for Let's Encrypt cert (e.g., "turn.ns-myapp.orama-devnet.network")
|
||||
// StealthDomain is the neutral stealth TURNS host (feat-124). When set,
|
||||
// the TURN server carries a second Let's Encrypt cert for this name and
|
||||
// serves it to TLS clients whose SNI matches — the path the SNI router
|
||||
// forwards from :443. Stealth NEVER falls back to a self-signed cert: a
|
||||
// cert clients reject is indistinguishable from being blocked.
|
||||
StealthDomain string
|
||||
}
|
||||
|
||||
// acmeInternalEndpoint is the gateway's internal ACME endpoint that the
|
||||
// Caddyfile TURN-cert blocks point the orama DNS provider at.
|
||||
const acmeInternalEndpoint = "http://localhost:6001/v1/internal/acme"
|
||||
|
||||
// turnCertProvisionTimeout bounds how long a TURN spawn waits for Caddy to
|
||||
// provision a Let's Encrypt cert before falling back (primary domain) or
|
||||
// failing (stealth domain).
|
||||
const turnCertProvisionTimeout = 2 * time.Minute
|
||||
|
||||
// resolveTURNSCert resolves the TURNS cert/key pair for a domain.
|
||||
//
|
||||
// Let's Encrypt via Caddy is tried FIRST whenever a domain is set — the call
|
||||
// is idempotent and instant when the cert is already in Caddy's storage. This
|
||||
// ordering also self-heals nodes stuck on the self-signed fallback from an
|
||||
// earlier failed provisioning (live devnet finding, feat-124): the old code
|
||||
// never retried Caddy once a self-signed pair existed on disk, so strict TLS
|
||||
// clients kept failing turns: validation forever.
|
||||
//
|
||||
// allowSelfSigned controls the fallback: the primary TURN domain may fall
|
||||
// back to (or reuse) a self-signed pair at <configDir>/turn-{cert,key}.pem so
|
||||
// baseline TURN stays up, while the stealth domain must hard-fail instead.
|
||||
func (s *SystemdSpawner) resolveTURNSCert(namespace, domain, publicIP, configDir string, allowSelfSigned bool) (string, string, error) {
|
||||
if domain != "" {
|
||||
caddyCert, caddyKey, err := provisionTURNCertViaCaddy(domain, acmeInternalEndpoint, turnCertProvisionTimeout)
|
||||
if err == nil {
|
||||
s.logger.Info("Using Let's Encrypt cert from Caddy for TURNS",
|
||||
zap.String("namespace", namespace),
|
||||
zap.String("domain", domain),
|
||||
zap.String("cert_path", caddyCert))
|
||||
return caddyCert, caddyKey, nil
|
||||
}
|
||||
if !allowSelfSigned {
|
||||
return "", "", fmt.Errorf("failed to provision Let's Encrypt cert for stealth TURNS domain %s (no self-signed fallback — clients must be able to validate it): %w", domain, err)
|
||||
}
|
||||
s.logger.Warn("Let's Encrypt cert provisioning failed, falling back to self-signed",
|
||||
zap.String("namespace", namespace),
|
||||
zap.String("domain", domain),
|
||||
zap.Error(err))
|
||||
}
|
||||
if !allowSelfSigned {
|
||||
return "", "", fmt.Errorf("no domain configured for TURNS cert in namespace %s", namespace)
|
||||
}
|
||||
|
||||
certPath := filepath.Join(configDir, "turn-cert.pem")
|
||||
keyPath := filepath.Join(configDir, "turn-key.pem")
|
||||
if _, err := os.Stat(certPath); os.IsNotExist(err) {
|
||||
if err := turn.GenerateSelfSignedCert(certPath, keyPath, publicIP); err != nil {
|
||||
return "", "", fmt.Errorf("failed to generate TURNS self-signed cert for namespace %s: %w", namespace, err)
|
||||
}
|
||||
s.logger.Info("Generated TURNS self-signed certificate",
|
||||
zap.String("namespace", namespace),
|
||||
zap.String("cert_path", certPath))
|
||||
}
|
||||
return certPath, keyPath, nil
|
||||
}
|
||||
|
||||
// SpawnTURN starts a TURN instance using systemd
|
||||
@ -534,42 +598,47 @@ func (s *SystemdSpawner) SpawnTURN(ctx context.Context, namespace, nodeID string
|
||||
|
||||
configPath := filepath.Join(configDir, fmt.Sprintf("turn-%s.yaml", nodeID))
|
||||
|
||||
// Provision TLS cert for TURNS — try Let's Encrypt via Caddy first, fall back to self-signed
|
||||
certPath := filepath.Join(configDir, "turn-cert.pem")
|
||||
keyPath := filepath.Join(configDir, "turn-key.pem")
|
||||
// Provision TLS cert for TURNS — Let's Encrypt via Caddy first (idempotent,
|
||||
// also upgrades nodes stuck on the self-signed fallback), self-signed as
|
||||
// the primary-domain fallback only.
|
||||
var certPath, keyPath string
|
||||
if cfg.TURNSListenAddr != "" {
|
||||
if _, err := os.Stat(certPath); os.IsNotExist(err) {
|
||||
// Try Let's Encrypt via Caddy first
|
||||
if cfg.TURNDomain != "" {
|
||||
acmeEndpoint := "http://localhost:6001/v1/internal/acme"
|
||||
caddyCert, caddyKey, provErr := provisionTURNCertViaCaddy(cfg.TURNDomain, acmeEndpoint, 2*time.Minute)
|
||||
if provErr == nil {
|
||||
certPath = caddyCert
|
||||
keyPath = caddyKey
|
||||
s.logger.Info("Using Let's Encrypt cert from Caddy for TURNS",
|
||||
var certErr error
|
||||
certPath, keyPath, certErr = s.resolveTURNSCert(namespace, cfg.TURNDomain, cfg.PublicIP, configDir, true)
|
||||
if certErr != nil {
|
||||
s.logger.Warn("Failed to resolve TURNS cert, TURNS will be disabled",
|
||||
zap.String("namespace", namespace),
|
||||
zap.String("domain", cfg.TURNDomain),
|
||||
zap.String("cert_path", certPath))
|
||||
} else {
|
||||
s.logger.Warn("Let's Encrypt cert provisioning failed, falling back to self-signed",
|
||||
zap.String("namespace", namespace),
|
||||
zap.String("domain", cfg.TURNDomain),
|
||||
zap.Error(provErr))
|
||||
zap.Error(certErr))
|
||||
cfg.TURNSListenAddr = "" // Disable TURNS if no cert is available
|
||||
}
|
||||
}
|
||||
// Fallback: generate self-signed cert if no cert is available yet
|
||||
if _, statErr := os.Stat(certPath); os.IsNotExist(statErr) {
|
||||
if err := turn.GenerateSelfSignedCert(certPath, keyPath, cfg.PublicIP); err != nil {
|
||||
s.logger.Warn("Failed to generate TURNS self-signed cert, TURNS will be disabled",
|
||||
zap.String("namespace", namespace),
|
||||
zap.Error(err))
|
||||
cfg.TURNSListenAddr = "" // Disable TURNS if cert generation fails
|
||||
} else {
|
||||
s.logger.Info("Generated TURNS self-signed certificate",
|
||||
zap.String("namespace", namespace),
|
||||
zap.String("cert_path", certPath))
|
||||
|
||||
// Stealth TURNS cert (feat-124): requires a working TURNS listener and a
|
||||
// CA-valid cert — hard error, never a silent downgrade, because the
|
||||
// operator explicitly enabled stealth and a half-working stealth endpoint
|
||||
// is invisible until a censored-region user fails to connect.
|
||||
var stealthCertPath, stealthKeyPath string
|
||||
if cfg.StealthDomain != "" {
|
||||
// Security: the stealth domain arrives over the spawn protocol (mesh
|
||||
// peers gated only by the static internal-auth header). Before it
|
||||
// reaches the Caddyfile/ACME sink, pin it to the deterministic
|
||||
// derivation so a forged value can't drive cert issuance for an
|
||||
// attacker-chosen name. cfg.Realm is the base domain on every TURN
|
||||
// spawn site. (provisionTURNCertViaCaddy adds a DNS-name allowlist as
|
||||
// defense-in-depth.)
|
||||
if cfg.Realm != "" {
|
||||
want := turn.StealthHostForNamespace(cfg.Namespace, cfg.Realm)
|
||||
if cfg.StealthDomain != want {
|
||||
return fmt.Errorf("stealth domain %q does not match the derived host %q for namespace %s — refusing to provision", cfg.StealthDomain, want, cfg.Namespace)
|
||||
}
|
||||
}
|
||||
if cfg.TURNSListenAddr == "" {
|
||||
return fmt.Errorf("stealth TURNS for namespace %s requires an active TURNS listener (no TLS cert/listener available)", namespace)
|
||||
}
|
||||
var stealthErr error
|
||||
stealthCertPath, stealthKeyPath, stealthErr = s.resolveTURNSCert(namespace, cfg.StealthDomain, cfg.PublicIP, configDir, false)
|
||||
if stealthErr != nil {
|
||||
return fmt.Errorf("failed to provision stealth TURNS cert for namespace %s: %w", namespace, stealthErr)
|
||||
}
|
||||
}
|
||||
|
||||
@ -588,6 +657,11 @@ func (s *SystemdSpawner) SpawnTURN(ctx context.Context, namespace, nodeID string
|
||||
turnConfig.TLSCertPath = certPath
|
||||
turnConfig.TLSKeyPath = keyPath
|
||||
}
|
||||
if stealthCertPath != "" {
|
||||
turnConfig.StealthDomain = cfg.StealthDomain
|
||||
turnConfig.TLSStealthCertPath = stealthCertPath
|
||||
turnConfig.TLSStealthKeyPath = stealthKeyPath
|
||||
}
|
||||
|
||||
configBytes, err := yaml.Marshal(turnConfig)
|
||||
if err != nil {
|
||||
|
||||
@ -5,10 +5,20 @@ import (
|
||||
"os"
|
||||
"os/exec"
|
||||
"path/filepath"
|
||||
"regexp"
|
||||
"strings"
|
||||
"time"
|
||||
)
|
||||
|
||||
// dnsNamePattern matches a conservative lowercase DNS hostname. It exists to
|
||||
// keep an operator/spawn-supplied domain from breaking out of the Caddyfile
|
||||
// block it is interpolated into (a value containing '{', '}', or a newline
|
||||
// could otherwise inject arbitrary Caddy directives) and to refuse cert
|
||||
// provisioning for non-hostname junk. Security: defense-in-depth at the
|
||||
// Caddyfile sink; the caller also pins the stealth domain to its deterministic
|
||||
// derivation (systemd_spawner.go SpawnTURN).
|
||||
var dnsNamePattern = regexp.MustCompile(`^[a-z0-9]([a-z0-9-]*[a-z0-9])?(\.[a-z0-9]([a-z0-9-]*[a-z0-9])?)+$`)
|
||||
|
||||
const (
|
||||
caddyfilePath = "/etc/caddy/Caddyfile"
|
||||
|
||||
@ -25,6 +35,12 @@ const (
|
||||
// If Caddy is not available or cert provisioning times out, returns an error
|
||||
// so the caller can fall back to a self-signed cert.
|
||||
func provisionTURNCertViaCaddy(domain, acmeEndpoint string, timeout time.Duration) (certPath, keyPath string, err error) {
|
||||
// Refuse anything that isn't a clean DNS name before it reaches the
|
||||
// Caddyfile write — blocks Caddyfile-injection via crafted domains.
|
||||
if !dnsNamePattern.MatchString(domain) {
|
||||
return "", "", fmt.Errorf("refusing to provision TURNS cert for non-DNS-name domain %q", domain)
|
||||
}
|
||||
|
||||
// Check if cert already exists from a previous provisioning
|
||||
certPath, keyPath = caddyCertPaths(domain)
|
||||
if _, err := os.Stat(certPath); err == nil {
|
||||
|
||||
108
core/pkg/namespace/turn_stealth_cert_test.go
Normal file
108
core/pkg/namespace/turn_stealth_cert_test.go
Normal file
@ -0,0 +1,108 @@
|
||||
package namespace
|
||||
|
||||
import (
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"go.uber.org/zap"
|
||||
)
|
||||
|
||||
// feat-124 — resolveTURNSCert semantics.
|
||||
//
|
||||
// On machines without a Caddyfile (tests, dev laptops) the Let's Encrypt
|
||||
// branch fails fast with "failed to read Caddyfile", exercising exactly the
|
||||
// fallback decision this function owns: primary domains degrade to a
|
||||
// self-signed pair, the stealth domain must hard-fail instead.
|
||||
|
||||
func testSpawner(t *testing.T) *SystemdSpawner {
|
||||
t.Helper()
|
||||
return &SystemdSpawner{logger: zap.NewNop()}
|
||||
}
|
||||
|
||||
func TestResolveTURNSCert_primaryFallsBackToSelfSigned(t *testing.T) {
|
||||
s := testSpawner(t)
|
||||
dir := t.TempDir()
|
||||
|
||||
certPath, keyPath, err := s.resolveTURNSCert("ns-test", "turn.ns-test.example.com", "203.0.113.7", dir, true)
|
||||
if err != nil {
|
||||
t.Fatalf("expected self-signed fallback, got error: %v", err)
|
||||
}
|
||||
if certPath != filepath.Join(dir, "turn-cert.pem") || keyPath != filepath.Join(dir, "turn-key.pem") {
|
||||
t.Errorf("unexpected fallback paths: %s / %s", certPath, keyPath)
|
||||
}
|
||||
if _, statErr := os.Stat(certPath); statErr != nil {
|
||||
t.Errorf("self-signed cert not written: %v", statErr)
|
||||
}
|
||||
}
|
||||
|
||||
func TestResolveTURNSCert_existingSelfSignedReused(t *testing.T) {
|
||||
s := testSpawner(t)
|
||||
dir := t.TempDir()
|
||||
|
||||
first, _, err := s.resolveTURNSCert("ns-test", "", "203.0.113.7", dir, true)
|
||||
if err != nil {
|
||||
t.Fatalf("first resolve: %v", err)
|
||||
}
|
||||
info1, err := os.Stat(first)
|
||||
if err != nil {
|
||||
t.Fatalf("stat first cert: %v", err)
|
||||
}
|
||||
|
||||
second, _, err := s.resolveTURNSCert("ns-test", "", "203.0.113.7", dir, true)
|
||||
if err != nil {
|
||||
t.Fatalf("second resolve: %v", err)
|
||||
}
|
||||
info2, err := os.Stat(second)
|
||||
if err != nil {
|
||||
t.Fatalf("stat second cert: %v", err)
|
||||
}
|
||||
if first != second || info1.ModTime() != info2.ModTime() {
|
||||
t.Error("existing self-signed pair was regenerated instead of reused")
|
||||
}
|
||||
}
|
||||
|
||||
func TestResolveTURNSCert_stealthNeverFallsBackToSelfSigned(t *testing.T) {
|
||||
s := testSpawner(t)
|
||||
dir := t.TempDir()
|
||||
|
||||
_, _, err := s.resolveTURNSCert("ns-test", "cdn-abc123def456.example.com", "203.0.113.7", dir, false)
|
||||
if err == nil {
|
||||
t.Fatal("stealth cert resolution must hard-fail without Let's Encrypt — a self-signed stealth cert is indistinguishable from being blocked")
|
||||
}
|
||||
if !strings.Contains(err.Error(), "cdn-abc123def456.example.com") {
|
||||
t.Errorf("error must name the stealth domain for the operator; got: %v", err)
|
||||
}
|
||||
if _, statErr := os.Stat(filepath.Join(dir, "turn-cert.pem")); !os.IsNotExist(statErr) {
|
||||
t.Error("stealth failure must not write a self-signed pair")
|
||||
}
|
||||
}
|
||||
|
||||
func TestResolveTURNSCert_noDomainNoFallbackErrors(t *testing.T) {
|
||||
s := testSpawner(t)
|
||||
_, _, err := s.resolveTURNSCert("ns-test", "", "203.0.113.7", t.TempDir(), false)
|
||||
if err == nil {
|
||||
t.Fatal("empty domain with self-signed disallowed must error")
|
||||
}
|
||||
}
|
||||
|
||||
// Security (feat-124): the Caddyfile sink must refuse any domain that isn't a
|
||||
// clean DNS name, so a crafted value can't break out of the generated block
|
||||
// and inject Caddy directives.
|
||||
func TestProvisionTURNCertViaCaddy_rejectsNonDNSName(t *testing.T) {
|
||||
bad := []string{
|
||||
"example.com {\n reverse_proxy evil:1234\n}\n#",
|
||||
"has space.com",
|
||||
"UPPER.example.com",
|
||||
"nodots",
|
||||
"trailing-.example.com",
|
||||
"",
|
||||
}
|
||||
for _, d := range bad {
|
||||
if _, _, err := provisionTURNCertViaCaddy(d, "http://localhost:6001/v1/internal/acme", time.Second); err == nil {
|
||||
t.Errorf("provisionTURNCertViaCaddy(%q) accepted a non-DNS-name domain", d)
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -249,6 +249,8 @@ var (
|
||||
ErrRecoveryInProgress = &ClusterError{Message: "recovery already in progress for this cluster"}
|
||||
ErrWebRTCAlreadyEnabled = &ClusterError{Message: "WebRTC is already enabled for this namespace"}
|
||||
ErrWebRTCNotEnabled = &ClusterError{Message: "WebRTC is not enabled for this namespace"}
|
||||
ErrWebRTCStealthAlreadyEnabled = &ClusterError{Message: "WebRTC stealth is already enabled for this namespace"}
|
||||
ErrWebRTCStealthNotEnabled = &ClusterError{Message: "WebRTC stealth is not enabled for this namespace"}
|
||||
ErrNoWebRTCPortsAvailable = &ClusterError{Message: "no WebRTC ports available on node"}
|
||||
)
|
||||
|
||||
@ -262,6 +264,10 @@ type WebRTCConfig struct {
|
||||
TURNCredentialTTL int `json:"turn_credential_ttl" db:"turn_credential_ttl"`
|
||||
SFUNodeCount int `json:"sfu_node_count" db:"sfu_node_count"`
|
||||
TURNNodeCount int `json:"turn_node_count" db:"turn_node_count"`
|
||||
// StealthEnabled gates the censorship-resistant TURNS:443 path (feat-124):
|
||||
// stealth cert on the TURN servers, SNI route on :443, and the
|
||||
// `turns:<stealth-host>:443` rung in the turn.credentials URI ladder.
|
||||
StealthEnabled bool `json:"stealth_enabled" db:"stealth_enabled"`
|
||||
EnabledBy string `json:"enabled_by" db:"enabled_by"`
|
||||
EnabledAt time.Time `json:"enabled_at" db:"enabled_at"`
|
||||
DisabledAt *time.Time `json:"disabled_at,omitempty" db:"disabled_at"`
|
||||
|
||||
@ -828,6 +828,7 @@ func (e *Engine) registerHostModule(ctx context.Context) error {
|
||||
NewFunctionBuilder().WithFunc(e.hWSBroadcast).Export("ws_broadcast").
|
||||
NewFunctionBuilder().WithFunc(e.hEphemeralStateSet).Export("ephemeral_state_set").
|
||||
NewFunctionBuilder().WithFunc(e.hEphemeralStateClear).Export("ephemeral_state_clear").
|
||||
NewFunctionBuilder().WithFunc(e.hEphemeralStateList).Export("ephemeral_state_list").
|
||||
NewFunctionBuilder().WithFunc(e.hFunctionInvoke).Export("function_invoke").
|
||||
NewFunctionBuilder().WithFunc(e.hFunctionInvokeAsync).Export("function_invoke_async").
|
||||
NewFunctionBuilder().WithFunc(e.hLogInfo).Export("log_info").
|
||||
@ -1463,6 +1464,33 @@ func (e *Engine) hEphemeralStateClear(ctx context.Context, mod api.Module,
|
||||
return 1
|
||||
}
|
||||
|
||||
// hEphemeralStateList is the WASM-callable wrapper for EphemeralStateList —
|
||||
// the bugboard #710 reconnect catch-up read.
|
||||
//
|
||||
// ABI: ephemeral_state_list(topicPtr, topicLen uint32) -> uint64 packed
|
||||
// (ptr<<32 | len) pointing to a JSON envelope in guest memory:
|
||||
//
|
||||
// {"entries":[{"key":..,"client_id":..,"payload":<base64>,"expires_in_ms":..}, …]}
|
||||
//
|
||||
// Returns 0 on failure (empty topic, no invocation context, ephemeral state
|
||||
// unavailable, or a guest-memory error). Unlike set/clear, no WS client is
|
||||
// required — the read is namespace-scoped via the invocation context.
|
||||
func (e *Engine) hEphemeralStateList(ctx context.Context, mod api.Module,
|
||||
topicPtr, topicLen uint32) uint64 {
|
||||
topic, ok := e.executor.ReadFromGuest(mod, topicPtr, topicLen)
|
||||
if !ok {
|
||||
return 0
|
||||
}
|
||||
out, err := e.hostServices.EphemeralStateList(ctx, string(topic))
|
||||
if err != nil {
|
||||
e.logger.Warn("host function ephemeral_state_list failed",
|
||||
zap.String("topic", string(topic)),
|
||||
zap.Error(err))
|
||||
return 0
|
||||
}
|
||||
return e.executor.WriteToGuest(ctx, mod, out)
|
||||
}
|
||||
|
||||
// hPushSend is the WASM-callable wrapper for PushSend.
|
||||
// Inputs:
|
||||
//
|
||||
|
||||
@ -4,6 +4,7 @@ import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"sort"
|
||||
"sync"
|
||||
"time"
|
||||
)
|
||||
@ -47,26 +48,29 @@ const (
|
||||
ephemeralSweepInterval = 10 * time.Second
|
||||
)
|
||||
|
||||
// EphemeralEventKind discriminates the synthetic events published on a topic.
|
||||
type EphemeralEventKind string
|
||||
|
||||
// Synthetic-event discriminator values carried in the `_orama` field. The
|
||||
// `_orama` control-frame namespace is the contract agreed with app teams on
|
||||
// bugboard #710 (#458/#505/#849/#901) — the same dispatch pattern clients
|
||||
// already use for the auth.refresh control frame from #321.
|
||||
const (
|
||||
EphemeralEventSet EphemeralEventKind = "set"
|
||||
EphemeralEventClear EphemeralEventKind = "clear"
|
||||
EphemeralEventSet = "ephemeral.set"
|
||||
EphemeralEventClear = "ephemeral.clear"
|
||||
)
|
||||
|
||||
// EphemeralEvent is the wire shape published on the topic when ephemeral state
|
||||
// is set, cleared, or auto-cleared on disconnect/expiry. Subscribers key off
|
||||
// Kind + Key to update their local view. Payload is only populated for "set".
|
||||
// is set, cleared, or auto-cleared on disconnect/expiry. Subscribers dispatch
|
||||
// on the `_orama` discriminator + Key to update their local view. Payload is
|
||||
// only populated for "ephemeral.set".
|
||||
type EphemeralEvent struct {
|
||||
Type string `json:"__ephemeral"` // always "state"
|
||||
Kind EphemeralEventKind `json:"kind"` // set | clear
|
||||
Type string `json:"_orama"` // "ephemeral.set" | "ephemeral.clear"
|
||||
Topic string `json:"topic"` // the topic the state lives on (self-describing for sub-routers)
|
||||
Key string `json:"key"` // app-chosen key
|
||||
ClientID string `json:"client_id"` // owning WS client
|
||||
// Payload is the opaque app-chosen blob (may be JSON, protobuf, or
|
||||
// arbitrary bytes), present only for "set". encoding/json base64-encodes
|
||||
// a []byte on the wire, so subscribers base64-decode "payload" to recover
|
||||
// the original bytes — mirroring how pubsub_publish_batch carries data.
|
||||
// arbitrary bytes), present only for "ephemeral.set". encoding/json
|
||||
// base64-encodes a []byte on the wire, so subscribers base64-decode
|
||||
// "payload" to recover the original bytes — mirroring how
|
||||
// pubsub_publish_batch carries data.
|
||||
Payload []byte `json:"payload,omitempty"`
|
||||
Reason string `json:"reason,omitempty"` // clear only: explicit|disconnect|expired
|
||||
}
|
||||
@ -192,8 +196,8 @@ func (s *EphemeralStore) Set(ctx context.Context, namespace, clientID, topic, ke
|
||||
s.mu.Unlock()
|
||||
|
||||
evt := EphemeralEvent{
|
||||
Type: "state",
|
||||
Kind: EphemeralEventSet,
|
||||
Type: EphemeralEventSet,
|
||||
Topic: topic,
|
||||
Key: key,
|
||||
ClientID: clientID,
|
||||
Payload: payloadCopy,
|
||||
@ -225,14 +229,60 @@ func (s *EphemeralStore) Clear(ctx context.Context, namespace, clientID, topic,
|
||||
s.mu.Unlock()
|
||||
|
||||
return s.publishEvent(ctx, namespace, topic, EphemeralEvent{
|
||||
Type: "state",
|
||||
Kind: EphemeralEventClear,
|
||||
Type: EphemeralEventClear,
|
||||
Topic: topic,
|
||||
Key: key,
|
||||
ClientID: clientID,
|
||||
Reason: "explicit",
|
||||
})
|
||||
}
|
||||
|
||||
// EphemeralListEntry describes a single live ephemeral-state entry as
// returned by List, i.e. the reconnect catch-up shape served through the
// ephemeral_state_list host fn.
type EphemeralListEntry struct {
	// Key is the entry's key within the topic.
	Key string `json:"key"`
	// ClientID identifies the client recorded on the entry.
	ClientID string `json:"client_id"`
	// Payload is the stored blob; encoding/json emits it base64-encoded and
	// drops the field entirely when it is empty.
	Payload []byte `json:"payload,omitempty"`
	// ExpiresInMs is the remaining TTL in milliseconds. It is relative rather
	// than an absolute timestamp so callers don't need a synchronized clock.
	ExpiresInMs int64 `json:"expires_in_ms"`
}
|
||||
|
||||
// List returns the live (non-expired) entries on a (namespace, topic), sorted
|
||||
// by key for deterministic output. The reconnect catch-up path (bugboard #710
|
||||
// acceptance): a client that just (re)subscribed reads the current state once,
|
||||
// then tracks the ephemeral.set/ephemeral.clear event stream. Read-only — no
|
||||
// ownership requirement, no WS client needed.
|
||||
func (s *EphemeralStore) List(namespace, topic string) []EphemeralListEntry {
|
||||
now := s.now()
|
||||
|
||||
s.mu.Lock()
|
||||
entries := make([]EphemeralListEntry, 0)
|
||||
for sk, entry := range s.values {
|
||||
if sk.namespace != namespace || sk.topic != topic {
|
||||
continue
|
||||
}
|
||||
if !now.Before(entry.expiresAt) {
|
||||
// now >= expiresAt: hide it. Intentionally one tick stricter than
|
||||
// sweepExpired (which removes only when now.After(expiresAt)) — a
|
||||
// reconnect catch-up must never surface state that is at/past its
|
||||
// deadline, even if the backstop sweeper hasn't run yet.
|
||||
continue
|
||||
}
|
||||
payloadCopy := make([]byte, len(entry.payload))
|
||||
copy(payloadCopy, entry.payload)
|
||||
entries = append(entries, EphemeralListEntry{
|
||||
Key: entry.key,
|
||||
ClientID: entry.clientID,
|
||||
Payload: payloadCopy,
|
||||
ExpiresInMs: entry.expiresAt.Sub(now).Milliseconds(),
|
||||
})
|
||||
}
|
||||
s.mu.Unlock()
|
||||
|
||||
sort.Slice(entries, func(i, j int) bool { return entries[i].Key < entries[j].Key })
|
||||
return entries
|
||||
}
|
||||
|
||||
// ClearClient removes every entry owned by clientID and publishes a clear
|
||||
// event for each (reason "disconnect"). Called from the WS disconnect hook —
|
||||
// the primary, zero-lag cleanup path. Safe to call for an unknown client.
|
||||
@ -261,8 +311,8 @@ func (s *EphemeralStore) clearClientWithReason(ctx context.Context, clientID, re
|
||||
|
||||
for _, entry := range toClear {
|
||||
_ = s.publishEvent(ctx, entry.namespace, entry.topic, EphemeralEvent{
|
||||
Type: "state",
|
||||
Kind: EphemeralEventClear,
|
||||
Type: EphemeralEventClear,
|
||||
Topic: entry.topic,
|
||||
Key: entry.key,
|
||||
ClientID: clientID,
|
||||
Reason: reason,
|
||||
@ -292,7 +342,7 @@ func (s *EphemeralStore) publishEvent(ctx context.Context, namespace, topic stri
|
||||
return fmt.Errorf("ephemeral state: marshal event: %w", err)
|
||||
}
|
||||
if err := s.publish(ctx, namespace, topic, data); err != nil {
|
||||
return fmt.Errorf("ephemeral state: publish %s event: %w", evt.Kind, err)
|
||||
return fmt.Errorf("ephemeral state: publish %s event: %w", evt.Type, err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
@ -335,8 +385,8 @@ func (s *EphemeralStore) sweepExpired(ctx context.Context) {
|
||||
|
||||
for _, entry := range expired {
|
||||
_ = s.publishEvent(ctx, entry.namespace, entry.topic, EphemeralEvent{
|
||||
Type: "state",
|
||||
Kind: EphemeralEventClear,
|
||||
Type: EphemeralEventClear,
|
||||
Topic: entry.topic,
|
||||
Key: entry.key,
|
||||
ClientID: entry.clientID,
|
||||
Reason: "expired",
|
||||
|
||||
@ -40,12 +40,12 @@ func (c *capturePublisher) snapshot() []capturedEvent {
|
||||
return out
|
||||
}
|
||||
|
||||
func (c *capturePublisher) countKind(kind EphemeralEventKind) int {
|
||||
func (c *capturePublisher) countKind(eventType string) int {
|
||||
c.mu.Lock()
|
||||
defer c.mu.Unlock()
|
||||
n := 0
|
||||
for _, e := range c.events {
|
||||
if e.event.Kind == kind {
|
||||
if e.event.Type == eventType {
|
||||
n++
|
||||
}
|
||||
}
|
||||
@ -114,7 +114,7 @@ func TestEphemeralStore_SetThenDisconnect(t *testing.T) {
|
||||
t.Errorf("disconnect clear events = %d, want 2", got)
|
||||
}
|
||||
for _, e := range pub.snapshot() {
|
||||
if e.event.Kind == EphemeralEventClear && e.event.Reason != "disconnect" {
|
||||
if e.event.Type == EphemeralEventClear && e.event.Reason != "disconnect" {
|
||||
t.Errorf("clear reason = %q, want disconnect", e.event.Reason)
|
||||
}
|
||||
}
|
||||
@ -149,7 +149,7 @@ func TestEphemeralStore_TTLExpiry(t *testing.T) {
|
||||
// A clear event with reason=expired must have been published.
|
||||
foundExpired := false
|
||||
for _, e := range pub.snapshot() {
|
||||
if e.event.Kind == EphemeralEventClear && e.event.Reason == "expired" {
|
||||
if e.event.Type == EphemeralEventClear && e.event.Reason == "expired" {
|
||||
foundExpired = true
|
||||
}
|
||||
}
|
||||
@ -293,3 +293,130 @@ func TestEphemeralStore_OwnershipTransfer(t *testing.T) {
|
||||
t.Errorf("new owner's disconnect did not clear, count=%d", s.keyCountForTest())
|
||||
}
|
||||
}
|
||||
|
||||
// TestEphemeralStore_wireFormatContract pins the EXACT JSON wire shape of the
|
||||
// synthetic events — the `_orama` control-frame contract agreed with app teams
|
||||
// on bugboard #710 (#458/#505/#849/#901). Client sub-routers dispatch on the
|
||||
// `_orama` discriminator; renaming any of these fields is a breaking protocol
|
||||
// change and must fail this test.
|
||||
func TestEphemeralStore_wireFormatContract(t *testing.T) {
|
||||
type raw struct {
|
||||
Orama string `json:"_orama"`
|
||||
Topic string `json:"topic"`
|
||||
Key string `json:"key"`
|
||||
ClientID string `json:"client_id"`
|
||||
Payload []byte `json:"payload"`
|
||||
Reason string `json:"reason"`
|
||||
}
|
||||
var got []raw
|
||||
pub := func(_ context.Context, _, _ string, data []byte) error {
|
||||
var r raw
|
||||
if err := json.Unmarshal(data, &r); err != nil {
|
||||
return err
|
||||
}
|
||||
got = append(got, r)
|
||||
return nil
|
||||
}
|
||||
s := newTestStore(pub)
|
||||
ctx := context.Background()
|
||||
|
||||
if err := s.Set(ctx, "ns1", "client-A", "typing:room1", "user-7", []byte("blob"), 0); err != nil {
|
||||
t.Fatalf("Set: %v", err)
|
||||
}
|
||||
s.ClearClient(ctx, "client-A")
|
||||
|
||||
if len(got) != 2 {
|
||||
t.Fatalf("expected 2 events (set + disconnect clear), got %d", len(got))
|
||||
}
|
||||
set, clear := got[0], got[1]
|
||||
if set.Orama != "ephemeral.set" {
|
||||
t.Errorf(`set _orama = %q, want "ephemeral.set"`, set.Orama)
|
||||
}
|
||||
if set.Topic != "typing:room1" || set.Key != "user-7" || set.ClientID != "client-A" {
|
||||
t.Errorf("set event fields wrong: %+v", set)
|
||||
}
|
||||
if string(set.Payload) != "blob" {
|
||||
t.Errorf("set payload = %q, want blob", set.Payload)
|
||||
}
|
||||
if clear.Orama != "ephemeral.clear" {
|
||||
t.Errorf(`clear _orama = %q, want "ephemeral.clear"`, clear.Orama)
|
||||
}
|
||||
if clear.Topic != "typing:room1" || clear.Key != "user-7" || clear.Reason != "disconnect" {
|
||||
t.Errorf("clear event fields wrong: %+v", clear)
|
||||
}
|
||||
}
|
||||
|
||||
func TestEphemeralStoreList_returnsLiveEntriesSorted(t *testing.T) {
|
||||
s := newTestStore(nil)
|
||||
ctx := context.Background()
|
||||
|
||||
if err := s.Set(ctx, "ns1", "client-B", "presence:room1", "zeta", []byte("z"), 0); err != nil {
|
||||
t.Fatalf("Set zeta: %v", err)
|
||||
}
|
||||
if err := s.Set(ctx, "ns1", "client-A", "presence:room1", "alpha", []byte("a"), 0); err != nil {
|
||||
t.Fatalf("Set alpha: %v", err)
|
||||
}
|
||||
|
||||
entries := s.List("ns1", "presence:room1")
|
||||
if len(entries) != 2 {
|
||||
t.Fatalf("List returned %d entries, want 2", len(entries))
|
||||
}
|
||||
if entries[0].Key != "alpha" || entries[1].Key != "zeta" {
|
||||
t.Errorf("entries not sorted by key: %q, %q", entries[0].Key, entries[1].Key)
|
||||
}
|
||||
if entries[0].ClientID != "client-A" || string(entries[0].Payload) != "a" {
|
||||
t.Errorf("entry fields wrong: %+v", entries[0])
|
||||
}
|
||||
if entries[0].ExpiresInMs <= 0 {
|
||||
t.Errorf("ExpiresInMs must be positive for a live entry, got %d", entries[0].ExpiresInMs)
|
||||
}
|
||||
}
|
||||
|
||||
func TestEphemeralStoreList_excludesExpiredAndOtherScopes(t *testing.T) {
|
||||
s := newTestStore(nil)
|
||||
ctx := context.Background()
|
||||
base := time.Now()
|
||||
s.now = func() time.Time { return base }
|
||||
|
||||
if err := s.Set(ctx, "ns1", "c", "t", "live", []byte("p"), 60_000); err != nil {
|
||||
t.Fatalf("Set live: %v", err)
|
||||
}
|
||||
if err := s.Set(ctx, "ns1", "c", "t", "dying", []byte("p"), 1000); err != nil {
|
||||
t.Fatalf("Set dying: %v", err)
|
||||
}
|
||||
if err := s.Set(ctx, "ns2", "c", "t", "other-ns", []byte("p"), 60_000); err != nil {
|
||||
t.Fatalf("Set other-ns: %v", err)
|
||||
}
|
||||
if err := s.Set(ctx, "ns1", "c", "t2", "other-topic", []byte("p"), 60_000); err != nil {
|
||||
t.Fatalf("Set other-topic: %v", err)
|
||||
}
|
||||
|
||||
// Advance past "dying"'s TTL but do NOT sweep — List must hide it anyway.
|
||||
s.now = func() time.Time { return base.Add(2 * time.Second) }
|
||||
|
||||
entries := s.List("ns1", "t")
|
||||
if len(entries) != 1 || entries[0].Key != "live" {
|
||||
t.Fatalf("List = %+v, want exactly the single live ns1/t entry", entries)
|
||||
}
|
||||
}
|
||||
|
||||
func TestEphemeralStoreList_emptyTopicReturnsEmpty(t *testing.T) {
|
||||
s := newTestStore(nil)
|
||||
if entries := s.List("ns1", "nothing-here"); len(entries) != 0 {
|
||||
t.Errorf("List on empty topic = %+v, want empty", entries)
|
||||
}
|
||||
}
|
||||
|
||||
func TestEphemeralStoreList_snapshotIsDefensiveCopy(t *testing.T) {
|
||||
s := newTestStore(nil)
|
||||
ctx := context.Background()
|
||||
if err := s.Set(ctx, "ns1", "c", "t", "k", []byte("orig"), 0); err != nil {
|
||||
t.Fatalf("Set: %v", err)
|
||||
}
|
||||
entries := s.List("ns1", "t")
|
||||
entries[0].Payload[0] = 'X'
|
||||
fresh := s.List("ns1", "t")
|
||||
if string(fresh[0].Payload) != "orig" {
|
||||
t.Error("List payload is not a defensive copy; store was mutated")
|
||||
}
|
||||
}
|
||||
|
||||
@ -146,6 +146,10 @@ func (m *mockHostServices) EphemeralStateClear(ctx context.Context, topic, key s
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *mockHostServices) EphemeralStateList(ctx context.Context, topic string) ([]byte, error) {
|
||||
return []byte(`{"entries":[]}`), nil
|
||||
}
|
||||
|
||||
func (m *mockHostServices) WSSend(ctx context.Context, clientID string, data []byte) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
@ -220,6 +220,34 @@ func (h *HostFunctions) EphemeralStateClear(ctx context.Context, topic, key stri
|
||||
return nil
|
||||
}
|
||||
|
||||
// ephemeralListEnvelope is the JSON shape returned by EphemeralStateList —
// an object (not a bare array) so fields can be added without breaking
// existing WASM callers.
type ephemeralListEnvelope struct {
	// Entries is serialized under the "entries" key.
	Entries []serverless.EphemeralListEntry `json:"entries"`
}
|
||||
|
||||
// EphemeralStateList returns the live ephemeral entries on a topic in the
|
||||
// invocation's namespace (bugboard #710 reconnect catch-up). Read-only: no
|
||||
// WS client required, so HTTP-invoked functions can serve snapshots too.
|
||||
func (h *HostFunctions) EphemeralStateList(ctx context.Context, topic string) ([]byte, error) {
|
||||
if h.ephemeralStore == nil {
|
||||
return nil, &serverless.HostFunctionError{Function: "ephemeral_state_list", Cause: fmt.Errorf("ephemeral state not available on this gateway")}
|
||||
}
|
||||
if topic == "" {
|
||||
return nil, &serverless.HostFunctionError{Function: "ephemeral_state_list", Cause: fmt.Errorf("topic is required")}
|
||||
}
|
||||
cur := h.currentInvocationContext(ctx)
|
||||
if cur == nil {
|
||||
return nil, &serverless.HostFunctionError{Function: "ephemeral_state_list", Cause: fmt.Errorf("no invocation context")}
|
||||
}
|
||||
out, err := json.Marshal(ephemeralListEnvelope{Entries: h.ephemeralStore.List(cur.Namespace, topic)})
|
||||
if err != nil {
|
||||
return nil, &serverless.HostFunctionError{Function: "ephemeral_state_list", Cause: fmt.Errorf("marshal entries: %w", err)}
|
||||
}
|
||||
return out, nil
|
||||
}
|
||||
|
||||
// WSSend sends data to a specific WebSocket client.
|
||||
func (h *HostFunctions) WSSend(ctx context.Context, clientID string, data []byte) error {
|
||||
if h.wsManager == nil {
|
||||
|
||||
@ -259,6 +259,10 @@ func (m *MockHostServices) EphemeralStateClear(ctx context.Context, topic, key s
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *MockHostServices) EphemeralStateList(ctx context.Context, topic string) ([]byte, error) {
|
||||
return []byte(`{"entries":[]}`), nil
|
||||
}
|
||||
|
||||
// WSSend is a mock that discards the data and always reports success.
func (m *MockHostServices) WSSend(ctx context.Context, clientID string, data []byte) error {
	return nil
}
|
||||
|
||||
@ -595,6 +595,14 @@ type HostServices interface {
|
||||
// non-owned key is a no-op. Errors only on no-WS-client / empty topic-key.
|
||||
EphemeralStateClear(ctx context.Context, topic, key string) error
|
||||
|
||||
// EphemeralStateList returns the live entries on a topic in the current
|
||||
// invocation's namespace as a JSON envelope:
|
||||
// {"entries":[{"key":..,"client_id":..,"payload":<base64>,"expires_in_ms":..}, …]}
|
||||
// The reconnect catch-up read (bugboard #710 acceptance): unlike
|
||||
// Set/Clear it does NOT require a WS client in context — any function
|
||||
// invocation may read. Errors on empty topic or no invocation context.
|
||||
EphemeralStateList(ctx context.Context, topic string) ([]byte, error)
|
||||
|
||||
// WebSocket operations (only valid in WS context)
|
||||
WSSend(ctx context.Context, clientID string, data []byte) error
|
||||
WSBroadcast(ctx context.Context, topic string, data []byte) error
|
||||
|
||||
129
core/pkg/sniproxy/discoverer.go
Normal file
129
core/pkg/sniproxy/discoverer.go
Normal file
@ -0,0 +1,129 @@
|
||||
package sniproxy
|
||||
|
||||
import (
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"go.uber.org/zap"
|
||||
)
|
||||
|
||||
// discoveryWarnInterval is the minimum spacing between two "discovery scan
// failed" warnings, so a persistently-unreadable namespaces directory cannot
// flood the journal.
const discoveryWarnInterval = 5 * time.Minute
|
||||
|
||||
// StaticRoutes returns the operator-set routes parsed from the SNI router's own
// config file plus the fallback backend. The discoverer merges these with the
// auto-discovered TURN routes; static routes win on an SNI conflict. A non-nil
// err makes the discoverer abandon that scan without touching the Router.
type StaticRoutes func() (routes []Route, fallback Backend, err error)
|
||||
|
||||
// TURNRouteDiscoverer periodically rescans the namespaces directory for
// per-namespace TURNS listeners, merges the discovered routes with the static
// routes from the config file (static wins on conflict), and atomically
// installs the result on the Router.
//
// A transient failure (unreadable namespaces dir, or a bad static-config read)
// logs a rate-limited warning and KEEPS the previously-installed routes — a
// filesystem hiccup must never blackhole live :443 traffic.
type TURNRouteDiscoverer struct {
	cfg    TURNDiscoveryConfig // scan settings: namespaces dir, base domain, rescan interval
	static StaticRoutes        // source of config-file routes and the fallback backend
	router *Router             // receives the merged table via Replace
	logger *zap.Logger         // never nil when built by NewTURNRouteDiscoverer

	// lastWarn is only touched by the Run goroutine after the synchronous
	// startup Apply, so it needs no lock.
	lastWarn time.Time
}
|
||||
|
||||
// NewTURNRouteDiscoverer constructs a discoverer. static reads the operator's
|
||||
// config-file routes + fallback; router receives the merged Replace calls.
|
||||
func NewTURNRouteDiscoverer(cfg TURNDiscoveryConfig, static StaticRoutes, router *Router, logger *zap.Logger) *TURNRouteDiscoverer {
|
||||
if logger == nil {
|
||||
logger = zap.NewNop()
|
||||
}
|
||||
return &TURNRouteDiscoverer{cfg: cfg, static: static, router: router, logger: logger}
|
||||
}
|
||||
|
||||
// Apply performs one scan+merge and installs the result atomically. On any
|
||||
// transient error it returns the error and leaves the Router untouched so the
|
||||
// caller can decide whether to fail startup (Apply) or keep stale routes (Run).
|
||||
func (d *TURNRouteDiscoverer) Apply() error {
|
||||
staticRoutes, fallback, err := d.static()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
discovered, err := DiscoverTURNRoutes(d.cfg, d.logger)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
merged := mergeRoutes(staticRoutes, discovered)
|
||||
d.router.Replace(merged, fallback)
|
||||
return nil
|
||||
}
|
||||
|
||||
// Run scans immediately, then every rescan interval until stop is closed. A
|
||||
// failed scan keeps the current routes and logs a rate-limited warning.
|
||||
func (d *TURNRouteDiscoverer) Run(stop <-chan struct{}) {
|
||||
if err := d.Apply(); err != nil {
|
||||
d.warn("initial TURN route discovery failed; serving config-file routes only", err)
|
||||
}
|
||||
|
||||
interval := d.cfg.RescanInterval
|
||||
if interval <= 0 {
|
||||
interval = DefaultDiscoveryRescanInterval
|
||||
}
|
||||
ticker := time.NewTicker(interval)
|
||||
defer ticker.Stop()
|
||||
|
||||
for {
|
||||
select {
|
||||
case <-stop:
|
||||
return
|
||||
case <-ticker.C:
|
||||
if err := d.Apply(); err != nil {
|
||||
d.warn("TURN route discovery failed; keeping current routes", err)
|
||||
continue
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// warn logs at most once per discoveryWarnInterval to avoid journal flooding
|
||||
// when the namespaces directory is persistently unreadable.
|
||||
func (d *TURNRouteDiscoverer) warn(msg string, err error) {
|
||||
now := time.Now()
|
||||
if now.Sub(d.lastWarn) < discoveryWarnInterval {
|
||||
return
|
||||
}
|
||||
d.lastWarn = now
|
||||
d.logger.Warn(msg,
|
||||
zap.String("namespaces_dir", d.cfg.NamespacesDir),
|
||||
zap.Error(err))
|
||||
}
|
||||
|
||||
// mergeRoutes combines static and discovered routes with static taking
|
||||
// precedence on an SNI-match conflict. Static routes keep their original order
|
||||
// and precede discovered ones, matching Router.Pick's first-match semantics.
|
||||
func mergeRoutes(static, discovered []Route) []Route {
|
||||
seen := make(map[string]struct{}, len(static))
|
||||
merged := make([]Route, 0, len(static)+len(discovered))
|
||||
for _, r := range static {
|
||||
seen[matchKey(r.Match)] = struct{}{}
|
||||
merged = append(merged, r)
|
||||
}
|
||||
for _, r := range discovered {
|
||||
if _, conflict := seen[matchKey(r.Match)]; conflict {
|
||||
continue // static wins
|
||||
}
|
||||
merged = append(merged, r)
|
||||
}
|
||||
return merged
|
||||
}
|
||||
|
||||
// matchKey lower-cases an SNI match so conflict detection is
// case-insensitive, mirroring Router.Pick / matchSNI.
func matchKey(match string) string {
	normalized := strings.ToLower(match)
	return normalized
}
|
||||
143
core/pkg/sniproxy/discoverer_test.go
Normal file
143
core/pkg/sniproxy/discoverer_test.go
Normal file
@ -0,0 +1,143 @@
|
||||
package sniproxy
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"path/filepath"
|
||||
"testing"
|
||||
|
||||
"github.com/DeBrosOfficial/network/pkg/turn"
|
||||
)
|
||||
|
||||
// TestTURNRouteDiscoverer_staticRouteWinsMerge verifies that when a discovered
|
||||
// stealth route collides with a static config route on the same SNI, the static
|
||||
// route's backend is the one that ends up in the router (static wins).
|
||||
func TestTURNRouteDiscoverer_staticRouteWinsMerge(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
const base = "example.com"
|
||||
writeTURNConfig(t, dir, "anchat", "node-1", "0.0.0.0:5349")
|
||||
|
||||
stealthHost := turn.StealthHostForNamespace("anchat", base)
|
||||
fallback := Backend{Name: "caddy", Network: "tcp", Addr: "127.0.0.1:8443"}
|
||||
|
||||
// Static config pins the very same stealth host to a DIFFERENT backend.
|
||||
static := func() ([]Route, Backend, error) {
|
||||
return []Route{
|
||||
{Match: stealthHost, Backend: Backend{Name: "static-override", Network: "tcp", Addr: "127.0.0.1:9999"}},
|
||||
}, fallback, nil
|
||||
}
|
||||
|
||||
router := NewRouter(Backend{})
|
||||
d := NewTURNRouteDiscoverer(TURNDiscoveryConfig{NamespacesDir: dir, BaseDomain: base}, static, router, nil)
|
||||
if err := d.Apply(); err != nil {
|
||||
t.Fatalf("Apply failed: %v", err)
|
||||
}
|
||||
|
||||
// Pick must return the static backend, not the discovered one.
|
||||
got := router.Pick(stealthHost)
|
||||
if got.Addr != "127.0.0.1:9999" {
|
||||
t.Errorf("static route should win: got backend %q, want 127.0.0.1:9999", got.Addr)
|
||||
}
|
||||
|
||||
// The non-conflicting discovered alias must still be present.
|
||||
alias := router.Pick("turn.ns-anchat." + base)
|
||||
if alias.Addr != "127.0.0.1:5349" {
|
||||
t.Errorf("discovered alias route missing/wrong: got %q", alias.Addr)
|
||||
}
|
||||
|
||||
// Fallback preserved from static source.
|
||||
if router.Fallback().Addr != "127.0.0.1:8443" {
|
||||
t.Errorf("fallback not preserved: got %q", router.Fallback().Addr)
|
||||
}
|
||||
}
|
||||
|
||||
// TestTURNRouteDiscoverer_transientErrorKeepsPreviousRoutes verifies that once
|
||||
// routes are installed, a subsequent Apply whose scan fails (namespaces dir
|
||||
// removed) returns an error and leaves the previously-installed routes intact —
|
||||
// a transient filesystem error must never blackhole :443.
|
||||
func TestTURNRouteDiscoverer_transientErrorKeepsPreviousRoutes(t *testing.T) {
|
||||
parent := t.TempDir()
|
||||
nsDir := filepath.Join(parent, "namespaces")
|
||||
const base = "example.com"
|
||||
writeTURNConfig(t, nsDir, "anchat", "node-1", "0.0.0.0:5349")
|
||||
|
||||
fallback := Backend{Name: "caddy", Network: "tcp", Addr: "127.0.0.1:8443"}
|
||||
static := func() ([]Route, Backend, error) { return nil, fallback, nil }
|
||||
|
||||
router := NewRouter(Backend{})
|
||||
d := NewTURNRouteDiscoverer(TURNDiscoveryConfig{NamespacesDir: nsDir, BaseDomain: base}, static, router, nil)
|
||||
|
||||
// First Apply succeeds and installs the anchat routes.
|
||||
if err := d.Apply(); err != nil {
|
||||
t.Fatalf("first Apply failed: %v", err)
|
||||
}
|
||||
before := len(router.Routes())
|
||||
if before != 2 {
|
||||
t.Fatalf("expected 2 routes after first apply, got %d", before)
|
||||
}
|
||||
|
||||
// Make the namespaces dir unreadable by pointing the discoverer at a now-
|
||||
// removed path (simulate transient read failure).
|
||||
d.cfg.NamespacesDir = filepath.Join(parent, "gone")
|
||||
|
||||
err := d.Apply()
|
||||
if err == nil {
|
||||
t.Fatalf("expected Apply to error on missing namespaces dir")
|
||||
}
|
||||
|
||||
// Routes must be unchanged — the failed scan kept the previous table.
|
||||
after := router.Routes()
|
||||
if len(after) != before {
|
||||
t.Errorf("routes changed on transient error: had %d, now %d", before, len(after))
|
||||
}
|
||||
stealthHost := turn.StealthHostForNamespace("anchat", base)
|
||||
if router.Pick(stealthHost).Addr != "127.0.0.1:5349" {
|
||||
t.Errorf("previously-installed stealth route lost after transient error")
|
||||
}
|
||||
}
|
||||
|
||||
// TestTURNRouteDiscoverer_staticSourceErrorKeepsRoutes verifies a failing static
|
||||
// source (e.g. a bad config-file edit) also leaves the router untouched.
|
||||
func TestTURNRouteDiscoverer_staticSourceErrorKeepsRoutes(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
const base = "example.com"
|
||||
writeTURNConfig(t, dir, "anchat", "node-1", "0.0.0.0:5349")
|
||||
|
||||
fallback := Backend{Name: "caddy", Network: "tcp", Addr: "127.0.0.1:8443"}
|
||||
good := func() ([]Route, Backend, error) { return nil, fallback, nil }
|
||||
|
||||
router := NewRouter(Backend{})
|
||||
d := NewTURNRouteDiscoverer(TURNDiscoveryConfig{NamespacesDir: dir, BaseDomain: base}, good, router, nil)
|
||||
if err := d.Apply(); err != nil {
|
||||
t.Fatalf("first Apply failed: %v", err)
|
||||
}
|
||||
before := len(router.Routes())
|
||||
|
||||
// Swap in a static source that errors (simulates a malformed config file).
|
||||
d.static = func() ([]Route, Backend, error) { return nil, Backend{}, errors.New("bad config") }
|
||||
if err := d.Apply(); err == nil {
|
||||
t.Fatalf("expected Apply to error on static source failure")
|
||||
}
|
||||
if len(router.Routes()) != before {
|
||||
t.Errorf("routes changed on static-source error: had %d, now %d", before, len(router.Routes()))
|
||||
}
|
||||
}
|
||||
|
||||
// TestMergeRoutes_staticPrecedesDiscovered checks first-match ordering: static
|
||||
// routes precede discovered ones in the merged slice.
|
||||
func TestMergeRoutes_staticPrecedesDiscovered(t *testing.T) {
|
||||
static := []Route{{Match: "a.example.com", Backend: Backend{Addr: "127.0.0.1:1"}}}
|
||||
discovered := []Route{
|
||||
{Match: "a.example.com", Backend: Backend{Addr: "127.0.0.1:2"}}, // conflict, dropped
|
||||
{Match: "b.example.com", Backend: Backend{Addr: "127.0.0.1:3"}},
|
||||
}
|
||||
merged := mergeRoutes(static, discovered)
|
||||
if len(merged) != 2 {
|
||||
t.Fatalf("expected 2 merged routes (1 static + 1 non-conflicting), got %d: %+v", len(merged), merged)
|
||||
}
|
||||
if merged[0].Match != "a.example.com" || merged[0].Backend.Addr != "127.0.0.1:1" {
|
||||
t.Errorf("static route should be first and unchanged: %+v", merged[0])
|
||||
}
|
||||
if merged[1].Match != "b.example.com" {
|
||||
t.Errorf("non-conflicting discovered route missing: %+v", merged)
|
||||
}
|
||||
}
|
||||
185
core/pkg/sniproxy/discovery.go
Normal file
185
core/pkg/sniproxy/discovery.go
Normal file
@ -0,0 +1,185 @@
|
||||
package sniproxy
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"net"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"sort"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/DeBrosOfficial/network/pkg/turn"
|
||||
"go.uber.org/zap"
|
||||
"gopkg.in/yaml.v3"
|
||||
)
|
||||
|
||||
const (
	// DefaultDiscoveryRescanInterval is how often the TURN route discoverer
	// rescans the namespaces directory when no interval is configured. A
	// namespace gaining or losing its TURNS listener is rare, so 30s of
	// detection latency is acceptable and keeps filesystem load negligible.
	DefaultDiscoveryRescanInterval = 30 * time.Second

	// turnConfigGlob is the pattern, relative to a namespace directory, that
	// matches the per-node TURN config files the namespace spawner writes:
	// "<namespaces_dir>/<namespace>/configs/turn-<nodeID>.yaml".
	turnConfigGlob = "configs/turn-*.yaml"

	// stealthBackendNamePrefix is prepended to the namespace to name a
	// discovered TURN backend in logs/metrics.
	stealthBackendNamePrefix = "turn-stealth-"
)
|
||||
|
||||
// The router forwards two SNI hostname shapes to a namespace's TURNS listener:
//   - the bland hashed host from turn.StealthHostForNamespace (DPI-resistant)
//   - a human-readable "turn.ns-<namespace>.<base_domain>" alias (operator UX)
|
||||
|
||||
// TURNDiscoveryConfig configures the namespaces scan that derives per-namespace
// stealth-TURN routes. NamespacesDir and BaseDomain are required (see
// Validate); RescanInterval is optional and a zero value selects
// DefaultDiscoveryRescanInterval.
type TURNDiscoveryConfig struct {
	// NamespacesDir is the directory holding one subdirectory per namespace,
	// each containing a "configs/turn-*.yaml" written by the namespace spawner
	// (e.g. "/opt/orama/.orama/data/namespaces").
	NamespacesDir string `yaml:"namespaces_dir"`

	// BaseDomain is the cluster's base domain (e.g. "orama-devnet.network"),
	// used to derive the stealth and "turn.ns-*" SNI hostnames.
	BaseDomain string `yaml:"base_domain"`

	// RescanInterval is how often the namespaces directory is rescanned. Zero
	// selects DefaultDiscoveryRescanInterval.
	RescanInterval time.Duration `yaml:"rescan_interval"`
}
|
||||
|
||||
// Validate reports configuration errors. It does not touch the filesystem; a
|
||||
// missing NamespacesDir at scan time is a transient error handled by the
|
||||
// discoverer (previous routes are kept), not a config error.
|
||||
func (c *TURNDiscoveryConfig) Validate() []string {
|
||||
var errs []string
|
||||
if c.NamespacesDir == "" {
|
||||
errs = append(errs, "turn_discovery.namespaces_dir: required")
|
||||
}
|
||||
if c.BaseDomain == "" {
|
||||
errs = append(errs, "turn_discovery.base_domain: required")
|
||||
}
|
||||
return errs
|
||||
}
|
||||
|
||||
// DiscoverTURNRoutes scans cfg.NamespacesDir for per-namespace TURN configs and
|
||||
// returns two routes per namespace that exposes a TURNS listener:
|
||||
//
|
||||
// - turn.StealthHostForNamespace(namespace, baseDomain) -> 127.0.0.1:<tls-port>
|
||||
// - "turn.ns-<namespace>.<baseDomain>" -> 127.0.0.1:<tls-port>
|
||||
//
|
||||
// Namespaces whose TURN config has an empty turns_listen_addr (TURNS disabled)
|
||||
// are skipped. A turn-*.yaml that cannot be read or parsed is skipped with a
|
||||
// per-file warning, but the scan continues for the rest — one bad file must not
|
||||
// hide every other namespace's routes.
|
||||
//
|
||||
// A failure to read the namespaces directory itself returns an error so callers
|
||||
// can keep the previously-installed routes rather than wiping them on a
|
||||
// transient filesystem error.
|
||||
func DiscoverTURNRoutes(cfg TURNDiscoveryConfig, logger *zap.Logger) ([]Route, error) {
|
||||
if logger == nil {
|
||||
logger = zap.NewNop()
|
||||
}
|
||||
|
||||
entries, err := os.ReadDir(cfg.NamespacesDir)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("read namespaces dir %s: %w", cfg.NamespacesDir, err)
|
||||
}
|
||||
|
||||
var routes []Route
|
||||
for _, entry := range entries {
|
||||
if !entry.IsDir() {
|
||||
continue
|
||||
}
|
||||
nsRoutes := discoverNamespaceRoutes(cfg, entry.Name(), logger)
|
||||
routes = append(routes, nsRoutes...)
|
||||
}
|
||||
|
||||
// Deterministic order keeps Router.Replace idempotent and tests stable.
|
||||
sort.Slice(routes, func(i, j int) bool { return routes[i].Match < routes[j].Match })
|
||||
return routes, nil
|
||||
}
|
||||
|
||||
// discoverNamespaceRoutes resolves the stealth + alias routes for a single
|
||||
// namespace directory. Returns nil when the namespace has no TURNS listener or
|
||||
// its config is unreadable/unparseable (logged, not fatal).
|
||||
func discoverNamespaceRoutes(cfg TURNDiscoveryConfig, nsDir string, logger *zap.Logger) []Route {
|
||||
glob := filepath.Join(cfg.NamespacesDir, nsDir, turnConfigGlob)
|
||||
matches, err := filepath.Glob(glob)
|
||||
if err != nil {
|
||||
// Glob only errors on a malformed pattern, which turnConfigGlob is not;
|
||||
// guard anyway so a future edit can't silently swallow it.
|
||||
logger.Warn("turn-config glob failed",
|
||||
zap.String("namespace_dir", nsDir), zap.Error(err))
|
||||
return nil
|
||||
}
|
||||
|
||||
for _, configPath := range matches {
|
||||
namespace, tlsPort, ok := parseTURNConfig(configPath, logger)
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
backend := Backend{
|
||||
Name: stealthBackendNamePrefix + namespace,
|
||||
Network: "tcp",
|
||||
Addr: net.JoinHostPort("127.0.0.1", tlsPort),
|
||||
}
|
||||
return []Route{
|
||||
{Match: turn.StealthHostForNamespace(namespace, cfg.BaseDomain), Backend: backend},
|
||||
{Match: fmt.Sprintf("turn.ns-%s.%s", namespace, cfg.BaseDomain), Backend: backend},
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// parseTURNConfig reads a turn-*.yaml and returns its namespace and TURNS port.
|
||||
// ok is false (with a warning) when the file is unreadable/unparseable, when it
|
||||
// names no namespace, or when TURNS is disabled (empty turns_listen_addr).
|
||||
func parseTURNConfig(path string, logger *zap.Logger) (namespace, tlsPort string, ok bool) {
|
||||
data, err := os.ReadFile(path)
|
||||
if err != nil {
|
||||
logger.Warn("read turn config failed", zap.String("path", path), zap.Error(err))
|
||||
return "", "", false
|
||||
}
|
||||
|
||||
var c turn.Config
|
||||
if err := yaml.Unmarshal(data, &c); err != nil {
|
||||
logger.Warn("parse turn config failed", zap.String("path", path), zap.Error(err))
|
||||
return "", "", false
|
||||
}
|
||||
|
||||
if c.Namespace == "" {
|
||||
logger.Warn("turn config has empty namespace", zap.String("path", path))
|
||||
return "", "", false
|
||||
}
|
||||
if strings.TrimSpace(c.TURNSListenAddr) == "" {
|
||||
// TURNS disabled for this namespace — no stealth route, not an error.
|
||||
return "", "", false
|
||||
}
|
||||
|
||||
port, err := portFromListenAddr(c.TURNSListenAddr)
|
||||
if err != nil {
|
||||
logger.Warn("turn config has invalid turns_listen_addr",
|
||||
zap.String("path", path),
|
||||
zap.String("turns_listen_addr", c.TURNSListenAddr),
|
||||
zap.Error(err))
|
||||
return "", "", false
|
||||
}
|
||||
return c.Namespace, port, true
|
||||
}
|
||||
|
||||
// portFromListenAddr extracts the port from a "host:port" TURNS listen address
// (e.g. "0.0.0.0:5349" -> "5349"). The router always dials 127.0.0.1, so only
// the port is needed.
//
// The port text is returned unchanged, but it must be something the router can
// actually dial: it is resolved with net.LookupPort, so a decimal port outside
// 0-65535 or an unknown service name is rejected. Port 0 is rejected as well,
// because it asks the OS for an ephemeral port that cannot be derived from the
// config file.
func portFromListenAddr(addr string) (string, error) {
	_, port, err := net.SplitHostPort(addr)
	if err != nil {
		return "", fmt.Errorf("split host:port: %w", err)
	}
	if port == "" {
		return "", fmt.Errorf("empty port in %q", addr)
	}

	number, err := net.LookupPort("tcp", port)
	if err != nil {
		return "", fmt.Errorf("invalid port in %q: %w", addr, err)
	}
	if number == 0 {
		return "", fmt.Errorf("port 0 in %q cannot be routed to", addr)
	}
	return port, nil
}
|
||||
167
core/pkg/sniproxy/discovery_test.go
Normal file
167
core/pkg/sniproxy/discovery_test.go
Normal file
@ -0,0 +1,167 @@
|
||||
package sniproxy
|
||||
|
||||
import (
|
||||
"os"
|
||||
"path/filepath"
|
||||
"testing"
|
||||
|
||||
"github.com/DeBrosOfficial/network/pkg/turn"
|
||||
)
|
||||
|
||||
// writeTURNConfig is a test helper that creates
// <namespacesDir>/<namespace>/configs/turn-<nodeID>.yaml, the on-disk shape the
// namespace spawner produces, containing the quoted namespace and
// turns_listen_addr keys. Any filesystem error fails the test immediately.
func writeTURNConfig(t *testing.T, namespacesDir, namespace, nodeID, turnsAddr string) {
	t.Helper()

	cfgDir := filepath.Join(namespacesDir, namespace, "configs")
	if err := os.MkdirAll(cfgDir, 0755); err != nil {
		t.Fatalf("mkdir configs failed: %v", err)
	}

	yamlBody := "namespace: \"" + namespace + "\"\n" +
		"turns_listen_addr: \"" + turnsAddr + "\"\n"
	target := filepath.Join(cfgDir, "turn-"+nodeID+".yaml")
	if err := os.WriteFile(target, []byte(yamlBody), 0644); err != nil {
		t.Fatalf("write turn config failed: %v", err)
	}
}
|
||||
|
||||
// TestDiscoverTURNRoutes_scansFixtureDir verifies that two namespaces each with
|
||||
// a TURNS listener yield two routes apiece (stealth host + turn.ns-* alias),
|
||||
// while a namespace with an empty turns_listen_addr is skipped entirely.
|
||||
func TestDiscoverTURNRoutes_scansFixtureDir(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
const base = "orama-devnet.network"
|
||||
|
||||
writeTURNConfig(t, dir, "anchat", "node-1", "0.0.0.0:5349")
|
||||
writeTURNConfig(t, dir, "video", "node-1", "0.0.0.0:5350")
|
||||
// TURNS disabled — must produce no routes.
|
||||
writeTURNConfig(t, dir, "noturns", "node-1", "")
|
||||
|
||||
routes, err := DiscoverTURNRoutes(TURNDiscoveryConfig{
|
||||
NamespacesDir: dir,
|
||||
BaseDomain: base,
|
||||
}, nil)
|
||||
if err != nil {
|
||||
t.Fatalf("DiscoverTURNRoutes failed: %v", err)
|
||||
}
|
||||
|
||||
// 2 namespaces with TURNS × 2 routes each = 4.
|
||||
if len(routes) != 4 {
|
||||
t.Fatalf("expected 4 routes, got %d: %+v", len(routes), routes)
|
||||
}
|
||||
|
||||
got := map[string]string{}
|
||||
for _, r := range routes {
|
||||
got[r.Match] = r.Backend.Addr
|
||||
}
|
||||
|
||||
// anchat: backend port 5349, stealth host + alias.
|
||||
anchatStealth := turn.StealthHostForNamespace("anchat", base)
|
||||
if got[anchatStealth] != "127.0.0.1:5349" {
|
||||
t.Errorf("anchat stealth route missing/wrong: %q -> %q", anchatStealth, got[anchatStealth])
|
||||
}
|
||||
if got["turn.ns-anchat."+base] != "127.0.0.1:5349" {
|
||||
t.Errorf("anchat alias route missing/wrong: got %q", got["turn.ns-anchat."+base])
|
||||
}
|
||||
|
||||
// video: backend port 5350.
|
||||
videoStealth := turn.StealthHostForNamespace("video", base)
|
||||
if got[videoStealth] != "127.0.0.1:5350" {
|
||||
t.Errorf("video stealth route missing/wrong: %q -> %q", videoStealth, got[videoStealth])
|
||||
}
|
||||
if got["turn.ns-video."+base] != "127.0.0.1:5350" {
|
||||
t.Errorf("video alias route missing/wrong: got %q", got["turn.ns-video."+base])
|
||||
}
|
||||
|
||||
// The disabled namespace must not appear under any of its hostnames.
|
||||
if _, ok := got["turn.ns-noturns."+base]; ok {
|
||||
t.Errorf("noturns namespace should be skipped (empty turns_listen_addr)")
|
||||
}
|
||||
}
|
||||
|
||||
// TestDiscoverTURNRoutes_emptyTURNSAddrSkipped is a focused check that a single
|
||||
// namespace with an empty turns_listen_addr produces zero routes (no error).
|
||||
func TestDiscoverTURNRoutes_emptyTURNSAddrSkipped(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
writeTURNConfig(t, dir, "noturns", "node-1", "")
|
||||
|
||||
routes, err := DiscoverTURNRoutes(TURNDiscoveryConfig{
|
||||
NamespacesDir: dir,
|
||||
BaseDomain: "example.com",
|
||||
}, nil)
|
||||
if err != nil {
|
||||
t.Fatalf("DiscoverTURNRoutes failed: %v", err)
|
||||
}
|
||||
if len(routes) != 0 {
|
||||
t.Errorf("expected 0 routes for TURNS-disabled namespace, got %d: %+v", len(routes), routes)
|
||||
}
|
||||
}
|
||||
|
||||
// TestDiscoverTURNRoutes_unreadableDirReturnsError verifies a missing namespaces
|
||||
// directory is a transient error (so callers keep previous routes), not a silent
|
||||
// empty result.
|
||||
func TestDiscoverTURNRoutes_unreadableDirReturnsError(t *testing.T) {
|
||||
missing := filepath.Join(t.TempDir(), "does-not-exist")
|
||||
|
||||
routes, err := DiscoverTURNRoutes(TURNDiscoveryConfig{
|
||||
NamespacesDir: missing,
|
||||
BaseDomain: "example.com",
|
||||
}, nil)
|
||||
if err == nil {
|
||||
t.Fatalf("expected an error for unreadable namespaces dir, got nil (routes=%+v)", routes)
|
||||
}
|
||||
if routes != nil {
|
||||
t.Errorf("expected nil routes on error, got %+v", routes)
|
||||
}
|
||||
}
|
||||
|
||||
// TestDiscoverTURNRoutes_malformedFileSkipped verifies one unparseable
|
||||
// turn-*.yaml is skipped while a sibling valid namespace still yields routes
|
||||
// (one bad file must not hide the rest).
|
||||
func TestDiscoverTURNRoutes_malformedFileSkipped(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
const base = "example.com"
|
||||
|
||||
writeTURNConfig(t, dir, "good", "node-1", "0.0.0.0:5349")
|
||||
|
||||
badDir := filepath.Join(dir, "bad", "configs")
|
||||
if err := os.MkdirAll(badDir, 0755); err != nil {
|
||||
t.Fatalf("mkdir bad configs failed: %v", err)
|
||||
}
|
||||
if err := os.WriteFile(filepath.Join(badDir, "turn-node-1.yaml"), []byte(":\n not: [valid"), 0644); err != nil {
|
||||
t.Fatalf("write malformed config failed: %v", err)
|
||||
}
|
||||
|
||||
routes, err := DiscoverTURNRoutes(TURNDiscoveryConfig{
|
||||
NamespacesDir: dir,
|
||||
BaseDomain: base,
|
||||
}, nil)
|
||||
if err != nil {
|
||||
t.Fatalf("DiscoverTURNRoutes failed: %v", err)
|
||||
}
|
||||
if len(routes) != 2 {
|
||||
t.Fatalf("expected 2 routes from the good namespace, got %d: %+v", len(routes), routes)
|
||||
}
|
||||
goodStealth := turn.StealthHostForNamespace("good", base)
|
||||
found := false
|
||||
for _, r := range routes {
|
||||
if r.Match == goodStealth {
|
||||
found = true
|
||||
}
|
||||
}
|
||||
if !found {
|
||||
t.Errorf("good namespace stealth route missing despite malformed sibling")
|
||||
}
|
||||
}
|
||||
|
||||
// TestTURNDiscoveryConfig_Validate covers the required-field validation.
|
||||
func TestTURNDiscoveryConfig_Validate(t *testing.T) {
|
||||
if errs := (&TURNDiscoveryConfig{NamespacesDir: "/x", BaseDomain: "example.com"}).Validate(); len(errs) != 0 {
|
||||
t.Errorf("valid config reported errors: %v", errs)
|
||||
}
|
||||
if errs := (&TURNDiscoveryConfig{BaseDomain: "example.com"}).Validate(); len(errs) == 0 {
|
||||
t.Errorf("missing namespaces_dir should be invalid")
|
||||
}
|
||||
if errs := (&TURNDiscoveryConfig{NamespacesDir: "/x"}).Validate(); len(errs) == 0 {
|
||||
t.Errorf("missing base_domain should be invalid")
|
||||
}
|
||||
}
|
||||
@ -36,6 +36,27 @@ type Config struct {
|
||||
|
||||
// Namespace this TURN instance belongs to
|
||||
Namespace string `yaml:"namespace"`
|
||||
|
||||
// StealthDomain is the neutral, CDN-bland SNI hostname this server also
|
||||
// answers TURNS for (e.g. "cdn-a1b2c3d4e5f6.orama-devnet.network").
|
||||
//
|
||||
// The stealth endpoint is an SNI-router passthrough, NOT a separate TURN
|
||||
// server: a router on :443 reads only the TLS ClientHello SNI and forwards
|
||||
// the raw bytes for this hostname to this same TURNS listener. TLS is still
|
||||
// terminated here, by this TURN server, which therefore presents two certs
|
||||
// (the primary TURN domain and StealthDomain) selected by ClientHello SNI.
|
||||
// When empty, the stealth endpoint is disabled and behavior is unchanged.
|
||||
StealthDomain string `yaml:"stealth_domain,omitempty"`
|
||||
|
||||
// TLSStealthCertPath is the path to the TLS certificate PEM file presented
|
||||
// for StealthDomain. The SNI router only forwards bytes; this TURN server
|
||||
// terminates the TLS handshake, so it needs the stealth domain's cert here.
|
||||
TLSStealthCertPath string `yaml:"tls_stealth_cert_path,omitempty"`
|
||||
|
||||
// TLSStealthKeyPath is the path to the TLS private key PEM file for the
|
||||
// StealthDomain certificate (TURN terminates TLS for the router-forwarded
|
||||
// stealth connections).
|
||||
TLSStealthKeyPath string `yaml:"tls_stealth_key_path,omitempty"`
|
||||
}
|
||||
|
||||
// Validate checks the TURN configuration for errors
|
||||
|
||||
@ -15,6 +15,11 @@ import (
|
||||
"go.uber.org/zap"
|
||||
)
|
||||
|
||||
// stealthConfigFieldCount is the number of stealth TLS config fields that must
// be set together (StealthDomain, TLSStealthCertPath, TLSStealthKeyPath). Any
// other count is a partial config and fails server startup.
// NOTE(review): presumably a count of zero (stealth disabled) is accepted by
// the check that uses this constant — confirm against that check.
const stealthConfigFieldCount = 3
|
||||
|
||||
// Server wraps a Pion TURN server with namespace-scoped HMAC-SHA1 authentication.
|
||||
type Server struct {
|
||||
config *Config
|
||||
@ -24,8 +29,9 @@ type Server struct {
|
||||
tcpListener net.Listener // Plain TCP listener on primary port (3478)
|
||||
tlsListener net.Listener // TLS TCP listener for TURNS (port 5349)
|
||||
|
||||
certReloader *certReloader // hot-reloads the TURNS cert; nil when TURNS disabled
|
||||
certStop chan struct{} // closed to stop the cert-reload watcher goroutine
|
||||
certReloader *certReloader // hot-reloads the primary TURNS cert; nil when TURNS disabled
|
||||
stealthCertReloader *certReloader // hot-reloads the stealth-SNI cert; nil when stealth disabled
|
||||
certStop chan struct{} // closed to stop the cert-reload watcher goroutine(s)
|
||||
}
|
||||
|
||||
// NewServer creates and starts a TURN server.
|
||||
@ -94,8 +100,18 @@ func NewServer(cfg *Config, logger *zap.Logger) (*Server, error) {
|
||||
s.closeListeners()
|
||||
return nil, fmt.Errorf("failed to load TLS cert/key: %w", err)
|
||||
}
|
||||
s.certReloader = reloader
|
||||
|
||||
// Stealth SNI: when configured, terminate TLS for a second (neutral)
|
||||
// hostname using its own hot-reloading cert. The SNI router forwards the
|
||||
// raw stealth-domain bytes to this listener; selection is by ServerName.
|
||||
if err := s.loadStealthCertReloader(cfg); err != nil {
|
||||
s.closeListeners()
|
||||
return nil, err
|
||||
}
|
||||
|
||||
tlsConfig := &tls.Config{
|
||||
GetCertificate: reloader.GetCertificate,
|
||||
GetCertificate: newGetCertificate(cfg.StealthDomain, reloader, s.stealthCertReloader),
|
||||
MinVersion: tls.VersionTLS12,
|
||||
}
|
||||
tlsListener, err := tls.Listen("tcp", cfg.TURNSListenAddr, tlsConfig)
|
||||
@ -104,9 +120,11 @@ func NewServer(cfg *Config, logger *zap.Logger) (*Server, error) {
|
||||
return nil, fmt.Errorf("failed to listen on %s: %w", cfg.TURNSListenAddr, err)
|
||||
}
|
||||
s.tlsListener = tlsListener
|
||||
s.certReloader = reloader
|
||||
s.certStop = make(chan struct{})
|
||||
go reloader.watch(turnCertReloadInterval, s.certStop)
|
||||
if s.stealthCertReloader != nil {
|
||||
go s.stealthCertReloader.watch(turnCertReloadInterval, s.certStop)
|
||||
}
|
||||
|
||||
listenerConfigs = append(listenerConfigs, pionTurn.ListenerConfig{
|
||||
Listener: tlsListener,
|
||||
@ -150,6 +168,62 @@ func NewServer(cfg *Config, logger *zap.Logger) (*Server, error) {
|
||||
return s, nil
|
||||
}
|
||||
|
||||
// loadStealthCertReloader sets up the second cert reloader used for the stealth
|
||||
// SNI hostname, storing it on s.stealthCertReloader. The three stealth fields
|
||||
// (StealthDomain, TLSStealthCertPath, TLSStealthKeyPath) are all-or-nothing: a
|
||||
// partial config is an operator mistake and fails startup rather than silently
|
||||
// running without the stealth endpoint. When none are set, stealth is disabled
|
||||
// and the primary TLS path is byte-for-byte unchanged.
|
||||
func (s *Server) loadStealthCertReloader(cfg *Config) error {
|
||||
set := 0
|
||||
if cfg.StealthDomain != "" {
|
||||
set++
|
||||
}
|
||||
if cfg.TLSStealthCertPath != "" {
|
||||
set++
|
||||
}
|
||||
if cfg.TLSStealthKeyPath != "" {
|
||||
set++
|
||||
}
|
||||
if set == 0 {
|
||||
return nil // stealth disabled
|
||||
}
|
||||
if set != stealthConfigFieldCount {
|
||||
var missing []string
|
||||
if cfg.StealthDomain == "" {
|
||||
missing = append(missing, "stealth_domain")
|
||||
}
|
||||
if cfg.TLSStealthCertPath == "" {
|
||||
missing = append(missing, "tls_stealth_cert_path")
|
||||
}
|
||||
if cfg.TLSStealthKeyPath == "" {
|
||||
missing = append(missing, "tls_stealth_key_path")
|
||||
}
|
||||
return fmt.Errorf("turn: partial stealth config — set all of [stealth_domain, tls_stealth_cert_path, tls_stealth_key_path] or none; missing: %s", strings.Join(missing, ", "))
|
||||
}
|
||||
|
||||
reloader, err := newCertReloader(cfg.TLSStealthCertPath, cfg.TLSStealthKeyPath, s.logger)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to load stealth TLS cert/key (cert=%s, key=%s): %w", cfg.TLSStealthCertPath, cfg.TLSStealthKeyPath, err)
|
||||
}
|
||||
s.stealthCertReloader = reloader
|
||||
return nil
|
||||
}
|
||||
|
||||
// newGetCertificate builds the tls.Config.GetCertificate callback. When the
|
||||
// ClientHello ServerName equals stealthDomain (case-insensitively), it serves
|
||||
// the stealth cert; every other case — including empty SNI and the primary TURN
|
||||
// domain — serves the primary cert, preserving the pre-stealth behavior. When
|
||||
// stealth is disabled (stealthReloader nil) it is exactly primary.GetCertificate.
|
||||
func newGetCertificate(stealthDomain string, primary, stealth *certReloader) func(*tls.ClientHelloInfo) (*tls.Certificate, error) {
|
||||
return func(hello *tls.ClientHelloInfo) (*tls.Certificate, error) {
|
||||
if stealth != nil && hello != nil && strings.EqualFold(hello.ServerName, stealthDomain) {
|
||||
return stealth.GetCertificate(hello)
|
||||
}
|
||||
return primary.GetCertificate(hello)
|
||||
}
|
||||
}
|
||||
|
||||
// authHandler validates HMAC-SHA1 credentials.
|
||||
// Username format: {expiry_unix}:{namespace}
|
||||
// Password: base64(HMAC-SHA1(shared_secret, username))
|
||||
@ -239,6 +313,8 @@ func (s *Server) closeListeners() {
|
||||
s.tlsListener.Close()
|
||||
s.tlsListener = nil
|
||||
}
|
||||
s.certReloader = nil
|
||||
s.stealthCertReloader = nil
|
||||
}
|
||||
|
||||
// GenerateCredentials creates time-limited HMAC-SHA1 TURN credentials.
|
||||
|
||||
26
core/pkg/turn/stealth.go
Normal file
26
core/pkg/turn/stealth.go
Normal file
@ -0,0 +1,26 @@
|
||||
package turn
|
||||
|
||||
import (
|
||||
"crypto/sha256"
|
||||
"encoding/hex"
|
||||
"fmt"
|
||||
)
|
||||
|
||||
// stealthHostHashBytes is the number of leading bytes of the namespace's
// SHA-256 digest that go into the stealth hostname label. Six bytes render as
// twelve hex characters: short enough to look like an ordinary CDN label,
// long enough that two namespaces colliding is negligible at platform scale.
const stealthHostHashBytes = 6

// StealthHostForNamespace returns the censorship-resistant TURNS hostname for
// namespace, shaped as "cdn-<12 hex chars of sha256(namespace)>.<baseDomain>".
//
// Design constraints (feat-124):
//   - the label must NOT reveal the namespace — an SNI such as
//     "cdn.ns-anchat-test.…" would tell DPI exactly which app to block;
//   - it must be deterministic, so the cluster manager, namespace gateway,
//     SNI router and DNS all derive the same name without coordinating;
//   - it must be unique per namespace, because the SNI router maps it to that
//     namespace's TURN-TLS backend.
//
// baseDomain is appended verbatim after the label.
func StealthHostForNamespace(namespace, baseDomain string) string {
	digest := sha256.Sum256([]byte(namespace))
	label := hex.EncodeToString(digest[:stealthHostHashBytes])
	return fmt.Sprintf("cdn-%s.%s", label, baseDomain)
}
|
||||
201
core/pkg/turn/stealth_server_test.go
Normal file
201
core/pkg/turn/stealth_server_test.go
Normal file
@ -0,0 +1,201 @@
|
||||
package turn
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"crypto/tls"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"go.uber.org/zap"
|
||||
)
|
||||
|
||||
// feat-124: the stealth TURNS endpoint is an SNI-router passthrough — the TURN
|
||||
// server terminates TLS for both the primary TURN domain and a neutral stealth
|
||||
// domain, selecting the cert by ClientHello SNI. These pin: per-SNI selection
|
||||
// (incl. empty SNI, case-insensitivity), partial-config startup failure, and
|
||||
// the missing stealth-cert startup failure (no silent fallback).
|
||||
|
||||
// Hostnames shared by the stealth TLS tests.
const (
	// stealthTestDomain stands in for a neutral, CDN-style stealth SNI host.
	stealthTestDomain = "cdn-a1b2c3d4e5f6.orama-devnet.network"
	// turnTestDomain stands in for the primary TURN domain.
	turnTestDomain = "turn.orama-devnet.network"
)
|
||||
|
||||
func writeNamedCert(t *testing.T, dir, name string) (certPath, keyPath string) {
|
||||
t.Helper()
|
||||
certPath = filepath.Join(dir, name+".pem")
|
||||
keyPath = filepath.Join(dir, name+".key.pem")
|
||||
if err := GenerateSelfSignedCert(certPath, keyPath, "127.0.0.1"); err != nil {
|
||||
t.Fatalf("GenerateSelfSignedCert(%s): %v", name, err)
|
||||
}
|
||||
return certPath, keyPath
|
||||
}
|
||||
|
||||
// certLeafForSNI invokes getCert with a ClientHello carrying serverName and
// returns the first entry of the resulting certificate chain. The test is
// failed immediately when getCert errors or yields no certificate data.
func certLeafForSNI(t *testing.T, getCert func(*tls.ClientHelloInfo) (*tls.Certificate, error), serverName string) []byte {
	t.Helper()

	hello := &tls.ClientHelloInfo{ServerName: serverName}
	chosen, err := getCert(hello)
	switch {
	case err != nil:
		t.Fatalf("GetCertificate(%q): %v", serverName, err)
	case chosen == nil || len(chosen.Certificate) == 0:
		t.Fatalf("GetCertificate(%q) returned an empty certificate", serverName)
	}
	return chosen.Certificate[0]
}
|
||||
|
||||
func TestGetCertificate_stealthSNISelectsStealthCert(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
primaryCert, primaryKey := writeNamedCert(t, dir, "primary")
|
||||
stealthCert, stealthKey := writeNamedCert(t, dir, "stealth")
|
||||
|
||||
primary, err := newCertReloader(primaryCert, primaryKey, zap.NewNop())
|
||||
if err != nil {
|
||||
t.Fatalf("newCertReloader(primary): %v", err)
|
||||
}
|
||||
stealth, err := newCertReloader(stealthCert, stealthKey, zap.NewNop())
|
||||
if err != nil {
|
||||
t.Fatalf("newCertReloader(stealth): %v", err)
|
||||
}
|
||||
|
||||
getCert := newGetCertificate(stealthTestDomain, primary, stealth)
|
||||
|
||||
wantPrimary := leafDER(t, primary)
|
||||
wantStealth := leafDER(t, stealth)
|
||||
if bytes.Equal(wantPrimary, wantStealth) {
|
||||
t.Fatal("test setup error: primary and stealth certs must be distinct")
|
||||
}
|
||||
|
||||
tests := []struct {
|
||||
name string
|
||||
serverName string
|
||||
want []byte
|
||||
}{
|
||||
{"stealth SNI selects stealth cert", stealthTestDomain, wantStealth},
|
||||
{"stealth SNI is case-insensitive", strings.ToUpper(stealthTestDomain), wantStealth},
|
||||
{"turn domain SNI selects primary cert", turnTestDomain, wantPrimary},
|
||||
{"empty SNI selects primary cert", "", wantPrimary},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
got := certLeafForSNI(t, getCert, tt.serverName)
|
||||
if !bytes.Equal(got, tt.want) {
|
||||
t.Errorf("ServerName=%q served the wrong certificate", tt.serverName)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestGetCertificate_stealthDisabledAlwaysPrimary(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
primaryCert, primaryKey := writeNamedCert(t, dir, "primary")
|
||||
primary, err := newCertReloader(primaryCert, primaryKey, zap.NewNop())
|
||||
if err != nil {
|
||||
t.Fatalf("newCertReloader(primary): %v", err)
|
||||
}
|
||||
|
||||
// Stealth disabled (nil reloader): every SNI — including a string that looks
|
||||
// like a stealth host — must serve the primary cert unchanged.
|
||||
getCert := newGetCertificate("", primary, nil)
|
||||
want := leafDER(t, primary)
|
||||
|
||||
for _, serverName := range []string{"", turnTestDomain, stealthTestDomain} {
|
||||
if got := certLeafForSNI(t, getCert, serverName); !bytes.Equal(got, want) {
|
||||
t.Errorf("ServerName=%q must serve the primary cert when stealth is disabled", serverName)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func baseStealthConfig(t *testing.T) *Config {
|
||||
t.Helper()
|
||||
dir := t.TempDir()
|
||||
primaryCert, primaryKey := writeNamedCert(t, dir, "primary")
|
||||
return &Config{
|
||||
ListenAddr: "127.0.0.1:0",
|
||||
TURNSListenAddr: "127.0.0.1:0",
|
||||
TLSCertPath: primaryCert,
|
||||
TLSKeyPath: primaryKey,
|
||||
PublicIP: "127.0.0.1",
|
||||
Realm: "orama-devnet.network",
|
||||
AuthSecret: "test-secret-key",
|
||||
RelayPortStart: 49152,
|
||||
RelayPortEnd: 50000,
|
||||
Namespace: "test-ns",
|
||||
}
|
||||
}
|
||||
|
||||
func TestServer_partialStealthConfigFails(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
mutate func(c *Config)
|
||||
wantMissing []string
|
||||
}{
|
||||
{
|
||||
name: "only stealth_domain set",
|
||||
mutate: func(c *Config) { c.StealthDomain = stealthTestDomain },
|
||||
wantMissing: []string{"tls_stealth_cert_path", "tls_stealth_key_path"},
|
||||
},
|
||||
{
|
||||
name: "domain and cert set, key missing",
|
||||
mutate: func(c *Config) { c.StealthDomain = stealthTestDomain; c.TLSStealthCertPath = "/tmp/x.pem" },
|
||||
wantMissing: []string{"tls_stealth_key_path"},
|
||||
},
|
||||
{
|
||||
name: "only cert path set",
|
||||
mutate: func(c *Config) { c.TLSStealthCertPath = "/tmp/x.pem" },
|
||||
wantMissing: []string{"stealth_domain", "tls_stealth_key_path"},
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
cfg := baseStealthConfig(t)
|
||||
tt.mutate(cfg)
|
||||
|
||||
srv, err := NewServer(cfg, zap.NewNop())
|
||||
if err == nil {
|
||||
srv.Close()
|
||||
t.Fatal("expected startup to fail on partial stealth config")
|
||||
}
|
||||
for _, field := range tt.wantMissing {
|
||||
if !strings.Contains(err.Error(), field) {
|
||||
t.Errorf("error must name the missing field %q; got: %v", field, err)
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestServer_missingStealthCertFails(t *testing.T) {
|
||||
cfg := baseStealthConfig(t)
|
||||
cfg.StealthDomain = stealthTestDomain
|
||||
cfg.TLSStealthCertPath = filepath.Join(t.TempDir(), "absent-cert.pem")
|
||||
cfg.TLSStealthKeyPath = filepath.Join(t.TempDir(), "absent-key.pem")
|
||||
|
||||
srv, err := NewServer(cfg, zap.NewNop())
|
||||
if err == nil {
|
||||
srv.Close()
|
||||
t.Fatal("expected startup to fail when the stealth cert file is absent")
|
||||
}
|
||||
if !strings.Contains(err.Error(), cfg.TLSStealthCertPath) {
|
||||
t.Errorf("error must name the missing stealth cert path %q; got: %v", cfg.TLSStealthCertPath, err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestServer_fullStealthConfigStarts(t *testing.T) {
|
||||
cfg := baseStealthConfig(t)
|
||||
dir := t.TempDir()
|
||||
stealthCert, stealthKey := writeNamedCert(t, dir, "stealth")
|
||||
cfg.StealthDomain = stealthTestDomain
|
||||
cfg.TLSStealthCertPath = stealthCert
|
||||
cfg.TLSStealthKeyPath = stealthKey
|
||||
|
||||
srv, err := NewServer(cfg, zap.NewNop())
|
||||
if err != nil {
|
||||
t.Fatalf("expected startup to succeed with full stealth config: %v", err)
|
||||
}
|
||||
defer srv.Close()
|
||||
if srv.stealthCertReloader == nil {
|
||||
t.Error("stealthCertReloader must be set when stealth is fully configured")
|
||||
}
|
||||
}
|
||||
53
core/pkg/turn/stealth_test.go
Normal file
53
core/pkg/turn/stealth_test.go
Normal file
@ -0,0 +1,53 @@
|
||||
package turn
|
||||
|
||||
import (
|
||||
"regexp"
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestStealthHostForNamespace_deterministic(t *testing.T) {
|
||||
a := StealthHostForNamespace("anchat-test", "orama-devnet.network")
|
||||
b := StealthHostForNamespace("anchat-test", "orama-devnet.network")
|
||||
if a != b {
|
||||
t.Fatalf("not deterministic: %q vs %q", a, b)
|
||||
}
|
||||
if !strings.HasPrefix(a, "cdn-") || !strings.HasSuffix(a, ".orama-devnet.network") {
|
||||
t.Errorf("unexpected shape: %q", a)
|
||||
}
|
||||
// label = "cdn-" + 12 hex chars
|
||||
label := strings.SplitN(a, ".", 2)[0]
|
||||
if len(label) != len("cdn-")+stealthHostHashBytes*2 {
|
||||
t.Errorf("label %q has wrong length", label)
|
||||
}
|
||||
}
|
||||
|
||||
func TestStealthHostForNamespace_namespaceNotLeaked(t *testing.T) {
|
||||
h := StealthHostForNamespace("anchat-test", "orama-devnet.network")
|
||||
if strings.Contains(h, "anchat") {
|
||||
t.Errorf("stealth host %q leaks the namespace name", h)
|
||||
}
|
||||
}
|
||||
|
||||
func TestStealthHostForNamespace_distinctPerNamespace(t *testing.T) {
|
||||
a := StealthHostForNamespace("ns-a", "example.com")
|
||||
b := StealthHostForNamespace("ns-b", "example.com")
|
||||
if a == b {
|
||||
t.Fatalf("different namespaces produced the same stealth host %q", a)
|
||||
}
|
||||
}
|
||||
|
||||
// TestStealthHostForNamespace_matchesDNSNameAllowlist guards the contract that
|
||||
// the derived host always passes the Caddyfile DNS-name allowlist
|
||||
// (pkg/namespace turn_cert.go dnsNamePattern) — a legitimate stealth domain
|
||||
// must never be rejected by that defense-in-depth check. Mirrors the same
|
||||
// conservative pattern here to avoid an import cycle.
|
||||
func TestStealthHostForNamespace_matchesDNSNameAllowlist(t *testing.T) {
|
||||
dnsName := regexp.MustCompile(`^[a-z0-9]([a-z0-9-]*[a-z0-9])?(\.[a-z0-9]([a-z0-9-]*[a-z0-9])?)+$`)
|
||||
for _, ns := range []string{"anchat-test", "a", "ns-with-many-dashes", "x1y2z3"} {
|
||||
h := StealthHostForNamespace(ns, "orama-devnet.network")
|
||||
if !dnsName.MatchString(h) {
|
||||
t.Errorf("derived stealth host %q for ns %q fails the DNS-name allowlist", h, ns)
|
||||
}
|
||||
}
|
||||
}
|
||||
Loading…
x
Reference in New Issue
Block a user