mirror of
https://github.com/DeBrosOfficial/orama.git
synced 2026-06-17 00:14:13 +00:00
feat(gateway): implement stealth TURN discovery and configuration
- Add `turn_stealth_domain` to gateway config for stealth TURN support
- Introduce `turn_discovery` in `sni-router` to auto-discover per-namespace routes
- Add database migration to enable stealth TURN per namespace
- Document ephemeral state API in `SERVERLESS.md`
This commit is contained in:
parent
f192cd0b84
commit
b9d5f542e1
@ -74,6 +74,10 @@ func parseGatewayConfig(logger *logging.ColoredLogger) *gateway.Config {
|
|||||||
SFUPort int `yaml:"sfu_port"`
|
SFUPort int `yaml:"sfu_port"`
|
||||||
TURNDomain string `yaml:"turn_domain"`
|
TURNDomain string `yaml:"turn_domain"`
|
||||||
TURNSecret string `yaml:"turn_secret"`
|
TURNSecret string `yaml:"turn_secret"`
|
||||||
|
// TURNStealthDomain is the neutral stealth TURNS:443 host (feat-124).
|
||||||
|
// Maps to cfg.StealthCDNDomain so turn.credentials advertises the
|
||||||
|
// stealth rung of the URI ladder.
|
||||||
|
TURNStealthDomain string `yaml:"turn_stealth_domain"`
|
||||||
}
|
}
|
||||||
|
|
||||||
type yamlCfg struct {
|
type yamlCfg struct {
|
||||||
@ -256,6 +260,9 @@ func parseGatewayConfig(logger *logging.ColoredLogger) *gateway.Config {
|
|||||||
if v := strings.TrimSpace(y.WebRTC.TURNSecret); v != "" {
|
if v := strings.TrimSpace(y.WebRTC.TURNSecret); v != "" {
|
||||||
cfg.TURNSecret = v
|
cfg.TURNSecret = v
|
||||||
}
|
}
|
||||||
|
if v := strings.TrimSpace(y.WebRTC.TURNStealthDomain); v != "" {
|
||||||
|
cfg.StealthCDNDomain = v
|
||||||
|
}
|
||||||
|
|
||||||
// Validate configuration
|
// Validate configuration
|
||||||
if errs := cfg.ValidateConfig(); len(errs) > 0 {
|
if errs := cfg.ValidateConfig(); len(errs) > 0 {
|
||||||
|
|||||||
@ -32,6 +32,18 @@
|
|||||||
// backend:
|
// backend:
|
||||||
// name: gateway
|
// name: gateway
|
||||||
// addr: "127.0.0.1:8443"
|
// addr: "127.0.0.1:8443"
|
||||||
|
// turn_discovery:
|
||||||
|
// namespaces_dir: /opt/orama/.orama/data/namespaces
|
||||||
|
// base_domain: orama-devnet.network
|
||||||
|
// rescan_interval: 30s
|
||||||
|
//
|
||||||
|
// When turn_discovery.namespaces_dir is set, the router additionally scans
|
||||||
|
// <namespaces_dir>/*/configs/turn-*.yaml every rescan_interval and derives two
|
||||||
|
// routes per namespace with a TURNS listener — the bland stealth host and a
|
||||||
|
// "turn.ns-<namespace>.<base_domain>" alias — both forwarding to that
|
||||||
|
// namespace's local TURNS port. Discovered routes are merged with the static
|
||||||
|
// routes above (static wins on conflict); a transient scan error keeps the
|
||||||
|
// previously-installed routes.
|
||||||
package main
|
package main
|
||||||
|
|
||||||
import (
|
import (
|
||||||
@ -69,6 +81,15 @@ type yamlRoute struct {
|
|||||||
Backend yamlBackend `yaml:"backend"`
|
Backend yamlBackend `yaml:"backend"`
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// yamlTURNDiscovery is the YAML-decoding counterpart of
// sniproxy.TURNDiscoveryConfig (the `turn_discovery` config block). A non-empty
// namespaces_dir switches on auto-discovery of per-namespace stealth-TURN
// routes from <namespaces_dir>/*/configs/turn-*.yaml.
type yamlTURNDiscovery struct {
	// NamespacesDir is the directory scanned for per-namespace TURN configs.
	// An empty value leaves discovery disabled.
	NamespacesDir string `yaml:"namespaces_dir"`
	// BaseDomain is the domain handed to the discoverer for deriving the
	// per-namespace route hostnames.
	BaseDomain string `yaml:"base_domain"`
	// RescanInterval is the period between scans of NamespacesDir.
	RescanInterval time.Duration `yaml:"rescan_interval"`
}
|
||||||
|
|
||||||
// yamlConfig is the on-disk configuration shape.
|
// yamlConfig is the on-disk configuration shape.
|
||||||
type yamlConfig struct {
|
type yamlConfig struct {
|
||||||
Listen string `yaml:"listen"`
|
Listen string `yaml:"listen"`
|
||||||
@ -77,6 +98,12 @@ type yamlConfig struct {
|
|||||||
MaxConcurrentConns int `yaml:"max_concurrent_conns"`
|
MaxConcurrentConns int `yaml:"max_concurrent_conns"`
|
||||||
Fallback yamlBackend `yaml:"fallback"`
|
Fallback yamlBackend `yaml:"fallback"`
|
||||||
Routes []yamlRoute `yaml:"routes"`
|
Routes []yamlRoute `yaml:"routes"`
|
||||||
|
TURNDiscovery yamlTURNDiscovery `yaml:"turn_discovery"`
|
||||||
|
}
|
||||||
|
|
||||||
|
// discoveryEnabled reports whether TURN route auto-discovery is configured.
|
||||||
|
func (y *yamlConfig) discoveryEnabled() bool {
|
||||||
|
return y.TURNDiscovery.NamespacesDir != ""
|
||||||
}
|
}
|
||||||
|
|
||||||
func main() {
|
func main() {
|
||||||
@ -94,25 +121,49 @@ func main() {
|
|||||||
|
|
||||||
router := sniproxy.NewRouter(toBackend(cfg.Fallback))
|
router := sniproxy.NewRouter(toBackend(cfg.Fallback))
|
||||||
|
|
||||||
// Hot-reload the route table from the config file so a namespace's
|
// The static routes (and fallback) always come from the config file; this
|
||||||
// cdn/turn SNI routes can be added or removed without restarting the
|
// closure is re-evaluated on every reload/rescan so a hand-edit to the
|
||||||
// router (Router.Replace swaps atomically under in-flight connections).
|
// config is picked up without a restart.
|
||||||
reloader := sniproxy.NewFileRouteReloader(configPath,
|
staticSource := func() ([]sniproxy.Route, sniproxy.Backend, error) {
|
||||||
func() ([]sniproxy.Route, sniproxy.Backend, error) {
|
|
||||||
y, err := loadConfig(configPath)
|
y, err := loadConfig(configPath)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, sniproxy.Backend{}, err
|
return nil, sniproxy.Backend{}, err
|
||||||
}
|
}
|
||||||
return toRoutes(y.Routes), toBackend(y.Fallback), nil
|
return toRoutes(y.Routes), toBackend(y.Fallback), nil
|
||||||
}, router, logger.Logger)
|
}
|
||||||
|
|
||||||
|
routeStop := make(chan struct{})
|
||||||
|
defer close(routeStop)
|
||||||
|
|
||||||
|
if cfg.discoveryEnabled() {
|
||||||
|
// Auto-discover per-namespace stealth-TURN routes by scanning the
|
||||||
|
// namespaces directory, merged with the static config routes (static
|
||||||
|
// wins on conflict), re-installed atomically every rescan_interval. A
|
||||||
|
// transient scan error keeps the previously-installed routes.
|
||||||
|
discoverer := sniproxy.NewTURNRouteDiscoverer(
|
||||||
|
sniproxy.TURNDiscoveryConfig{
|
||||||
|
NamespacesDir: cfg.TURNDiscovery.NamespacesDir,
|
||||||
|
BaseDomain: cfg.TURNDiscovery.BaseDomain,
|
||||||
|
RescanInterval: cfg.TURNDiscovery.RescanInterval,
|
||||||
|
}, staticSource, router, logger.Logger)
|
||||||
|
if err := discoverer.Apply(); err != nil {
|
||||||
|
logger.ComponentError(logging.ComponentSNI, "Failed to install initial routes",
|
||||||
|
zap.Error(err))
|
||||||
|
os.Exit(1)
|
||||||
|
}
|
||||||
|
go discoverer.Run(routeStop)
|
||||||
|
} else {
|
||||||
|
// No discovery configured: hot-reload the static route table from the
|
||||||
|
// config file so cdn/turn SNI routes can be added or removed without
|
||||||
|
// restarting (Router.Replace swaps atomically under in-flight conns).
|
||||||
|
reloader := sniproxy.NewFileRouteReloader(configPath, staticSource, router, logger.Logger)
|
||||||
if err := reloader.Apply(); err != nil {
|
if err := reloader.Apply(); err != nil {
|
||||||
logger.ComponentError(logging.ComponentSNI, "Failed to install initial routes",
|
logger.ComponentError(logging.ComponentSNI, "Failed to install initial routes",
|
||||||
zap.Error(err))
|
zap.Error(err))
|
||||||
os.Exit(1)
|
os.Exit(1)
|
||||||
}
|
}
|
||||||
routeStop := make(chan struct{})
|
|
||||||
defer close(routeStop)
|
|
||||||
go reloader.Watch(sniproxy.DefaultRouteReloadInterval, routeStop)
|
go reloader.Watch(sniproxy.DefaultRouteReloadInterval, routeStop)
|
||||||
|
}
|
||||||
|
|
||||||
srv := sniproxy.NewServer(router, sniproxy.Config{
|
srv := sniproxy.NewServer(router, sniproxy.Config{
|
||||||
ClientHelloTimeout: cfg.ClientHelloTimeout,
|
ClientHelloTimeout: cfg.ClientHelloTimeout,
|
||||||
@ -235,6 +286,16 @@ func validateConfig(y *yamlConfig) []string {
|
|||||||
errs = append(errs, fmt.Sprintf("routes[%d].backend.addr: required", i))
|
errs = append(errs, fmt.Sprintf("routes[%d].backend.addr: required", i))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
// turn_discovery is optional, but when partially set (namespaces_dir XOR
|
||||||
|
// base_domain) it is almost certainly a misconfiguration, so validate the
|
||||||
|
// pair together via the library's own Validate.
|
||||||
|
if y.discoveryEnabled() || y.TURNDiscovery.BaseDomain != "" {
|
||||||
|
dc := sniproxy.TURNDiscoveryConfig{
|
||||||
|
NamespacesDir: y.TURNDiscovery.NamespacesDir,
|
||||||
|
BaseDomain: y.TURNDiscovery.BaseDomain,
|
||||||
|
}
|
||||||
|
errs = append(errs, dc.Validate()...)
|
||||||
|
}
|
||||||
return errs
|
return errs
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -187,6 +187,69 @@ The legacy `db_execute` is kept indefinitely so existing functions don't break.
|
|||||||
|----------|-------------|
|
|----------|-------------|
|
||||||
| `pubsub_publish(topic, dataJSON)` → bool | Publish message to a PubSub topic. Returns true on success. |
|
| `pubsub_publish(topic, dataJSON)` → bool | Publish message to a PubSub topic. Returns true on success. |
|
||||||
|
|
||||||
|
### Ephemeral State (WS-subscribe-tracked)
|
||||||
|
|
||||||
|
Short-lived per-subscriber state (typing indicators, presence, call ringing,
|
||||||
|
live cursors) that the gateway **auto-clears the moment the owning WebSocket
|
||||||
|
client disconnects** — no heartbeats, no prune crons. State also expires on a
|
||||||
|
TTL backstop (default 60 s, max 30 min). The owning client ID and namespace
|
||||||
|
come from the server-trusted invocation context; functions cannot spoof them.
|
||||||
|
|
||||||
|
| Function | Description |
|
||||||
|
|----------|-------------|
|
||||||
|
| `ephemeral_state_set(topic, key, payload, ttlMs)` → u32 | Record state owned by the CURRENT invocation's WS client and publish an `ephemeral.set` event on the topic. 1 = ok, 0 = failure (no WS client, empty topic/key, payload > 16 KiB, > 256 keys/client). |
|
||||||
|
| `ephemeral_state_clear(topic, key)` → u32 | Clear state this client owns; publishes `ephemeral.clear` (reason `explicit`). Idempotent — clearing a missing/non-owned key returns 1. |
|
||||||
|
| `ephemeral_state_list(topic)` → u64 | Reconnect catch-up read: packed `ptr<<32\|len` of a JSON envelope with the live entries on the topic. Works without a WS client (read-only). 0 on failure. |
|
||||||
|
|
||||||
|
Raw import signatures (pointer/length ABI — note `ttlMs` is **i64**):
|
||||||
|
|
||||||
|
```go
|
||||||
|
//go:wasmimport env ephemeral_state_set
|
||||||
|
func ephemeralStateSet(topicPtr *byte, topicLen uint32, keyPtr *byte, keyLen uint32,
|
||||||
|
payloadPtr *byte, payloadLen uint32, ttlMs int64) uint32
|
||||||
|
|
||||||
|
//go:wasmimport env ephemeral_state_clear
|
||||||
|
func ephemeralStateClear(topicPtr *byte, topicLen uint32, keyPtr *byte, keyLen uint32) uint32
|
||||||
|
|
||||||
|
//go:wasmimport env ephemeral_state_list
|
||||||
|
func ephemeralStateList(topicPtr *byte, topicLen uint32) uint64 // ptr<<32|len of JSON
|
||||||
|
```
|
||||||
|
|
||||||
|
Synthetic events are published **on the same topic** the state lives on, with
|
||||||
|
the `_orama` control-frame discriminator (same dispatch pattern as the
|
||||||
|
`auth.refresh` frame). Subscribers update their local view from the stream:
|
||||||
|
|
||||||
|
```json
|
||||||
|
{"_orama":"ephemeral.set", "topic":"typing:room1", "key":"user-7", "client_id":"ws-abc", "payload":"<base64>"}
|
||||||
|
{"_orama":"ephemeral.clear","topic":"typing:room1", "key":"user-7", "client_id":"ws-abc", "reason":"disconnect"}
|
||||||
|
```
|
||||||
|
|
||||||
|
`reason` is `explicit` (function called clear), `disconnect` (owning WS client
|
||||||
|
went away — the zero-lag path), or `expired` (TTL backstop). `payload` is
|
||||||
|
base64 (Go `[]byte` JSON encoding) and present only on `ephemeral.set`.
|
||||||
|
|
||||||
|
`ephemeral_state_list` returns:
|
||||||
|
|
||||||
|
```json
|
||||||
|
{"entries":[{"key":"user-7","client_id":"ws-abc","payload":"<base64>","expires_in_ms":48211}]}
|
||||||
|
```
|
||||||
|
|
||||||
|
Typing-indicator shape (called from a `ws_persistent` rpc-router function):
|
||||||
|
|
||||||
|
```go
|
||||||
|
// Client sends {"op":"typing.start","room":"room1","user":"user-7"} → handler:
|
||||||
|
ephemeralStateSet(ptr("typing:"+room), len32("typing:"+room),
|
||||||
|
ptr(userID), len32(userID), nil, 0, 30_000) // 30s TTL backstop
|
||||||
|
|
||||||
|
// Client sends typing.stop → handler:
|
||||||
|
ephemeralStateClear(ptr("typing:"+room), len32("typing:"+room), ptr(userID), len32(userID))
|
||||||
|
|
||||||
|
// No typing.stop needed on app kill / network drop: the WS disconnect publishes
|
||||||
|
// {"_orama":"ephemeral.clear",...,"reason":"disconnect"} to every subscriber
|
||||||
|
// immediately. On (re)connect, call ephemeral_state_list("typing:"+room) once
|
||||||
|
// to seed local state, then track the event stream.
|
||||||
|
```
|
||||||
|
|
||||||
### Logging
|
### Logging
|
||||||
|
|
||||||
| Function | Description |
|
| Function | Description |
|
||||||
|
|||||||
16
core/migrations/030_webrtc_stealth.sql
Normal file
16
core/migrations/030_webrtc_stealth.sql
Normal file
@ -0,0 +1,16 @@
|
|||||||
|
-- =============================================================================
|
||||||
|
-- 030_webrtc_stealth.sql
|
||||||
|
--
|
||||||
|
-- Stealth TURNS-over-443 per namespace — feat-124 (censorship-resistant
|
||||||
|
-- calling). When stealth_enabled is true the namespace's TURN servers carry a
|
||||||
|
-- second TLS certificate for the neutral stealth hostname
|
||||||
|
-- (cdn-<hash>.<base-domain>, derived via turn.StealthHostForNamespace), the
|
||||||
|
-- SNI router forwards :443 ClientHellos for that hostname to the TURN TLS
|
||||||
|
-- listener, and turn.credentials advertises `turns:<stealth-host>:443` as the
|
||||||
|
-- final rung of the ICE URI ladder.
|
||||||
|
--
|
||||||
|
-- Default false → backward compatible: existing WebRTC namespaces keep the
|
||||||
|
-- baseline udp:3478 / tcp:3478 / turns:5349 URIs unchanged.
|
||||||
|
-- =============================================================================
|
||||||
|
|
||||||
|
ALTER TABLE namespace_webrtc_config ADD COLUMN stealth_enabled BOOLEAN DEFAULT FALSE;
|
||||||
@ -79,6 +79,8 @@ func showNamespaceHelp() {
|
|||||||
fmt.Printf(" repair <namespace> - Repair an under-provisioned namespace cluster\n")
|
fmt.Printf(" repair <namespace> - Repair an under-provisioned namespace cluster\n")
|
||||||
fmt.Printf(" enable webrtc --namespace NS - Enable WebRTC (SFU + TURN) for a namespace\n")
|
fmt.Printf(" enable webrtc --namespace NS - Enable WebRTC (SFU + TURN) for a namespace\n")
|
||||||
fmt.Printf(" disable webrtc --namespace NS - Disable WebRTC for a namespace\n")
|
fmt.Printf(" disable webrtc --namespace NS - Disable WebRTC for a namespace\n")
|
||||||
|
fmt.Printf(" enable webrtc-stealth --namespace NS - Enable stealth TURNS over :443 (feat-124)\n")
|
||||||
|
fmt.Printf(" disable webrtc-stealth --namespace NS - Disable stealth TURNS\n")
|
||||||
fmt.Printf(" webrtc-status --namespace NS - Show WebRTC service status\n")
|
fmt.Printf(" webrtc-status --namespace NS - Show WebRTC service status\n")
|
||||||
fmt.Printf(" help - Show this help message\n\n")
|
fmt.Printf(" help - Show this help message\n\n")
|
||||||
fmt.Printf("Flags:\n")
|
fmt.Printf("Flags:\n")
|
||||||
@ -226,8 +228,12 @@ func handleNamespaceDelete(force bool) {
|
|||||||
|
|
||||||
func handleNamespaceEnable(args []string) {
|
func handleNamespaceEnable(args []string) {
|
||||||
feature := args[0]
|
feature := args[0]
|
||||||
|
if feature == "webrtc-stealth" {
|
||||||
|
handleNamespaceStealthToggle(args[1:], true)
|
||||||
|
return
|
||||||
|
}
|
||||||
if feature != "webrtc" {
|
if feature != "webrtc" {
|
||||||
fmt.Fprintf(os.Stderr, "Unknown feature: %s\nSupported features: webrtc\n", feature)
|
fmt.Fprintf(os.Stderr, "Unknown feature: %s\nSupported features: webrtc, webrtc-stealth\n", feature)
|
||||||
os.Exit(1)
|
os.Exit(1)
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -283,10 +289,82 @@ func handleNamespaceEnable(args []string) {
|
|||||||
fmt.Printf(" TURN instances: 2 nodes (relay on public IPs)\n")
|
fmt.Printf(" TURN instances: 2 nodes (relay on public IPs)\n")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// handleNamespaceStealthToggle drives /v1/namespace/webrtc/stealth/{enable|disable}
|
||||||
|
// (feat-124 — censorship-resistant TURNS over :443).
|
||||||
|
func handleNamespaceStealthToggle(args []string, enable bool) {
|
||||||
|
verb := "disable"
|
||||||
|
if enable {
|
||||||
|
verb = "enable"
|
||||||
|
}
|
||||||
|
|
||||||
|
var ns string
|
||||||
|
fs := flag.NewFlagSet("namespace "+verb+" webrtc-stealth", flag.ExitOnError)
|
||||||
|
fs.StringVar(&ns, "namespace", "", "Namespace name")
|
||||||
|
_ = fs.Parse(args)
|
||||||
|
|
||||||
|
if ns == "" {
|
||||||
|
fmt.Fprintf(os.Stderr, "Usage: orama namespace %s webrtc-stealth --namespace <name>\n", verb)
|
||||||
|
os.Exit(1)
|
||||||
|
}
|
||||||
|
|
||||||
|
gatewayURL, apiKey := loadAuthForNamespace(ns)
|
||||||
|
|
||||||
|
if enable {
|
||||||
|
fmt.Printf("Enabling WebRTC stealth (TURNS over :443) for namespace '%s'...\n", ns)
|
||||||
|
fmt.Printf("This provisions a Let's Encrypt cert for the neutral stealth host and may take up to ~2 minutes.\n")
|
||||||
|
} else {
|
||||||
|
fmt.Printf("Disabling WebRTC stealth for namespace '%s'...\n", ns)
|
||||||
|
}
|
||||||
|
|
||||||
|
url := fmt.Sprintf("%s/v1/namespace/webrtc/stealth/%s", gatewayURL, verb)
|
||||||
|
req, err := http.NewRequest(http.MethodPost, url, nil)
|
||||||
|
if err != nil {
|
||||||
|
fmt.Fprintf(os.Stderr, "Failed to create request: %v\n", err)
|
||||||
|
os.Exit(1)
|
||||||
|
}
|
||||||
|
req.Header.Set("Authorization", "Bearer "+apiKey)
|
||||||
|
|
||||||
|
client := &http.Client{
|
||||||
|
Transport: &http.Transport{
|
||||||
|
TLSClientConfig: &tls.Config{InsecureSkipVerify: true},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
resp, err := client.Do(req)
|
||||||
|
if err != nil {
|
||||||
|
fmt.Fprintf(os.Stderr, "Failed to connect to gateway: %v\n", err)
|
||||||
|
os.Exit(1)
|
||||||
|
}
|
||||||
|
defer resp.Body.Close()
|
||||||
|
|
||||||
|
var result map[string]interface{}
|
||||||
|
json.NewDecoder(resp.Body).Decode(&result)
|
||||||
|
|
||||||
|
if resp.StatusCode != http.StatusOK {
|
||||||
|
errMsg := "unknown error"
|
||||||
|
if e, ok := result["error"].(string); ok {
|
||||||
|
errMsg = e
|
||||||
|
}
|
||||||
|
fmt.Fprintf(os.Stderr, "Failed to %s WebRTC stealth: %s\n", verb, errMsg)
|
||||||
|
os.Exit(1)
|
||||||
|
}
|
||||||
|
|
||||||
|
if enable {
|
||||||
|
fmt.Printf("WebRTC stealth enabled for namespace '%s'.\n", ns)
|
||||||
|
fmt.Printf(" turn.credentials now advertises the full URI ladder including turns:<stealth-host>:443.\n")
|
||||||
|
fmt.Printf(" Make sure the SNI router is enabled on the TURN nodes (node.yaml sni_router.enabled).\n")
|
||||||
|
} else {
|
||||||
|
fmt.Printf("WebRTC stealth disabled for namespace '%s'.\n", ns)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func handleNamespaceDisable(args []string) {
|
func handleNamespaceDisable(args []string) {
|
||||||
feature := args[0]
|
feature := args[0]
|
||||||
|
if feature == "webrtc-stealth" {
|
||||||
|
handleNamespaceStealthToggle(args[1:], false)
|
||||||
|
return
|
||||||
|
}
|
||||||
if feature != "webrtc" {
|
if feature != "webrtc" {
|
||||||
fmt.Fprintf(os.Stderr, "Unknown feature: %s\nSupported features: webrtc\n", feature)
|
fmt.Fprintf(os.Stderr, "Unknown feature: %s\nSupported features: webrtc, webrtc-stealth\n", feature)
|
||||||
os.Exit(1)
|
os.Exit(1)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -230,9 +230,54 @@ func (cg *ConfigGenerator) GenerateNodeConfig(peerAddresses []string, vpsIP stri
|
|||||||
return "", fmt.Errorf("failed to populate webrtc config: %w", err)
|
return "", fmt.Errorf("failed to populate webrtc config: %w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Stealth TURN SNI router (feat-124). Like the webrtc block, sni_router is
|
||||||
|
// an operator opt-in that only exists in the previous node.yaml, so carry
|
||||||
|
// it forward across regeneration. Without this, a Phase4 regen would reset
|
||||||
|
// sni_router.enabled to false, stop the :443 router and break stealth TURN
|
||||||
|
// for every region that relies on it (the same regen-wipe class of outage
|
||||||
|
// as bugboard #259/#846).
|
||||||
|
cg.populateSNIRouterConfig(&data)
|
||||||
|
|
||||||
return templates.RenderNodeConfig(data)
|
return templates.RenderNodeConfig(data)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// populateSNIRouterConfig carries forward the operator-set sni_router.enabled
|
||||||
|
// flag from the existing node.yaml so a config regeneration never silently
|
||||||
|
// disables the stealth TURN-over-443 router. Absence of the file or block
|
||||||
|
// leaves the flag at its default (false).
|
||||||
|
func (cg *ConfigGenerator) populateSNIRouterConfig(data *templates.NodeConfigData) {
|
||||||
|
data.SNIRouterEnabled = cg.readExistingSNIRouterEnabled()
|
||||||
|
}
|
||||||
|
|
||||||
|
// SNIRouterEnabled reports whether the node's on-disk node.yaml has opted in to
|
||||||
|
// the stealth TURN-over-443 SNI router. The orchestrator reads this AFTER
|
||||||
|
// Phase4 has written node.yaml to decide whether to move Caddy to :8443 and
|
||||||
|
// start the router unit. Returns false when the config or block is absent.
|
||||||
|
func (cg *ConfigGenerator) SNIRouterEnabled() bool {
|
||||||
|
return cg.readExistingSNIRouterEnabled()
|
||||||
|
}
|
||||||
|
|
||||||
|
// readExistingSNIRouterEnabled parses just the top-level sni_router.enabled
|
||||||
|
// flag out of the existing node.yaml. Returns false when the file is missing,
|
||||||
|
// malformed, or has no sni_router block (fresh install / not opted in).
|
||||||
|
func (cg *ConfigGenerator) readExistingSNIRouterEnabled() bool {
|
||||||
|
configPath := filepath.Join(cg.oramaDir, "configs", "node.yaml")
|
||||||
|
raw, err := os.ReadFile(configPath)
|
||||||
|
if err != nil {
|
||||||
|
return false // No existing config (fresh install) — default off.
|
||||||
|
}
|
||||||
|
|
||||||
|
var parsed struct {
|
||||||
|
SNIRouter struct {
|
||||||
|
Enabled bool `yaml:"enabled"`
|
||||||
|
} `yaml:"sni_router"`
|
||||||
|
}
|
||||||
|
if err := yaml.Unmarshal(raw, &parsed); err != nil {
|
||||||
|
return false // Malformed/old config — don't fail regen; default off.
|
||||||
|
}
|
||||||
|
return parsed.SNIRouter.Enabled
|
||||||
|
}
|
||||||
|
|
||||||
// existingWebRTC is the minimal shape parsed out of an existing node.yaml to
|
// existingWebRTC is the minimal shape parsed out of an existing node.yaml to
|
||||||
// carry forward operator-set WebRTC fields across a config regeneration.
|
// carry forward operator-set WebRTC fields across a config regeneration.
|
||||||
type existingWebRTC struct {
|
type existingWebRTC struct {
|
||||||
|
|||||||
@ -24,6 +24,7 @@ type BinaryInstaller struct {
|
|||||||
coredns *installers.CoreDNSInstaller
|
coredns *installers.CoreDNSInstaller
|
||||||
caddy *installers.CaddyInstaller
|
caddy *installers.CaddyInstaller
|
||||||
ntfy *installers.NtfyInstaller // feature #72; installed only when EnableNtfy is set
|
ntfy *installers.NtfyInstaller // feature #72; installed only when EnableNtfy is set
|
||||||
|
sniRouter *installers.SNIRouterInstaller // feat-124; configured only when sni_router.enabled
|
||||||
}
|
}
|
||||||
|
|
||||||
// NewBinaryInstaller creates a new binary installer
|
// NewBinaryInstaller creates a new binary installer
|
||||||
@ -41,6 +42,7 @@ func NewBinaryInstaller(arch string, logWriter io.Writer) *BinaryInstaller {
|
|||||||
coredns: installers.NewCoreDNSInstaller(arch, logWriter, oramaHome),
|
coredns: installers.NewCoreDNSInstaller(arch, logWriter, oramaHome),
|
||||||
caddy: installers.NewCaddyInstaller(arch, logWriter, oramaHome),
|
caddy: installers.NewCaddyInstaller(arch, logWriter, oramaHome),
|
||||||
ntfy: installers.NewNtfyInstaller(arch, logWriter),
|
ntfy: installers.NewNtfyInstaller(arch, logWriter),
|
||||||
|
sniRouter: installers.NewSNIRouterInstaller(arch, logWriter, OramaDir),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -158,6 +160,29 @@ func (bi *BinaryInstaller) EnableCaddyNtfyProxy(hostname string) {
|
|||||||
bi.caddy.EnableNtfyProxy(hostname)
|
bi.caddy.EnableNtfyProxy(hostname)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// EnableCaddySNIRouterMode moves Caddy's HTTPS listener off :443 to :8443 on
|
||||||
|
// the next ConfigureCaddy() call, freeing :443 for the orama-sni-router
|
||||||
|
// (feat-124). Must be called BEFORE ConfigureCaddy.
|
||||||
|
func (bi *BinaryInstaller) EnableCaddySNIRouterMode() {
|
||||||
|
bi.caddy.EnableSNIRouterMode()
|
||||||
|
}
|
||||||
|
|
||||||
|
// ConfigureSNIRouter writes the orama-sni-router YAML config (listen :443,
|
||||||
|
// fallback Caddy on :8443, turn_discovery for baseDomain). Feat-124.
|
||||||
|
func (bi *BinaryInstaller) ConfigureSNIRouter(baseDomain string) error {
|
||||||
|
return bi.sniRouter.Configure(baseDomain)
|
||||||
|
}
|
||||||
|
|
||||||
|
// WriteSNIRouterUnit writes /etc/systemd/system/orama-sni-router.service.
|
||||||
|
func (bi *BinaryInstaller) WriteSNIRouterUnit() error {
|
||||||
|
return bi.sniRouter.WriteSystemdUnit()
|
||||||
|
}
|
||||||
|
|
||||||
|
// SNIRouterServiceName returns the systemd unit name for lifecycle calls.
|
||||||
|
func (bi *BinaryInstaller) SNIRouterServiceName() string {
|
||||||
|
return installers.SNIRouterServiceName
|
||||||
|
}
|
||||||
|
|
||||||
// InstallNtfy installs the self-hosted ntfy server (binary, user,
|
// InstallNtfy installs the self-hosted ntfy server (binary, user,
|
||||||
// systemd unit, data directory). Feature #72. Idempotent.
|
// systemd unit, data directory). Feature #72. Idempotent.
|
||||||
func (bi *BinaryInstaller) InstallNtfy() error {
|
func (bi *BinaryInstaller) InstallNtfy() error {
|
||||||
|
|||||||
@ -27,8 +27,20 @@ type CaddyInstaller struct {
|
|||||||
// Enabled per-node via EnableNtfyProxy. Feature #72.
|
// Enabled per-node via EnableNtfyProxy. Feature #72.
|
||||||
withNtfy bool
|
withNtfy bool
|
||||||
ntfyHostname string // e.g. "push.dbrs.space" — fully-qualified public host
|
ntfyHostname string // e.g. "push.dbrs.space" — fully-qualified public host
|
||||||
|
|
||||||
|
// behindSNIRouter, when set, moves Caddy's HTTPS listener off :443 to
|
||||||
|
// CaddyHTTPSPortBehindSNI so the orama-sni-router can own :443 and forward
|
||||||
|
// TLS by SNI (feat-124, stealth TURN). Enabled per-node via
|
||||||
|
// EnableSNIRouterMode. Plain HTTP (:80) is unaffected. When false the
|
||||||
|
// generated Caddyfile is byte-identical to the pre-feature output.
|
||||||
|
behindSNIRouter bool
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// CaddyHTTPSPortBehindSNI is the port Caddy binds for HTTPS when the node runs
|
||||||
|
// behind the SNI router (which owns :443). 8443 matches the sni-router config's
|
||||||
|
// caddy fallback backend (127.0.0.1:8443) and the plan doc.
|
||||||
|
const CaddyHTTPSPortBehindSNI = 8443
|
||||||
|
|
||||||
// NewCaddyInstaller creates a new Caddy installer
|
// NewCaddyInstaller creates a new Caddy installer
|
||||||
func NewCaddyInstaller(arch string, logWriter io.Writer, oramaHome string) *CaddyInstaller {
|
func NewCaddyInstaller(arch string, logWriter io.Writer, oramaHome string) *CaddyInstaller {
|
||||||
return &CaddyInstaller{
|
return &CaddyInstaller{
|
||||||
@ -52,6 +64,16 @@ func (ci *CaddyInstaller) EnableNtfyProxy(hostname string) {
|
|||||||
ci.ntfyHostname = hostname
|
ci.ntfyHostname = hostname
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// EnableSNIRouterMode tells the Caddy installer to bind HTTPS on
|
||||||
|
// CaddyHTTPSPortBehindSNI (8443) instead of :443, freeing :443 for the
|
||||||
|
// orama-sni-router (feat-124). Plain HTTP on :80 is left untouched. Must be
|
||||||
|
// called BEFORE Configure so the generated Caddyfile picks up the global
|
||||||
|
// `https_port` option. A no-op when never called: the default Caddyfile keeps
|
||||||
|
// HTTPS on :443.
|
||||||
|
func (ci *CaddyInstaller) EnableSNIRouterMode() {
|
||||||
|
ci.behindSNIRouter = true
|
||||||
|
}
|
||||||
|
|
||||||
// IsInstalled checks if Caddy with orama DNS module is already installed
|
// IsInstalled checks if Caddy with orama DNS module is already installed
|
||||||
func (ci *CaddyInstaller) IsInstalled() bool {
|
func (ci *CaddyInstaller) IsInstalled() bool {
|
||||||
caddyPath := "/usr/bin/caddy"
|
caddyPath := "/usr/bin/caddy"
|
||||||
@ -417,7 +439,17 @@ func (ci *CaddyInstaller) generateCaddyfile(domain, email, acmeEndpoint, baseDom
|
|||||||
// workload is REST + WebSocket (neither benefits much from
|
// workload is REST + WebSocket (neither benefits much from
|
||||||
// h2 stream multiplexing — REST is keep-alive over h1, and
|
// h2 stream multiplexing — REST is keep-alive over h1, and
|
||||||
// WS is single-connection by design).
|
// WS is single-connection by design).
|
||||||
sb.WriteString(fmt.Sprintf("{\n email %s\n servers {\n protocols h1\n }\n}\n", email))
|
// When this node runs behind the SNI router (feat-124), move Caddy's HTTPS
|
||||||
|
// listener off :443 to CaddyHTTPSPortBehindSNI via the `https_port` global
|
||||||
|
// option. The sni-router owns :443 and forwards TLS by SNI to either a
|
||||||
|
// namespace's TURNS listener or here (127.0.0.1:8443). Plain HTTP (:80) is
|
||||||
|
// unchanged. When behindSNIRouter is false, no `https_port` line is emitted
|
||||||
|
// and the Caddyfile is byte-identical to the pre-feature output.
|
||||||
|
httpsPortOption := ""
|
||||||
|
if ci.behindSNIRouter {
|
||||||
|
httpsPortOption = fmt.Sprintf(" https_port %d\n", CaddyHTTPSPortBehindSNI)
|
||||||
|
}
|
||||||
|
sb.WriteString(fmt.Sprintf("{\n email %s\n%s servers {\n protocols h1\n }\n}\n", email, httpsPortOption))
|
||||||
|
|
||||||
// Node domain blocks (e.g., node1.dbrs.space, *.node1.dbrs.space)
|
// Node domain blocks (e.g., node1.dbrs.space, *.node1.dbrs.space)
|
||||||
sb.WriteString(fmt.Sprintf("\n*.%s {\n%s\n reverse_proxy localhost:6001\n}\n", domain, tlsBlock))
|
sb.WriteString(fmt.Sprintf("\n*.%s {\n%s\n reverse_proxy localhost:6001\n}\n", domain, tlsBlock))
|
||||||
|
|||||||
@ -1,6 +1,7 @@
|
|||||||
package installers
|
package installers
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"fmt"
|
||||||
"io"
|
"io"
|
||||||
"strings"
|
"strings"
|
||||||
"testing"
|
"testing"
|
||||||
@ -97,3 +98,50 @@ func TestGenerateCaddyfile_BaseDomainSameAsDomainOmitsDuplicates(t *testing.T) {
|
|||||||
t.Errorf("expected exactly 2 `*.dbrs.space {` occurrences (1 TLS + 1 HTTP), got %d in:\n%s", got, cf)
|
t.Errorf("expected exactly 2 `*.dbrs.space {` occurrences (1 TLS + 1 HTTP), got %d in:\n%s", got, cf)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TestGenerateCaddyfile_SNIRouterDisabledByteIdentical is the safety guard for
|
||||||
|
// feat-124: when EnableSNIRouterMode has NOT been called, the generated
|
||||||
|
// Caddyfile must be byte-identical to the pre-feature output (HTTPS stays on
|
||||||
|
// :443, no `https_port` global option). This is the default for every existing
|
||||||
|
// node — any drift here is a silent production change.
|
||||||
|
func TestGenerateCaddyfile_SNIRouterDisabledByteIdentical(t *testing.T) {
|
||||||
|
ci := newTestCaddyInstaller()
|
||||||
|
cf := ci.generateCaddyfile("node1.dbrs.space", "admin@dbrs.space",
|
||||||
|
"http://localhost:6001/v1/internal/acme", "dbrs.space")
|
||||||
|
|
||||||
|
if strings.Contains(cf, "https_port") {
|
||||||
|
t.Errorf("default Caddyfile must NOT contain `https_port` (SNI router off); got:\n%s", cf)
|
||||||
|
}
|
||||||
|
if strings.Contains(cf, "8443") {
|
||||||
|
t.Errorf("default Caddyfile must NOT reference :8443 (SNI router off); got:\n%s", cf)
|
||||||
|
}
|
||||||
|
// The global options block must be exactly the pre-feature shape.
|
||||||
|
if !strings.Contains(cf, "{\n email admin@dbrs.space\n servers {\n protocols h1\n }\n}\n") {
|
||||||
|
t.Errorf("default global options block drifted from pre-feature output; got:\n%s", cf)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestGenerateCaddyfile_SNIRouterEnabledMovesHTTPSTo8443 verifies that after
|
||||||
|
// EnableSNIRouterMode, Caddy's HTTPS listener is moved to :8443 via the
|
||||||
|
// `https_port` global option, while plain HTTP (:80) is unchanged so ACME
|
||||||
|
// HTTP-01 and the HTTP catch-all still work.
|
||||||
|
func TestGenerateCaddyfile_SNIRouterEnabledMovesHTTPSTo8443(t *testing.T) {
|
||||||
|
ci := newTestCaddyInstaller()
|
||||||
|
ci.EnableSNIRouterMode()
|
||||||
|
cf := ci.generateCaddyfile("node1.dbrs.space", "admin@dbrs.space",
|
||||||
|
"http://localhost:6001/v1/internal/acme", "dbrs.space")
|
||||||
|
|
||||||
|
want := fmt.Sprintf("https_port %d", CaddyHTTPSPortBehindSNI)
|
||||||
|
if !strings.Contains(cf, want) {
|
||||||
|
t.Errorf("SNI-router Caddyfile must contain %q; got:\n%s", want, cf)
|
||||||
|
}
|
||||||
|
// The global option belongs inside the top-level options block, before the
|
||||||
|
// servers stanza.
|
||||||
|
if !strings.Contains(cf, "{\n email admin@dbrs.space\n https_port 8443\n servers {\n protocols h1\n }\n}\n") {
|
||||||
|
t.Errorf("https_port not placed correctly in global options block; got:\n%s", cf)
|
||||||
|
}
|
||||||
|
// Plain HTTP :80 catch-all must be unchanged.
|
||||||
|
if !strings.Contains(cf, ":80 {") {
|
||||||
|
t.Errorf("HTTP :80 block must remain when SNI router enabled; got:\n%s", cf)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|||||||
203
core/pkg/environments/production/installers/sni_router.go
Normal file
203
core/pkg/environments/production/installers/sni_router.go
Normal file
@ -0,0 +1,203 @@
|
|||||||
|
package installers
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"io"
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
)
|
||||||
|
|
||||||
|
// SNI router installer (feat-124, stealth TURN-over-443).
|
||||||
|
//
|
||||||
|
// Unlike the binary installers (Caddy, ntfy), the orama-sni-router binary is
|
||||||
|
// built and shipped to the node by `orama build` / the install tarball — this
|
||||||
|
// installer only writes the router's YAML config and the systemd unit, and
|
||||||
|
// drives the unit's lifecycle (install+enable+start when enabled,
|
||||||
|
// stop+disable when not).
|
||||||
|
|
||||||
|
// Exported names for the SNI router: its listen address, unit name and config
// file name.
const (
	// SNIRouterListenAddr is the public address the router binds. The router
	// takes over :443, which is why Caddy is relocated to
	// CaddyHTTPSPortBehindSNI (see caddy.go).
	SNIRouterListenAddr = ":443"

	// SNIRouterServiceName is the name of the router's systemd unit.
	SNIRouterServiceName = "orama-sni-router.service"

	// SNIRouterConfigName is the router config file name. The file lives
	// under <oramaDir>/configs, which is where the binary's
	// config.DefaultPath lookup resolves it.
	SNIRouterConfigName = "sni-router.yaml"
)

// Unexported tunables and filesystem locations rendered into the router config
// and the systemd unit.
const (
	// sniRouterRescanInterval is the period between scans of the namespaces
	// directory for per-namespace TURNS listeners. It is meant to match the
	// library default (sniproxy.DefaultDiscoveryRescanInterval) and is kept
	// as a literal so the installer does not import the runtime package.
	sniRouterRescanInterval = "30s"

	// sniRouterClientHelloTimeout bounds the per-connection ClientHello peek
	// (slowloris protection); mirrors the sniproxy server default.
	sniRouterClientHelloTimeout = "5s"

	// sniRouterBackendDialTimeout bounds the backend dial (dead-backend
	// protection); mirrors the sniproxy server default.
	sniRouterBackendDialTimeout = "5s"

	// sniRouterMaxConcurrentConns limits in-flight connections on the public
	// :443 listener (DoS guard); mirrors the sniproxy server default.
	sniRouterMaxConcurrentConns = 10000

	// sniRouterSystemdUnitPath is the location the unit file is written to.
	sniRouterSystemdUnitPath = "/etc/systemd/system/" + SNIRouterServiceName

	// sniRouterBinaryPath is where the router binary is installed on the node.
	sniRouterBinaryPath = "/opt/orama/bin/orama-sni-router"
)
|
||||||
|
|
||||||
|
// SNIRouterInstaller writes the orama-sni-router config + systemd unit and
|
||||||
|
// manages the unit lifecycle. The caddy fallback port matches
|
||||||
|
// CaddyHTTPSPortBehindSNI so unmatched SNIs (regular HTTPS) reach the moved
|
||||||
|
// Caddy listener.
|
||||||
|
type SNIRouterInstaller struct {
|
||||||
|
*BaseInstaller
|
||||||
|
oramaDir string // e.g. "/opt/orama/.orama"
|
||||||
|
}
|
||||||
|
|
||||||
|
// NewSNIRouterInstaller creates an installer. oramaDir is the node's .orama
|
||||||
|
// data root (where configs/ and data/namespaces live).
|
||||||
|
func NewSNIRouterInstaller(arch string, logWriter io.Writer, oramaDir string) *SNIRouterInstaller {
|
||||||
|
return &SNIRouterInstaller{
|
||||||
|
BaseInstaller: NewBaseInstaller(arch, logWriter),
|
||||||
|
oramaDir: oramaDir,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// configPath returns the absolute path the router config is written to and the
|
||||||
|
// binary resolves to via its DefaultPath lookup (<oramaDir>/configs/<name>).
|
||||||
|
func (si *SNIRouterInstaller) configPath() string {
|
||||||
|
return filepath.Join(si.oramaDir, "configs", SNIRouterConfigName)
|
||||||
|
}
|
||||||
|
|
||||||
|
// namespacesDir returns the per-namespace config root the router scans for
|
||||||
|
// TURNS listeners.
|
||||||
|
func (si *SNIRouterInstaller) namespacesDir() string {
|
||||||
|
return filepath.Join(si.oramaDir, "data", "namespaces")
|
||||||
|
}
|
||||||
|
|
||||||
|
// Configure writes the router YAML config. baseDomain drives the stealth and
|
||||||
|
// "turn.ns-*" SNI hostnames the router derives during discovery. Idempotent.
|
||||||
|
func (si *SNIRouterInstaller) Configure(baseDomain string) error {
|
||||||
|
if baseDomain == "" {
|
||||||
|
return fmt.Errorf("sni-router: base domain must not be empty")
|
||||||
|
}
|
||||||
|
|
||||||
|
configDir := filepath.Dir(si.configPath())
|
||||||
|
if err := os.MkdirAll(configDir, 0755); err != nil {
|
||||||
|
return fmt.Errorf("sni-router: create config dir %s: %w", configDir, err)
|
||||||
|
}
|
||||||
|
|
||||||
|
content := si.generateConfig(baseDomain)
|
||||||
|
if err := os.WriteFile(si.configPath(), []byte(content), 0644); err != nil {
|
||||||
|
return fmt.Errorf("sni-router: write config %s: %w", si.configPath(), err)
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// generateConfig renders the sni-router.yaml. The fallback is Caddy on
|
||||||
|
// CaddyHTTPSPortBehindSNI; turn_discovery scans the node's namespaces dir so
|
||||||
|
// per-namespace TURNS routes appear without a router restart. No static routes
|
||||||
|
// are emitted — every TURNS route is auto-discovered.
|
||||||
|
func (si *SNIRouterInstaller) generateConfig(baseDomain string) string {
|
||||||
|
return fmt.Sprintf(`# Orama SNI router config (feat-124, stealth TURN-over-443).
|
||||||
|
# Generated by the installer — re-running install/upgrade overwrites this file.
|
||||||
|
#
|
||||||
|
# The router owns :443, peeks each connection's TLS ClientHello SNI, and
|
||||||
|
# forwards the raw (still-encrypted) stream to a backend. TLS is NOT terminated
|
||||||
|
# here. Unmatched SNIs (regular HTTPS) go to the fallback (Caddy on :%[2]d).
|
||||||
|
listen: "%[1]s"
|
||||||
|
client_hello_timeout: %[3]s
|
||||||
|
backend_dial_timeout: %[4]s
|
||||||
|
max_concurrent_conns: %[5]d
|
||||||
|
|
||||||
|
fallback:
|
||||||
|
name: caddy
|
||||||
|
addr: "127.0.0.1:%[2]d"
|
||||||
|
|
||||||
|
# Per-namespace stealth-TURN routes are auto-discovered by scanning
|
||||||
|
# <namespaces_dir>/*/configs/turn-*.yaml every rescan_interval. Each namespace
|
||||||
|
# with a TURNS listener gets two routes (the bland stealth host and a
|
||||||
|
# turn.ns-<namespace>.<base_domain> alias) forwarding to its local TURNS port.
|
||||||
|
turn_discovery:
|
||||||
|
namespaces_dir: %[6]q
|
||||||
|
base_domain: %[7]q
|
||||||
|
rescan_interval: %[8]s
|
||||||
|
|
||||||
|
# No static routes: every TURNS route comes from turn_discovery above.
|
||||||
|
routes: []
|
||||||
|
`,
|
||||||
|
SNIRouterListenAddr,
|
||||||
|
CaddyHTTPSPortBehindSNI,
|
||||||
|
sniRouterClientHelloTimeout,
|
||||||
|
sniRouterBackendDialTimeout,
|
||||||
|
sniRouterMaxConcurrentConns,
|
||||||
|
si.namespacesDir(),
|
||||||
|
baseDomain,
|
||||||
|
sniRouterRescanInterval,
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
// generateSystemdUnit renders /etc/systemd/system/orama-sni-router.service.
|
||||||
|
// Runs as the orama user with CAP_NET_BIND_SERVICE so it can bind :443 without
|
||||||
|
// root. Ordered Before=caddy.service so the router is ready before Caddy
|
||||||
|
// switches to :8443. Restart=on-failure.
|
||||||
|
func (si *SNIRouterInstaller) generateSystemdUnit() string {
|
||||||
|
return fmt.Sprintf(`[Unit]
|
||||||
|
Description=Orama SNI Router (TLS-level :443 → backend forwarder)
|
||||||
|
Documentation=https://github.com/DeBrosOfficial/network
|
||||||
|
After=network.target
|
||||||
|
Before=caddy.service
|
||||||
|
PartOf=orama-node.service
|
||||||
|
|
||||||
|
[Service]
|
||||||
|
Type=simple
|
||||||
|
WorkingDirectory=/opt/orama
|
||||||
|
EnvironmentFile=-/opt/orama/.orama/data/sni-router.env
|
||||||
|
ExecStart=%s --config %s
|
||||||
|
|
||||||
|
# Bind privileged ports (:80, :443) without running as root.
|
||||||
|
AmbientCapabilities=CAP_NET_BIND_SERVICE
|
||||||
|
CapabilityBoundingSet=CAP_NET_BIND_SERVICE
|
||||||
|
|
||||||
|
User=orama
|
||||||
|
Group=orama
|
||||||
|
NoNewPrivileges=yes
|
||||||
|
ProtectSystem=strict
|
||||||
|
ProtectHome=yes
|
||||||
|
PrivateTmp=yes
|
||||||
|
LimitNOFILE=65536
|
||||||
|
|
||||||
|
TimeoutStopSec=15s
|
||||||
|
KillMode=mixed
|
||||||
|
KillSignal=SIGTERM
|
||||||
|
|
||||||
|
Restart=on-failure
|
||||||
|
RestartSec=5s
|
||||||
|
|
||||||
|
StandardOutput=journal
|
||||||
|
StandardError=journal
|
||||||
|
SyslogIdentifier=orama-sni-router
|
||||||
|
|
||||||
|
[Install]
|
||||||
|
WantedBy=multi-user.target
|
||||||
|
`, sniRouterBinaryPath, si.configPath())
|
||||||
|
}
|
||||||
|
|
||||||
|
// WriteSystemdUnit writes the unit file. Idempotent.
|
||||||
|
func (si *SNIRouterInstaller) WriteSystemdUnit() error {
|
||||||
|
if err := os.WriteFile(sniRouterSystemdUnitPath, []byte(si.generateSystemdUnit()), 0644); err != nil {
|
||||||
|
return fmt.Errorf("sni-router: write systemd unit %s: %w", sniRouterSystemdUnitPath, err)
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// IsInstalled reports whether the router binary is present on the node.
|
||||||
|
func (si *SNIRouterInstaller) IsInstalled() bool {
|
||||||
|
_, err := os.Stat(sniRouterBinaryPath)
|
||||||
|
return err == nil
|
||||||
|
}
|
||||||
102
core/pkg/environments/production/installers/sni_router_test.go
Normal file
102
core/pkg/environments/production/installers/sni_router_test.go
Normal file
@ -0,0 +1,102 @@
|
|||||||
|
package installers
|
||||||
|
|
||||||
|
import (
|
||||||
|
"io"
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
"strings"
|
||||||
|
"testing"
|
||||||
|
)
|
||||||
|
|
||||||
|
// newTestSNIRouterInstaller returns an installer rooted at a temp oramaDir so
|
||||||
|
// Configure writes to an isolated location.
|
||||||
|
func newTestSNIRouterInstaller(oramaDir string) *SNIRouterInstaller {
|
||||||
|
return NewSNIRouterInstaller("amd64", io.Discard, oramaDir)
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestGenerateConfig_includesDiscoveryAndFallback verifies the rendered
|
||||||
|
// sni-router.yaml binds :443, falls back to Caddy on the moved HTTPS port, and
|
||||||
|
// emits a turn_discovery block pointing at the node's namespaces dir + base
|
||||||
|
// domain.
|
||||||
|
func TestGenerateConfig_includesDiscoveryAndFallback(t *testing.T) {
|
||||||
|
dir := t.TempDir()
|
||||||
|
si := newTestSNIRouterInstaller(dir)
|
||||||
|
|
||||||
|
cfg := si.generateConfig("orama-devnet.network")
|
||||||
|
|
||||||
|
for _, want := range []string{
|
||||||
|
`listen: ":443"`,
|
||||||
|
"fallback:",
|
||||||
|
`addr: "127.0.0.1:8443"`,
|
||||||
|
"turn_discovery:",
|
||||||
|
"base_domain: \"orama-devnet.network\"",
|
||||||
|
"rescan_interval: 30s",
|
||||||
|
"routes: []",
|
||||||
|
} {
|
||||||
|
if !strings.Contains(cfg, want) {
|
||||||
|
t.Errorf("generated sni-router config missing %q\n---\n%s", want, cfg)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// namespaces_dir must be the node's data/namespaces path.
|
||||||
|
wantNS := filepath.Join(dir, "data", "namespaces")
|
||||||
|
if !strings.Contains(cfg, wantNS) {
|
||||||
|
t.Errorf("config missing namespaces_dir %q\n---\n%s", wantNS, cfg)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestConfigure_writesFileToConfigsDir verifies Configure persists the YAML to
|
||||||
|
// <oramaDir>/configs/sni-router.yaml.
|
||||||
|
func TestConfigure_writesFileToConfigsDir(t *testing.T) {
|
||||||
|
dir := t.TempDir()
|
||||||
|
si := newTestSNIRouterInstaller(dir)
|
||||||
|
|
||||||
|
if err := si.Configure("example.com"); err != nil {
|
||||||
|
t.Fatalf("Configure failed: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
path := filepath.Join(dir, "configs", "sni-router.yaml")
|
||||||
|
data, err := os.ReadFile(path)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("expected config at %s: %v", path, err)
|
||||||
|
}
|
||||||
|
if !strings.Contains(string(data), "base_domain: \"example.com\"") {
|
||||||
|
t.Errorf("written config missing base_domain; got:\n%s", string(data))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestConfigure_rejectsEmptyBaseDomain verifies the installer refuses an empty
|
||||||
|
// base domain rather than emitting a config that would derive bogus hostnames.
|
||||||
|
func TestConfigure_rejectsEmptyBaseDomain(t *testing.T) {
|
||||||
|
si := newTestSNIRouterInstaller(t.TempDir())
|
||||||
|
if err := si.Configure(""); err == nil {
|
||||||
|
t.Errorf("expected error for empty base domain")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestGenerateSystemdUnit_shape verifies the unit grants CAP_NET_BIND_SERVICE,
|
||||||
|
// runs as orama, restarts on failure, and points ExecStart at the installed
|
||||||
|
// binary + config.
|
||||||
|
func TestGenerateSystemdUnit_shape(t *testing.T) {
|
||||||
|
dir := t.TempDir()
|
||||||
|
si := newTestSNIRouterInstaller(dir)
|
||||||
|
unit := si.generateSystemdUnit()
|
||||||
|
|
||||||
|
for _, want := range []string{
|
||||||
|
"AmbientCapabilities=CAP_NET_BIND_SERVICE",
|
||||||
|
"User=orama",
|
||||||
|
"Restart=on-failure",
|
||||||
|
"EnvironmentFile=-/opt/orama/.orama/data/sni-router.env",
|
||||||
|
// ExecStart must point at the ABSOLUTE config path so it doesn't
|
||||||
|
// depend on WorkingDirectory/$HOME resolution at runtime.
|
||||||
|
"ExecStart=/opt/orama/bin/orama-sni-router --config " + si.configPath(),
|
||||||
|
"Before=caddy.service",
|
||||||
|
} {
|
||||||
|
if !strings.Contains(unit, want) {
|
||||||
|
t.Errorf("systemd unit missing %q\n---\n%s", want, unit)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if !strings.Contains(si.configPath(), dir) {
|
||||||
|
t.Errorf("configPath %q not rooted at the oramaDir %q", si.configPath(), dir)
|
||||||
|
}
|
||||||
|
}
|
||||||
@ -741,11 +741,35 @@ func (ps *ProductionSetup) Phase4GenerateConfigs(peerAddresses []string, vpsIP s
|
|||||||
ps.logf(" ✓ ntfy config generated (base_url: %s)", ntfyBaseURL)
|
ps.logf(" ✓ ntfy config generated (base_url: %s)", ntfyBaseURL)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Stealth TURN-over-443 (feat-124): when the node opted in
|
||||||
|
// (sni_router.enabled in the node.yaml just written above), Caddy
|
||||||
|
// must vacate :443 so the orama-sni-router can own it. Move Caddy's
|
||||||
|
// HTTPS listener to :8443 BEFORE ConfigureCaddy renders the Caddyfile.
|
||||||
|
// When not opted in, the Caddyfile is byte-identical to before.
|
||||||
|
if ps.configGenerator.SNIRouterEnabled() {
|
||||||
|
ps.binaryInstaller.EnableCaddySNIRouterMode()
|
||||||
|
ps.logf(" ✓ SNI router enabled — Caddy HTTPS will bind :8443")
|
||||||
|
}
|
||||||
|
|
||||||
if err := ps.binaryInstaller.ConfigureCaddy(caddyDomain, email, acmeEndpoint, baseDomain); err != nil {
|
if err := ps.binaryInstaller.ConfigureCaddy(caddyDomain, email, acmeEndpoint, baseDomain); err != nil {
|
||||||
ps.logf(" ⚠️ Caddy config warning: %v", err)
|
ps.logf(" ⚠️ Caddy config warning: %v", err)
|
||||||
} else {
|
} else {
|
||||||
ps.logf(" ✓ Caddy config generated")
|
ps.logf(" ✓ Caddy config generated")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Stealth TURN-over-443 (feat-124): when opted in, write the
|
||||||
|
// orama-sni-router config (listen :443, fallback Caddy :8443,
|
||||||
|
// turn_discovery scanning this node's namespaces dir for the cluster's
|
||||||
|
// base domain). The unit lifecycle is driven in Phase5 after Caddy has
|
||||||
|
// moved to :8443. The router uses the base domain as the zone for
|
||||||
|
// stealth/turn.ns-* hostnames.
|
||||||
|
if ps.configGenerator.SNIRouterEnabled() {
|
||||||
|
if err := ps.binaryInstaller.ConfigureSNIRouter(dnsZone); err != nil {
|
||||||
|
ps.logf(" ⚠️ SNI router config warning: %v", err)
|
||||||
|
} else {
|
||||||
|
ps.logf(" ✓ SNI router config generated (zone: %s)", dnsZone)
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
@ -871,6 +895,14 @@ func (ps *ProductionSetup) Phase5CreateSystemdServices(enableHTTPS bool) error {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// SNI router unit (feat-124). Write the unit whenever the binary is present
|
||||||
|
// so the daemon-reload below picks it up; the enable/start vs stop/disable
|
||||||
|
// decision (based on sni_router.enabled) happens after Caddy has moved to
|
||||||
|
// :8443, in the start section.
|
||||||
|
if ps.binaryInstaller.WriteSNIRouterUnit() == nil {
|
||||||
|
ps.logf(" ✓ SNI router service unit created: %s", ps.binaryInstaller.SNIRouterServiceName())
|
||||||
|
}
|
||||||
|
|
||||||
// Reload systemd daemon
|
// Reload systemd daemon
|
||||||
if err := ps.serviceController.DaemonReload(); err != nil {
|
if err := ps.serviceController.DaemonReload(); err != nil {
|
||||||
return fmt.Errorf("failed to reload systemd: %w", err)
|
return fmt.Errorf("failed to reload systemd: %w", err)
|
||||||
@ -980,6 +1012,31 @@ func (ps *ProductionSetup) Phase5CreateSystemdServices(enableHTTPS bool) error {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Stealth TURN-over-443 (feat-124) cutover. Caddy has just been
|
||||||
|
// reconfigured to :8443 and restarted above, so :443 is now free for the
|
||||||
|
// SNI router. When opted in, enable+start the router; when not, stop+disable
|
||||||
|
// it so a node that flipped the flag off cleanly returns :443 to Caddy.
|
||||||
|
sniSvc := ps.binaryInstaller.SNIRouterServiceName()
|
||||||
|
if ps.configGenerator.SNIRouterEnabled() {
|
||||||
|
if err := ps.serviceController.EnableService(sniSvc); err != nil {
|
||||||
|
ps.logf(" ⚠️ Failed to enable %s: %v", sniSvc, err)
|
||||||
|
}
|
||||||
|
if err := ps.serviceController.RestartService(sniSvc); err != nil {
|
||||||
|
ps.logf(" ⚠️ Failed to start %s: %v", sniSvc, err)
|
||||||
|
} else {
|
||||||
|
ps.logf(" - %s started (owns :443)", sniSvc)
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// Not opted in: ensure the router is not holding :443. Errors are
|
||||||
|
// non-fatal — the unit may simply not be loaded on this node.
|
||||||
|
if err := ps.serviceController.StopService(sniSvc); err != nil {
|
||||||
|
ps.logf(" ℹ️ %s not running (expected when disabled): %v", sniSvc, err)
|
||||||
|
}
|
||||||
|
if err := ps.serviceController.DisableService(sniSvc); err != nil {
|
||||||
|
ps.logf(" ℹ️ %s not enabled (expected when disabled): %v", sniSvc, err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Start ntfy on every node (#72). Caddy must already be up (it
|
// Start ntfy on every node (#72). Caddy must already be up (it
|
||||||
// terminates TLS for push.<dnsZone>), which the order above
|
// terminates TLS for push.<dnsZone>), which the order above
|
||||||
// guarantees.
|
// guarantees.
|
||||||
|
|||||||
72
core/pkg/environments/production/sni_router_test.go
Normal file
72
core/pkg/environments/production/sni_router_test.go
Normal file
@ -0,0 +1,72 @@
|
|||||||
|
package production
|
||||||
|
|
||||||
|
import (
|
||||||
|
"strings"
|
||||||
|
"testing"
|
||||||
|
)
|
||||||
|
|
||||||
|
// TestGenerateNodeConfig_preservesSNIRouterEnabled is the regression test for
|
||||||
|
// the feat-124 regen-wipe class of outage (cf. bugboard #259/#846 for webrtc):
|
||||||
|
// a config regeneration must NOT silently reset an operator's
|
||||||
|
// sni_router.enabled: true back to false, which would stop the :443 router and
|
||||||
|
// break stealth TURN. We write a node.yaml with the flag set, regenerate, and
|
||||||
|
// assert it survives.
|
||||||
|
func TestGenerateNodeConfig_preservesSNIRouterEnabled(t *testing.T) {
|
||||||
|
dir := t.TempDir()
|
||||||
|
writeNodeYAML(t, dir, `sni_router:
|
||||||
|
enabled: true
|
||||||
|
|
||||||
|
http_gateway:
|
||||||
|
enabled: true
|
||||||
|
`)
|
||||||
|
|
||||||
|
cg := NewConfigGenerator(dir)
|
||||||
|
out, err := cg.GenerateNodeConfig(nil, "10.0.0.5", "", "node-1.dbrs.space", "dbrs.space", false)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("GenerateNodeConfig failed: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if !strings.Contains(out, "sni_router:") {
|
||||||
|
t.Fatalf("regenerated node.yaml missing sni_router block\n---\n%s", out)
|
||||||
|
}
|
||||||
|
if !strings.Contains(out, "enabled: true") {
|
||||||
|
t.Errorf("regenerated node.yaml did not preserve sni_router.enabled: true\n---\n%s", out)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestGenerateNodeConfig_sniRouterDefaultsFalse verifies a fresh install (no
|
||||||
|
// existing node.yaml) renders sni_router.enabled: false — default OFF.
|
||||||
|
func TestGenerateNodeConfig_sniRouterDefaultsFalse(t *testing.T) {
|
||||||
|
dir := t.TempDir()
|
||||||
|
cg := NewConfigGenerator(dir)
|
||||||
|
|
||||||
|
out, err := cg.GenerateNodeConfig(nil, "10.0.0.5", "", "node-1.dbrs.space", "dbrs.space", false)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("GenerateNodeConfig failed: %v", err)
|
||||||
|
}
|
||||||
|
if !strings.Contains(out, "sni_router:") {
|
||||||
|
t.Fatalf("node.yaml missing sni_router block\n---\n%s", out)
|
||||||
|
}
|
||||||
|
if !strings.Contains(out, "enabled: false") {
|
||||||
|
t.Errorf("fresh node.yaml should render sni_router.enabled: false\n---\n%s", out)
|
||||||
|
}
|
||||||
|
if cg.SNIRouterEnabled() {
|
||||||
|
t.Errorf("SNIRouterEnabled() should be false on a fresh install")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestGenerateNodeConfig_sniRouterDisabledStaysFalse verifies an existing
|
||||||
|
// node.yaml that explicitly disabled the router does not flip on during regen.
|
||||||
|
func TestGenerateNodeConfig_sniRouterDisabledStaysFalse(t *testing.T) {
|
||||||
|
dir := t.TempDir()
|
||||||
|
writeNodeYAML(t, dir, "sni_router:\n enabled: false\nhttp_gateway:\n enabled: true\n")
|
||||||
|
|
||||||
|
cg := NewConfigGenerator(dir)
|
||||||
|
out, err := cg.GenerateNodeConfig(nil, "10.0.0.5", "", "node-1.dbrs.space", "dbrs.space", false)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("GenerateNodeConfig failed: %v", err)
|
||||||
|
}
|
||||||
|
if !strings.Contains(out, "enabled: false") {
|
||||||
|
t.Errorf("disabled sni_router should stay false on regen\n---\n%s", out)
|
||||||
|
}
|
||||||
|
}
|
||||||
@ -15,6 +15,14 @@ node:
|
|||||||
operator_wallet: "{{.OperatorWallet}}"
|
operator_wallet: "{{.OperatorWallet}}"
|
||||||
{{- end}}
|
{{- end}}
|
||||||
|
|
||||||
|
# Stealth TURN-over-443 SNI router (feat-124). When enabled, the node runs
|
||||||
|
# orama-sni-router on :443 and Caddy is moved to :8443; default-OFF so existing
|
||||||
|
# nodes are byte-identical until an operator opts in. This block is preserved
|
||||||
|
# across config regeneration (GenerateNodeConfig carries forward an existing
|
||||||
|
# sni_router.enabled: true).
|
||||||
|
sni_router:
|
||||||
|
enabled: {{if .SNIRouterEnabled}}true{{else}}false{{end}}
|
||||||
|
|
||||||
database:
|
database:
|
||||||
data_dir: "{{.DataDir}}/rqlite"
|
data_dir: "{{.DataDir}}/rqlite"
|
||||||
replication_factor: 3
|
replication_factor: 3
|
||||||
|
|||||||
@ -66,6 +66,16 @@ type NodeConfigData struct {
|
|||||||
SFUPort int // Local SFU signaling port the gateway proxies to
|
SFUPort int // Local SFU signaling port the gateway proxies to
|
||||||
TURNDomain string // TURN domain (e.g., "turn.ns-myapp.dbrs.space")
|
TURNDomain string // TURN domain (e.g., "turn.ns-myapp.dbrs.space")
|
||||||
TURNSecret string // HMAC-SHA1 shared secret for TURN credential generation
|
TURNSecret string // HMAC-SHA1 shared secret for TURN credential generation
|
||||||
|
|
||||||
|
// SNIRouterEnabled gates the stealth TURN-over-443 SNI router (feat-124).
|
||||||
|
// Rendered as the top-level sni_router.enabled flag. Default false keeps
|
||||||
|
// existing nodes byte-identical (Caddy stays on :443); when true the node
|
||||||
|
// runs orama-sni-router on :443 and Caddy moves to :8443. This value is
|
||||||
|
// carried forward across config regeneration from the existing node.yaml
|
||||||
|
// (see production/config.go populateSNIRouterConfig) so a regen never wipes
|
||||||
|
// an operator's opt-in (the same preserve-from-existing discipline as the
|
||||||
|
// webrtc block, bugboard #259/#846).
|
||||||
|
SNIRouterEnabled bool
|
||||||
}
|
}
|
||||||
|
|
||||||
// GatewayConfigData holds parameters for gateway.yaml rendering
|
// GatewayConfigData holds parameters for gateway.yaml rendering
|
||||||
|
|||||||
@ -103,6 +103,36 @@ func TestRenderNodeConfig_webRTC(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestRenderNodeConfig_sniRouter(t *testing.T) {
|
||||||
|
// Enabled: top-level sni_router block renders enabled: true.
|
||||||
|
enabled, err := RenderNodeConfig(NodeConfigData{
|
||||||
|
NodeID: "node1",
|
||||||
|
SNIRouterEnabled: true,
|
||||||
|
})
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("RenderNodeConfig failed: %v", err)
|
||||||
|
}
|
||||||
|
if !strings.Contains(enabled, "sni_router:") {
|
||||||
|
t.Errorf("rendered node config missing sni_router block\n---\n%s", enabled)
|
||||||
|
}
|
||||||
|
if !strings.Contains(enabled, "enabled: true") {
|
||||||
|
t.Errorf("sni_router should render enabled: true\n---\n%s", enabled)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Default: the block is always present, defaulting to false (so the flag is
|
||||||
|
// discoverable to operators and round-trips through regen).
|
||||||
|
disabled, err := RenderNodeConfig(NodeConfigData{NodeID: "node1"})
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("RenderNodeConfig failed: %v", err)
|
||||||
|
}
|
||||||
|
if !strings.Contains(disabled, "sni_router:") {
|
||||||
|
t.Errorf("sni_router block should always be present\n---\n%s", disabled)
|
||||||
|
}
|
||||||
|
if !strings.Contains(disabled, "enabled: false") {
|
||||||
|
t.Errorf("default sni_router should render enabled: false\n---\n%s", disabled)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func TestRenderGatewayConfig(t *testing.T) {
|
func TestRenderGatewayConfig(t *testing.T) {
|
||||||
bootstrapMultiaddr := "/ip4/127.0.0.1/tcp/4001/p2p/Qm1234567890"
|
bootstrapMultiaddr := "/ip4/127.0.0.1/tcp/4001/p2p/Qm1234567890"
|
||||||
data := GatewayConfigData{
|
data := GatewayConfigData{
|
||||||
|
|||||||
@ -1114,6 +1114,48 @@ func (g *Gateway) namespaceWebRTCDisablePublicHandler(w http.ResponseWriter, r *
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// namespaceWebRTCStealthPublicHandler handles POST /v1/namespace/webrtc/stealth/{enable|disable}
|
||||||
|
// (feat-124). Public: authenticated by JWT/API key via auth middleware;
|
||||||
|
// namespace from context. `enable` is true for the enable route.
|
||||||
|
func (g *Gateway) namespaceWebRTCStealthPublicHandler(w http.ResponseWriter, r *http.Request, enable bool) {
|
||||||
|
if r.Method != http.MethodPost {
|
||||||
|
writeError(w, http.StatusMethodNotAllowed, "method not allowed")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
namespaceName, _ := r.Context().Value(CtxKeyNamespaceOverride).(string)
|
||||||
|
if namespaceName == "" {
|
||||||
|
writeError(w, http.StatusForbidden, "namespace not resolved")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
if g.webrtcManager == nil {
|
||||||
|
writeError(w, http.StatusServiceUnavailable, "WebRTC management not enabled")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
var err error
|
||||||
|
action := "disabled"
|
||||||
|
if enable {
|
||||||
|
action = "enabled"
|
||||||
|
err = g.webrtcManager.EnableWebRTCStealth(r.Context(), namespaceName)
|
||||||
|
} else {
|
||||||
|
err = g.webrtcManager.DisableWebRTCStealth(r.Context(), namespaceName)
|
||||||
|
}
|
||||||
|
if err != nil {
|
||||||
|
writeError(w, http.StatusInternalServerError, err.Error())
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
w.Header().Set("Content-Type", "application/json")
|
||||||
|
w.WriteHeader(http.StatusOK)
|
||||||
|
json.NewEncoder(w).Encode(map[string]interface{}{
|
||||||
|
"status": "ok",
|
||||||
|
"namespace": namespaceName,
|
||||||
|
"message": "WebRTC stealth " + action + " successfully",
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
// namespaceWebRTCStatusPublicHandler handles GET /v1/namespace/webrtc/status
|
// namespaceWebRTCStatusPublicHandler handles GET /v1/namespace/webrtc/status
|
||||||
// Public: authenticated by JWT/API key via auth middleware. Namespace from context.
|
// Public: authenticated by JWT/API key via auth middleware. Namespace from context.
|
||||||
func (g *Gateway) namespaceWebRTCStatusPublicHandler(w http.ResponseWriter, r *http.Request) {
|
func (g *Gateway) namespaceWebRTCStatusPublicHandler(w http.ResponseWriter, r *http.Request) {
|
||||||
|
|||||||
@ -64,6 +64,12 @@ type WebRTCManager interface {
|
|||||||
DisableWebRTC(ctx context.Context, namespaceName string) error
|
DisableWebRTC(ctx context.Context, namespaceName string) error
|
||||||
// GetWebRTCStatus returns the WebRTC config for a namespace, or nil if not enabled.
|
// GetWebRTCStatus returns the WebRTC config for a namespace, or nil if not enabled.
|
||||||
GetWebRTCStatus(ctx context.Context, namespaceName string) (interface{}, error)
|
GetWebRTCStatus(ctx context.Context, namespaceName string) (interface{}, error)
|
||||||
|
// EnableWebRTCStealth / DisableWebRTCStealth toggle the censorship-
|
||||||
|
// resistant TURNS:443 path (feat-124): stealth cert on the TURN servers,
|
||||||
|
// stealth DNS records, and the turns:<stealth-host>:443 rung in the
|
||||||
|
// turn.credentials URI ladder. Requires WebRTC to already be enabled.
|
||||||
|
EnableWebRTCStealth(ctx context.Context, namespaceName string) error
|
||||||
|
DisableWebRTCStealth(ctx context.Context, namespaceName string) error
|
||||||
}
|
}
|
||||||
|
|
||||||
// Handlers holds dependencies for authentication HTTP handlers
|
// Handlers holds dependencies for authentication HTTP handlers
|
||||||
|
|||||||
@ -53,6 +53,8 @@ type SpawnRequest struct {
|
|||||||
GatewaySFUPort int `json:"gateway_sfu_port,omitempty"`
|
GatewaySFUPort int `json:"gateway_sfu_port,omitempty"`
|
||||||
GatewayTURNDomain string `json:"gateway_turn_domain,omitempty"`
|
GatewayTURNDomain string `json:"gateway_turn_domain,omitempty"`
|
||||||
GatewayTURNSecret string `json:"gateway_turn_secret,omitempty"`
|
GatewayTURNSecret string `json:"gateway_turn_secret,omitempty"`
|
||||||
|
// Stealth TURNS:443 host (feat-124); empty when stealth is disabled.
|
||||||
|
GatewayTURNStealthDomain string `json:"gateway_turn_stealth_domain,omitempty"`
|
||||||
// Host serverless secrets encryption key forwarded to the spawned
|
// Host serverless secrets encryption key forwarded to the spawned
|
||||||
// namespace gateway (bugboard #837 follow-up). Same value on every node.
|
// namespace gateway (bugboard #837 follow-up). Same value on every node.
|
||||||
GatewaySecretsEncryptionKey string `json:"gateway_secrets_encryption_key,omitempty"`
|
GatewaySecretsEncryptionKey string `json:"gateway_secrets_encryption_key,omitempty"`
|
||||||
@ -75,6 +77,7 @@ type SpawnRequest struct {
|
|||||||
TURNRelayStart int `json:"turn_relay_start,omitempty"`
|
TURNRelayStart int `json:"turn_relay_start,omitempty"`
|
||||||
TURNRelayEnd int `json:"turn_relay_end,omitempty"`
|
TURNRelayEnd int `json:"turn_relay_end,omitempty"`
|
||||||
TURNDomain string `json:"turn_domain,omitempty"`
|
TURNDomain string `json:"turn_domain,omitempty"`
|
||||||
|
TURNStealthDomain string `json:"turn_stealth_domain,omitempty"`
|
||||||
|
|
||||||
// Cluster state (when action = "save-cluster-state")
|
// Cluster state (when action = "save-cluster-state")
|
||||||
ClusterState json.RawMessage `json:"cluster_state,omitempty"`
|
ClusterState json.RawMessage `json:"cluster_state,omitempty"`
|
||||||
@ -237,6 +240,7 @@ func (h *SpawnHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
|
|||||||
WebRTCEnabled: req.GatewayWebRTCEnabled,
|
WebRTCEnabled: req.GatewayWebRTCEnabled,
|
||||||
SFUPort: req.GatewaySFUPort,
|
SFUPort: req.GatewaySFUPort,
|
||||||
TURNDomain: req.GatewayTURNDomain,
|
TURNDomain: req.GatewayTURNDomain,
|
||||||
|
TURNStealthDomain: req.GatewayTURNStealthDomain,
|
||||||
TURNSecret: req.GatewayTURNSecret,
|
TURNSecret: req.GatewayTURNSecret,
|
||||||
SecretsEncryptionKey: req.GatewaySecretsEncryptionKey,
|
SecretsEncryptionKey: req.GatewaySecretsEncryptionKey,
|
||||||
}
|
}
|
||||||
@ -291,6 +295,7 @@ func (h *SpawnHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
|
|||||||
WebRTCEnabled: req.GatewayWebRTCEnabled,
|
WebRTCEnabled: req.GatewayWebRTCEnabled,
|
||||||
SFUPort: req.GatewaySFUPort,
|
SFUPort: req.GatewaySFUPort,
|
||||||
TURNDomain: req.GatewayTURNDomain,
|
TURNDomain: req.GatewayTURNDomain,
|
||||||
|
TURNStealthDomain: req.GatewayTURNStealthDomain,
|
||||||
TURNSecret: req.GatewayTURNSecret,
|
TURNSecret: req.GatewayTURNSecret,
|
||||||
SecretsEncryptionKey: req.GatewaySecretsEncryptionKey,
|
SecretsEncryptionKey: req.GatewaySecretsEncryptionKey,
|
||||||
}
|
}
|
||||||
@ -360,6 +365,7 @@ func (h *SpawnHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
|
|||||||
RelayPortStart: req.TURNRelayStart,
|
RelayPortStart: req.TURNRelayStart,
|
||||||
RelayPortEnd: req.TURNRelayEnd,
|
RelayPortEnd: req.TURNRelayEnd,
|
||||||
TURNDomain: req.TURNDomain,
|
TURNDomain: req.TURNDomain,
|
||||||
|
StealthDomain: req.TURNStealthDomain,
|
||||||
}
|
}
|
||||||
if err := h.systemdSpawner.SpawnTURN(ctx, req.Namespace, req.NodeID, cfg); err != nil {
|
if err := h.systemdSpawner.SpawnTURN(ctx, req.Namespace, req.NodeID, cfg); err != nil {
|
||||||
h.logger.Error("Failed to spawn TURN instance", zap.Error(err))
|
h.logger.Error("Failed to spawn TURN instance", zap.Error(err))
|
||||||
|
|||||||
@ -95,6 +95,11 @@ type InstanceConfig struct {
|
|||||||
SFUPort int // SFU signaling port on this node
|
SFUPort int // SFU signaling port on this node
|
||||||
TURNDomain string // TURN server domain (e.g., "turn.ns-alice.orama-devnet.network")
|
TURNDomain string // TURN server domain (e.g., "turn.ns-alice.orama-devnet.network")
|
||||||
TURNSecret string // TURN shared secret for credential generation
|
TURNSecret string // TURN shared secret for credential generation
|
||||||
|
// TURNStealthDomain is the neutral stealth TURNS host (feat-124,
|
||||||
|
// cdn-<hash>.<base-domain>). Non-empty only when webrtc stealth is
|
||||||
|
// enabled for the namespace; turn.credentials then advertises
|
||||||
|
// `turns:<TURNStealthDomain>:443` as the final URI-ladder rung.
|
||||||
|
TURNStealthDomain string
|
||||||
// SecretsEncryptionKey is the host-wide AES-256 serverless secrets
|
// SecretsEncryptionKey is the host-wide AES-256 serverless secrets
|
||||||
// encryption key (hex-encoded). Bugboard #837 follow-up: the host gateway
|
// encryption key (hex-encoded). Bugboard #837 follow-up: the host gateway
|
||||||
// receives this via gateway.Config but spawned namespace gateways never
|
// receives this via gateway.Config but spawned namespace gateways never
|
||||||
@ -113,6 +118,7 @@ type GatewayYAMLWebRTC struct {
|
|||||||
SFUPort int `yaml:"sfu_port,omitempty"`
|
SFUPort int `yaml:"sfu_port,omitempty"`
|
||||||
TURNDomain string `yaml:"turn_domain,omitempty"`
|
TURNDomain string `yaml:"turn_domain,omitempty"`
|
||||||
TURNSecret string `yaml:"turn_secret,omitempty"`
|
TURNSecret string `yaml:"turn_secret,omitempty"`
|
||||||
|
TURNStealthDomain string `yaml:"turn_stealth_domain,omitempty"`
|
||||||
}
|
}
|
||||||
|
|
||||||
// GatewayYAMLConfig represents the gateway YAML configuration structure
|
// GatewayYAMLConfig represents the gateway YAML configuration structure
|
||||||
@ -338,6 +344,7 @@ func (is *InstanceSpawner) generateConfig(configPath string, cfg InstanceConfig,
|
|||||||
SFUPort: cfg.SFUPort,
|
SFUPort: cfg.SFUPort,
|
||||||
TURNDomain: cfg.TURNDomain,
|
TURNDomain: cfg.TURNDomain,
|
||||||
TURNSecret: cfg.TURNSecret,
|
TURNSecret: cfg.TURNSecret,
|
||||||
|
TURNStealthDomain: cfg.TURNStealthDomain,
|
||||||
},
|
},
|
||||||
SecretsEncryptionKey: cfg.SecretsEncryptionKey,
|
SecretsEncryptionKey: cfg.SecretsEncryptionKey,
|
||||||
}
|
}
|
||||||
|
|||||||
@ -67,6 +67,12 @@ func (g *Gateway) Routes() http.Handler {
|
|||||||
// Namespace WebRTC enable/disable/status (public, JWT/API key auth via middleware)
|
// Namespace WebRTC enable/disable/status (public, JWT/API key auth via middleware)
|
||||||
mux.HandleFunc("/v1/namespace/webrtc/enable", g.namespaceWebRTCEnablePublicHandler)
|
mux.HandleFunc("/v1/namespace/webrtc/enable", g.namespaceWebRTCEnablePublicHandler)
|
||||||
mux.HandleFunc("/v1/namespace/webrtc/disable", g.namespaceWebRTCDisablePublicHandler)
|
mux.HandleFunc("/v1/namespace/webrtc/disable", g.namespaceWebRTCDisablePublicHandler)
|
||||||
|
mux.HandleFunc("/v1/namespace/webrtc/stealth/enable", func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
g.namespaceWebRTCStealthPublicHandler(w, r, true)
|
||||||
|
})
|
||||||
|
mux.HandleFunc("/v1/namespace/webrtc/stealth/disable", func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
g.namespaceWebRTCStealthPublicHandler(w, r, false)
|
||||||
|
})
|
||||||
mux.HandleFunc("/v1/namespace/webrtc/status", g.namespaceWebRTCStatusPublicHandler)
|
mux.HandleFunc("/v1/namespace/webrtc/status", g.namespaceWebRTCStatusPublicHandler)
|
||||||
|
|
||||||
// auth endpoints
|
// auth endpoints
|
||||||
|
|||||||
@ -695,6 +695,7 @@ func (cm *ClusterManager) spawnGatewayRemote(ctx context.Context, nodeIP string,
|
|||||||
"gateway_sfu_port": cfg.SFUPort,
|
"gateway_sfu_port": cfg.SFUPort,
|
||||||
"gateway_turn_domain": cfg.TURNDomain,
|
"gateway_turn_domain": cfg.TURNDomain,
|
||||||
"gateway_turn_secret": cfg.TURNSecret,
|
"gateway_turn_secret": cfg.TURNSecret,
|
||||||
|
"gateway_turn_stealth_domain": cfg.TURNStealthDomain,
|
||||||
// Bugboard #837 follow-up: carry the host secrets encryption key to
|
// Bugboard #837 follow-up: carry the host secrets encryption key to
|
||||||
// the remote node so its spawned namespace gateway can manage secrets.
|
// the remote node so its spawned namespace gateway can manage secrets.
|
||||||
"gateway_secrets_encryption_key": cfg.SecretsEncryptionKey,
|
"gateway_secrets_encryption_key": cfg.SecretsEncryptionKey,
|
||||||
@ -1614,6 +1615,7 @@ func (cm *ClusterManager) restoreClusterOnNode(ctx context.Context, clusterID, n
|
|||||||
gwCfg.SFUPort = sfuBlock.SFUSignalingPort
|
gwCfg.SFUPort = sfuBlock.SFUSignalingPort
|
||||||
gwCfg.TURNDomain = fmt.Sprintf("turn.ns-%s.%s", namespaceName, cm.baseDomain)
|
gwCfg.TURNDomain = fmt.Sprintf("turn.ns-%s.%s", namespaceName, cm.baseDomain)
|
||||||
gwCfg.TURNSecret = webrtcCfg.TURNSharedSecret
|
gwCfg.TURNSecret = webrtcCfg.TURNSharedSecret
|
||||||
|
gwCfg.TURNStealthDomain = cm.stealthDomainFor(namespaceName, webrtcCfg)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1681,6 +1683,7 @@ type ClusterLocalState struct {
|
|||||||
HasTURN bool `json:"has_turn,omitempty"`
|
HasTURN bool `json:"has_turn,omitempty"`
|
||||||
TURNSharedSecret string `json:"turn_shared_secret,omitempty"` // Needed for gateway to generate TURN credentials on cold start
|
TURNSharedSecret string `json:"turn_shared_secret,omitempty"` // Needed for gateway to generate TURN credentials on cold start
|
||||||
TURNDomain string `json:"turn_domain,omitempty"` // TURN server domain for gateway config
|
TURNDomain string `json:"turn_domain,omitempty"` // TURN server domain for gateway config
|
||||||
|
TURNStealthDomain string `json:"turn_stealth_domain,omitempty"` // Stealth TURNS:443 host (feat-124); empty when stealth disabled
|
||||||
TURNCredentialTTL int `json:"turn_credential_ttl,omitempty"`
|
TURNCredentialTTL int `json:"turn_credential_ttl,omitempty"`
|
||||||
SFUSignalingPort int `json:"sfu_signaling_port,omitempty"`
|
SFUSignalingPort int `json:"sfu_signaling_port,omitempty"`
|
||||||
SFUMediaPortStart int `json:"sfu_media_port_start,omitempty"`
|
SFUMediaPortStart int `json:"sfu_media_port_start,omitempty"`
|
||||||
@ -1840,6 +1843,7 @@ type restoreWebRTC struct {
|
|||||||
sfuPort int
|
sfuPort int
|
||||||
turnDomain string
|
turnDomain string
|
||||||
turnSecret string
|
turnSecret string
|
||||||
|
stealthDomain string // feat-124: empty when webrtc stealth is disabled
|
||||||
}
|
}
|
||||||
|
|
||||||
// chooseRestoreWebRTC resolves a restored gateway's WebRTC config. TWO
|
// chooseRestoreWebRTC resolves a restored gateway's WebRTC config. TWO
|
||||||
@ -1864,11 +1868,12 @@ type restoreWebRTC struct {
|
|||||||
// Extracted as a pure function so the precedence is unit-testable without
|
// Extracted as a pure function so the precedence is unit-testable without
|
||||||
// standing up the full restore path (systemd spawner + DB + port store).
|
// standing up the full restore path (systemd spawner + DB + port store).
|
||||||
func chooseRestoreWebRTC(
|
func chooseRestoreWebRTC(
|
||||||
stateHasSFU bool, stateSFUPort int, stateTURNDomain, stateTURNSecret string,
|
stateHasSFU bool, stateSFUPort int, stateTURNDomain, stateTURNSecret, stateStealthDomain string,
|
||||||
dbFetch func() (turnSecret, turnDomain string, sfuPort int),
|
dbFetch func() (turnSecret, turnDomain, stealthDomain string, sfuPort int),
|
||||||
) restoreWebRTC {
|
) restoreWebRTC {
|
||||||
turnSecret := stateTURNSecret
|
turnSecret := stateTURNSecret
|
||||||
turnDomain := stateTURNDomain
|
turnDomain := stateTURNDomain
|
||||||
|
stealthDomain := stateStealthDomain
|
||||||
sfuPort := 0
|
sfuPort := 0
|
||||||
if stateHasSFU && stateSFUPort > 0 {
|
if stateHasSFU && stateSFUPort > 0 {
|
||||||
sfuPort = stateSFUPort
|
sfuPort = stateSFUPort
|
||||||
@ -1878,12 +1883,17 @@ func chooseRestoreWebRTC(
|
|||||||
// the marker that the namespace has WebRTC enabled at all. The state
|
// the marker that the namespace has WebRTC enabled at all. The state
|
||||||
// file is not updated by EnableWebRTC, so a namespace enabled after
|
// file is not updated by EnableWebRTC, so a namespace enabled after
|
||||||
// the state file was written reaches here with an empty secret.
|
// the state file was written reaches here with an empty secret.
|
||||||
|
// (Stealth toggles DO rewrite cluster state on every node, so the
|
||||||
|
// state-first read stays fresh for stealthDomain too.)
|
||||||
if turnSecret == "" {
|
if turnSecret == "" {
|
||||||
if dbSecret, dbDomain, dbSFU := dbFetch(); dbSecret != "" {
|
if dbSecret, dbDomain, dbStealth, dbSFU := dbFetch(); dbSecret != "" {
|
||||||
turnSecret = dbSecret
|
turnSecret = dbSecret
|
||||||
if turnDomain == "" {
|
if turnDomain == "" {
|
||||||
turnDomain = dbDomain
|
turnDomain = dbDomain
|
||||||
}
|
}
|
||||||
|
if stealthDomain == "" {
|
||||||
|
stealthDomain = dbStealth
|
||||||
|
}
|
||||||
if sfuPort == 0 {
|
if sfuPort == 0 {
|
||||||
sfuPort = dbSFU
|
sfuPort = dbSFU
|
||||||
}
|
}
|
||||||
@ -1895,6 +1905,7 @@ func chooseRestoreWebRTC(
|
|||||||
sfuPort: sfuPort,
|
sfuPort: sfuPort,
|
||||||
turnDomain: turnDomain,
|
turnDomain: turnDomain,
|
||||||
turnSecret: turnSecret,
|
turnSecret: turnSecret,
|
||||||
|
stealthDomain: stealthDomain,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -2050,11 +2061,11 @@ func (cm *ClusterManager) restoreClusterFromState(ctx context.Context, state *Cl
|
|||||||
// fields here. The lazy dbFetch only hits the DB when the state
|
// fields here. The lazy dbFetch only hits the DB when the state
|
||||||
// file is incomplete.
|
// file is incomplete.
|
||||||
wr := chooseRestoreWebRTC(
|
wr := chooseRestoreWebRTC(
|
||||||
state.HasSFU, state.SFUSignalingPort, state.TURNDomain, state.TURNSharedSecret,
|
state.HasSFU, state.SFUSignalingPort, state.TURNDomain, state.TURNSharedSecret, state.TURNStealthDomain,
|
||||||
func() (turnSecret, turnDomain string, sfuPort int) {
|
func() (turnSecret, turnDomain, stealthDomain string, sfuPort int) {
|
||||||
webrtcCfg, err := cm.GetWebRTCConfig(ctx, state.NamespaceName)
|
webrtcCfg, err := cm.GetWebRTCConfig(ctx, state.NamespaceName)
|
||||||
if err != nil || webrtcCfg == nil {
|
if err != nil || webrtcCfg == nil {
|
||||||
return "", "", 0
|
return "", "", "", 0
|
||||||
}
|
}
|
||||||
// TURN is namespace-wide; SFU port is per-node and may be
|
// TURN is namespace-wide; SFU port is per-node and may be
|
||||||
// absent on a gateway-only (non-SFU) node — that's fine,
|
// absent on a gateway-only (non-SFU) node — that's fine,
|
||||||
@ -2065,6 +2076,7 @@ func (cm *ClusterManager) restoreClusterFromState(ctx context.Context, state *Cl
|
|||||||
}
|
}
|
||||||
return webrtcCfg.TURNSharedSecret,
|
return webrtcCfg.TURNSharedSecret,
|
||||||
fmt.Sprintf("turn.ns-%s.%s", state.NamespaceName, cm.baseDomain),
|
fmt.Sprintf("turn.ns-%s.%s", state.NamespaceName, cm.baseDomain),
|
||||||
|
cm.stealthDomainFor(state.NamespaceName, webrtcCfg),
|
||||||
sfu
|
sfu
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
@ -2076,6 +2088,7 @@ func (cm *ClusterManager) restoreClusterFromState(ctx context.Context, state *Cl
|
|||||||
gwCfg.SFUPort = wr.sfuPort
|
gwCfg.SFUPort = wr.sfuPort
|
||||||
gwCfg.TURNDomain = wr.turnDomain
|
gwCfg.TURNDomain = wr.turnDomain
|
||||||
gwCfg.TURNSecret = wr.turnSecret
|
gwCfg.TURNSecret = wr.turnSecret
|
||||||
|
gwCfg.TURNStealthDomain = wr.stealthDomain
|
||||||
}
|
}
|
||||||
|
|
||||||
resp, err := http.Get(fmt.Sprintf("http://localhost:%d/v1/health", pb.GatewayHTTPPort))
|
resp, err := http.Get(fmt.Sprintf("http://localhost:%d/v1/health", pb.GatewayHTTPPort))
|
||||||
@ -2126,6 +2139,7 @@ func (cm *ClusterManager) restoreClusterFromState(ctx context.Context, state *Cl
|
|||||||
RelayPortStart: state.TURNRelayPortStart,
|
RelayPortStart: state.TURNRelayPortStart,
|
||||||
RelayPortEnd: state.TURNRelayPortEnd,
|
RelayPortEnd: state.TURNRelayPortEnd,
|
||||||
TURNDomain: fmt.Sprintf("turn.ns-%s.%s", state.NamespaceName, cm.baseDomain),
|
TURNDomain: fmt.Sprintf("turn.ns-%s.%s", state.NamespaceName, cm.baseDomain),
|
||||||
|
StealthDomain: cm.stealthDomainFor(state.NamespaceName, webrtcCfg),
|
||||||
}
|
}
|
||||||
if err := cm.systemdSpawner.SpawnTURN(ctx, state.NamespaceName, cm.localNodeID, turnCfg); err != nil {
|
if err := cm.systemdSpawner.SpawnTURN(ctx, state.NamespaceName, cm.localNodeID, turnCfg); err != nil {
|
||||||
cm.logger.Error("Failed to restore TURN from state", zap.String("namespace", state.NamespaceName), zap.Error(err))
|
cm.logger.Error("Failed to restore TURN from state", zap.String("namespace", state.NamespaceName), zap.Error(err))
|
||||||
|
|||||||
263
core/pkg/namespace/cluster_manager_stealth.go
Normal file
263
core/pkg/namespace/cluster_manager_stealth.go
Normal file
@ -0,0 +1,263 @@
|
|||||||
|
package namespace
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"fmt"
|
||||||
|
|
||||||
|
"github.com/DeBrosOfficial/network/pkg/client"
|
||||||
|
"github.com/DeBrosOfficial/network/pkg/turn"
|
||||||
|
"go.uber.org/zap"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Stealth TURNS-over-443 lifecycle (feat-124, censorship-resistant calling).
|
||||||
|
//
|
||||||
|
// Enabling stealth for a namespace whose WebRTC is already running:
|
||||||
|
// 1. creates DNS A records for the neutral stealth host -> the TURN nodes,
|
||||||
|
// 2. flips namespace_webrtc_config.stealth_enabled,
|
||||||
|
// 3. re-spawns the namespace's TURN servers with the stealth domain (the
|
||||||
|
// spawner provisions a Let's Encrypt cert for it — hard-fail, never
|
||||||
|
// self-signed),
|
||||||
|
// 4. rewrites cluster-state.json on every node (so DB-less restores keep
|
||||||
|
// the stealth domain), and
|
||||||
|
// 5. restarts the namespace gateways so turn.credentials advertises
|
||||||
|
// `turns:<stealth-host>:443` as the final URI-ladder rung.
|
||||||
|
//
|
||||||
|
// The SNI router on :443 discovers the route (stealth host -> local TURN TLS
|
||||||
|
// port) from the TURN config files on disk — no extra registration step.
|
||||||
|
|
||||||
|
// stealthDomainFor returns the namespace's stealth TURNS host when stealth is
|
||||||
|
// enabled in its WebRTC config, else "" (callers treat empty as disabled).
|
||||||
|
func (cm *ClusterManager) stealthDomainFor(namespaceName string, webrtcCfg *WebRTCConfig) string {
|
||||||
|
if webrtcCfg == nil || !webrtcCfg.StealthEnabled {
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
return turn.StealthHostForNamespace(namespaceName, cm.baseDomain)
|
||||||
|
}
|
||||||
|
|
||||||
|
// EnableWebRTCStealth enables the stealth TURNS:443 path for a namespace.
|
||||||
|
// Requires WebRTC to already be enabled.
|
||||||
|
func (cm *ClusterManager) EnableWebRTCStealth(ctx context.Context, namespaceName string) error {
|
||||||
|
cluster, webrtcCfg, err := cm.getStealthPrereqs(ctx, namespaceName)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
if webrtcCfg.StealthEnabled {
|
||||||
|
return ErrWebRTCStealthAlreadyEnabled
|
||||||
|
}
|
||||||
|
|
||||||
|
stealthDomain := turn.StealthHostForNamespace(namespaceName, cm.baseDomain)
|
||||||
|
cm.logger.Info("Enabling WebRTC stealth for namespace",
|
||||||
|
zap.String("namespace", namespaceName),
|
||||||
|
zap.String("stealth_domain", stealthDomain))
|
||||||
|
|
||||||
|
clusterNodes, err := cm.getClusterNodesWithIPs(ctx, cluster.ID)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("failed to get cluster nodes: %w", err)
|
||||||
|
}
|
||||||
|
turnBlocks, err := cm.getWebRTCBlocksByType(ctx, cluster.ID, "turn")
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("failed to get TURN allocations for namespace %s: %w", namespaceName, err)
|
||||||
|
}
|
||||||
|
if len(turnBlocks) == 0 {
|
||||||
|
return fmt.Errorf("no TURN allocations found for namespace %s (is WebRTC fully enabled?)", namespaceName)
|
||||||
|
}
|
||||||
|
|
||||||
|
// DNS first — cert provisioning and clients both need the name to resolve.
|
||||||
|
var turnIPs []string
|
||||||
|
for _, block := range turnBlocks {
|
||||||
|
for _, n := range clusterNodes {
|
||||||
|
if n.NodeID == block.NodeID {
|
||||||
|
turnIPs = append(turnIPs, n.PublicIP)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if err := cm.dnsManager.CreateStealthTURNRecords(ctx, namespaceName, stealthDomain, turnIPs); err != nil {
|
||||||
|
return fmt.Errorf("failed to create stealth DNS records: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := cm.setStealthEnabled(ctx, cluster.ID, true); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
// Re-spawn TURN with the stealth domain; roll back on failure so the
|
||||||
|
// board never claims a stealth endpoint that doesn't terminate TLS.
|
||||||
|
if err := cm.respawnTURNWithStealth(ctx, cluster, clusterNodes, turnBlocks, webrtcCfg.TURNSharedSecret, stealthDomain); err != nil {
|
||||||
|
cm.rollbackStealthEnable(ctx, cluster.ID, namespaceName)
|
||||||
|
return fmt.Errorf("failed to re-spawn TURN with stealth cert (stealth rolled back): %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
cm.refreshStateAndGateways(ctx, cluster, clusterNodes, stealthDomain, webrtcCfg.TURNSharedSecret)
|
||||||
|
cm.logEvent(ctx, cluster.ID, EventWebRTCEnabled, "",
|
||||||
|
fmt.Sprintf("WebRTC stealth enabled (%s)", stealthDomain), nil)
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// DisableWebRTCStealth turns the stealth TURNS:443 path off again. TURN and
|
||||||
|
// the baseline ladder (udp/tcp 3478, turns:5349) keep running.
|
||||||
|
func (cm *ClusterManager) DisableWebRTCStealth(ctx context.Context, namespaceName string) error {
|
||||||
|
cluster, webrtcCfg, err := cm.getStealthPrereqs(ctx, namespaceName)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
if !webrtcCfg.StealthEnabled {
|
||||||
|
return ErrWebRTCStealthNotEnabled
|
||||||
|
}
|
||||||
|
|
||||||
|
cm.logger.Info("Disabling WebRTC stealth for namespace", zap.String("namespace", namespaceName))
|
||||||
|
|
||||||
|
clusterNodes, err := cm.getClusterNodesWithIPs(ctx, cluster.ID)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("failed to get cluster nodes: %w", err)
|
||||||
|
}
|
||||||
|
turnBlocks, err := cm.getWebRTCBlocksByType(ctx, cluster.ID, "turn")
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("failed to get TURN allocations: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := cm.setStealthEnabled(ctx, cluster.ID, false); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
if err := cm.respawnTURNWithStealth(ctx, cluster, clusterNodes, turnBlocks, webrtcCfg.TURNSharedSecret, ""); err != nil {
|
||||||
|
return fmt.Errorf("failed to re-spawn TURN without stealth: %w", err)
|
||||||
|
}
|
||||||
|
if err := cm.dnsManager.DeleteStealthTURNRecords(ctx, namespaceName); err != nil {
|
||||||
|
cm.logger.Warn("Failed to delete stealth DNS records", zap.Error(err))
|
||||||
|
}
|
||||||
|
cm.refreshStateAndGateways(ctx, cluster, clusterNodes, "", webrtcCfg.TURNSharedSecret)
|
||||||
|
cm.logEvent(ctx, cluster.ID, EventWebRTCDisabled, "", "WebRTC stealth disabled", nil)
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// getStealthPrereqs validates the cluster exists and WebRTC is enabled,
|
||||||
|
// returning both records (with the TURN secret already decrypted).
|
||||||
|
func (cm *ClusterManager) getStealthPrereqs(ctx context.Context, namespaceName string) (*NamespaceCluster, *WebRTCConfig, error) {
|
||||||
|
cluster, err := cm.GetClusterByNamespace(ctx, namespaceName)
|
||||||
|
if err != nil {
|
||||||
|
return nil, nil, fmt.Errorf("failed to get cluster: %w", err)
|
||||||
|
}
|
||||||
|
if cluster == nil {
|
||||||
|
return nil, nil, ErrClusterNotFound
|
||||||
|
}
|
||||||
|
webrtcCfg, err := cm.GetWebRTCConfig(ctx, namespaceName)
|
||||||
|
if err != nil {
|
||||||
|
return nil, nil, fmt.Errorf("failed to get WebRTC config: %w", err)
|
||||||
|
}
|
||||||
|
if webrtcCfg == nil {
|
||||||
|
return nil, nil, ErrWebRTCNotEnabled
|
||||||
|
}
|
||||||
|
return cluster, webrtcCfg, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// setStealthEnabled flips the stealth flag in namespace_webrtc_config.
|
||||||
|
func (cm *ClusterManager) setStealthEnabled(ctx context.Context, clusterID string, enabled bool) error {
|
||||||
|
internalCtx := client.WithInternalAuth(ctx)
|
||||||
|
val := 0
|
||||||
|
if enabled {
|
||||||
|
val = 1
|
||||||
|
}
|
||||||
|
if _, err := cm.db.Exec(internalCtx,
|
||||||
|
`UPDATE namespace_webrtc_config SET stealth_enabled = ? WHERE namespace_cluster_id = ? AND enabled = 1`,
|
||||||
|
val, clusterID); err != nil {
|
||||||
|
return fmt.Errorf("failed to update stealth_enabled: %w", err)
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// respawnTURNWithStealth stops and re-spawns every TURN instance of the
|
||||||
|
// cluster with the given stealth domain ("" = stealth off). The spawner
|
||||||
|
// provisions the stealth cert and writes the new TURN config; the SNI
|
||||||
|
// router's discovery picks the route change up from disk.
|
||||||
|
func (cm *ClusterManager) respawnTURNWithStealth(
|
||||||
|
ctx context.Context,
|
||||||
|
cluster *NamespaceCluster,
|
||||||
|
clusterNodes []clusterNodeInfo,
|
||||||
|
turnBlocks []WebRTCPortBlock,
|
||||||
|
turnSecret, stealthDomain string,
|
||||||
|
) error {
|
||||||
|
turnDomain := fmt.Sprintf("turn.ns-%s.%s", cluster.NamespaceName, cm.baseDomain)
|
||||||
|
for _, block := range turnBlocks {
|
||||||
|
var node *clusterNodeInfo
|
||||||
|
for i := range clusterNodes {
|
||||||
|
if clusterNodes[i].NodeID == block.NodeID {
|
||||||
|
node = &clusterNodes[i]
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if node == nil {
|
||||||
|
return fmt.Errorf("TURN node %s not found in cluster nodes", block.NodeID)
|
||||||
|
}
|
||||||
|
|
||||||
|
cm.stopTURNOnNode(ctx, node.NodeID, node.InternalIP, cluster.NamespaceName)
|
||||||
|
turnCfg := TURNInstanceConfig{
|
||||||
|
Namespace: cluster.NamespaceName,
|
||||||
|
NodeID: node.NodeID,
|
||||||
|
ListenAddr: fmt.Sprintf("0.0.0.0:%d", block.TURNListenPort),
|
||||||
|
TURNSListenAddr: fmt.Sprintf("0.0.0.0:%d", block.TURNTLSPort),
|
||||||
|
PublicIP: node.PublicIP,
|
||||||
|
Realm: cm.baseDomain,
|
||||||
|
AuthSecret: turnSecret,
|
||||||
|
RelayPortStart: block.TURNRelayPortStart,
|
||||||
|
RelayPortEnd: block.TURNRelayPortEnd,
|
||||||
|
TURNDomain: turnDomain,
|
||||||
|
StealthDomain: stealthDomain,
|
||||||
|
}
|
||||||
|
if err := cm.spawnTURNOnNode(ctx, *node, cluster.NamespaceName, turnCfg); err != nil {
|
||||||
|
return fmt.Errorf("failed to re-spawn TURN on node %s: %w", node.NodeID, err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// rollbackStealthEnable best-effort reverts the DB flag + DNS records after a
|
||||||
|
// failed stealth enable, so the system never advertises a half-built path.
|
||||||
|
func (cm *ClusterManager) rollbackStealthEnable(ctx context.Context, clusterID, namespaceName string) {
|
||||||
|
if err := cm.setStealthEnabled(ctx, clusterID, false); err != nil {
|
||||||
|
cm.logger.Warn("Stealth rollback: failed to clear stealth_enabled", zap.Error(err))
|
||||||
|
}
|
||||||
|
if err := cm.dnsManager.DeleteStealthTURNRecords(ctx, namespaceName); err != nil {
|
||||||
|
cm.logger.Warn("Stealth rollback: failed to delete DNS records", zap.Error(err))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// refreshStateAndGateways rewrites cluster-state.json on all nodes with the
|
||||||
|
// new stealth domain and restarts the namespace gateways so turn.credentials
|
||||||
|
// reflects the change. Failures are logged per node (the reconciler converges
|
||||||
|
// stragglers later via the gatewayConfigInSync drift check).
|
||||||
|
func (cm *ClusterManager) refreshStateAndGateways(
|
||||||
|
ctx context.Context,
|
||||||
|
cluster *NamespaceCluster,
|
||||||
|
clusterNodes []clusterNodeInfo,
|
||||||
|
stealthDomain, turnSecret string,
|
||||||
|
) {
|
||||||
|
turnDomain := fmt.Sprintf("turn.ns-%s.%s", cluster.NamespaceName, cm.baseDomain)
|
||||||
|
|
||||||
|
sfuBlockList, err := cm.getWebRTCBlocksByType(ctx, cluster.ID, "sfu")
|
||||||
|
if err != nil {
|
||||||
|
cm.logger.Warn("Failed to get SFU allocations for state refresh", zap.Error(err))
|
||||||
|
}
|
||||||
|
turnBlockList, err := cm.getWebRTCBlocksByType(ctx, cluster.ID, "turn")
|
||||||
|
if err != nil {
|
||||||
|
cm.logger.Warn("Failed to get TURN allocations for state refresh", zap.Error(err))
|
||||||
|
}
|
||||||
|
sfuBlocks := make(map[string]*WebRTCPortBlock)
|
||||||
|
for i := range sfuBlockList {
|
||||||
|
sfuBlocks[sfuBlockList[i].NodeID] = &sfuBlockList[i]
|
||||||
|
}
|
||||||
|
turnBlocks := make(map[string]*WebRTCPortBlock)
|
||||||
|
for i := range turnBlockList {
|
||||||
|
turnBlocks[turnBlockList[i].NodeID] = &turnBlockList[i]
|
||||||
|
}
|
||||||
|
|
||||||
|
cm.updateClusterStateWithWebRTC(ctx, cluster, clusterNodes, sfuBlocks, turnBlocks, turnDomain, stealthDomain, turnSecret)
|
||||||
|
|
||||||
|
portBlocks, err := cm.portAllocator.GetAllPortBlocks(ctx, cluster.ID)
|
||||||
|
if err != nil {
|
||||||
|
cm.logger.Warn("Failed to get port blocks for gateway restart after stealth toggle", zap.Error(err))
|
||||||
|
return
|
||||||
|
}
|
||||||
|
nodePortBlocks := make(map[string]*PortBlock)
|
||||||
|
for i := range portBlocks {
|
||||||
|
nodePortBlocks[portBlocks[i].NodeID] = &portBlocks[i]
|
||||||
|
}
|
||||||
|
cm.restartGatewaysWithWebRTC(ctx, cluster, clusterNodes, nodePortBlocks, sfuBlocks, turnDomain, stealthDomain, turnSecret)
|
||||||
|
}
|
||||||
@ -204,10 +204,10 @@ func (cm *ClusterManager) EnableWebRTC(ctx context.Context, namespaceName, enabl
|
|||||||
}
|
}
|
||||||
|
|
||||||
// 14. Update cluster-state.json on all nodes with WebRTC info
|
// 14. Update cluster-state.json on all nodes with WebRTC info
|
||||||
cm.updateClusterStateWithWebRTC(ctx, cluster, clusterNodes, sfuBlocks, turnBlocks, turnDomain, turnSecret)
|
cm.updateClusterStateWithWebRTC(ctx, cluster, clusterNodes, sfuBlocks, turnBlocks, turnDomain, "", turnSecret)
|
||||||
|
|
||||||
// 15. Restart namespace gateways with WebRTC config so they register WebRTC routes
|
// 15. Restart namespace gateways with WebRTC config so they register WebRTC routes
|
||||||
cm.restartGatewaysWithWebRTC(ctx, cluster, clusterNodes, nodePortBlocks, sfuBlocks, turnDomain, turnSecret)
|
cm.restartGatewaysWithWebRTC(ctx, cluster, clusterNodes, nodePortBlocks, sfuBlocks, turnDomain, "", turnSecret)
|
||||||
|
|
||||||
cm.logEvent(ctx, cluster.ID, EventWebRTCEnabled, "",
|
cm.logEvent(ctx, cluster.ID, EventWebRTCEnabled, "",
|
||||||
fmt.Sprintf("WebRTC enabled: SFU on %d nodes, TURN on %d nodes", len(clusterNodes), len(turnNodes)), nil)
|
fmt.Sprintf("WebRTC enabled: SFU on %d nodes, TURN on %d nodes", len(clusterNodes), len(turnNodes)), nil)
|
||||||
@ -273,17 +273,23 @@ func (cm *ClusterManager) DisableWebRTC(ctx context.Context, namespaceName strin
|
|||||||
cm.logger.Warn("Failed to deallocate WebRTC ports", zap.Error(err))
|
cm.logger.Warn("Failed to deallocate WebRTC ports", zap.Error(err))
|
||||||
}
|
}
|
||||||
|
|
||||||
// 7. Delete TURN DNS records
|
// 7. Delete TURN DNS records (both the regular and the feat-124 stealth
|
||||||
|
// records — a full WebRTC teardown must not orphan stealth A records when
|
||||||
|
// the namespace had stealth enabled). Delete-by-tag is a no-op when the
|
||||||
|
// stealth records are absent, so this is safe unconditionally.
|
||||||
if err := cm.dnsManager.DeleteTURNRecords(ctx, namespaceName); err != nil {
|
if err := cm.dnsManager.DeleteTURNRecords(ctx, namespaceName); err != nil {
|
||||||
cm.logger.Warn("Failed to delete TURN DNS records", zap.Error(err))
|
cm.logger.Warn("Failed to delete TURN DNS records", zap.Error(err))
|
||||||
}
|
}
|
||||||
|
if err := cm.dnsManager.DeleteStealthTURNRecords(ctx, namespaceName); err != nil {
|
||||||
|
cm.logger.Warn("Failed to delete stealth TURN DNS records", zap.Error(err))
|
||||||
|
}
|
||||||
|
|
||||||
// 8. Clean up DB tables
|
// 8. Clean up DB tables
|
||||||
cm.db.Exec(internalCtx, `DELETE FROM webrtc_rooms WHERE namespace_cluster_id = ?`, cluster.ID)
|
cm.db.Exec(internalCtx, `DELETE FROM webrtc_rooms WHERE namespace_cluster_id = ?`, cluster.ID)
|
||||||
cm.db.Exec(internalCtx, `DELETE FROM namespace_webrtc_config WHERE namespace_cluster_id = ?`, cluster.ID)
|
cm.db.Exec(internalCtx, `DELETE FROM namespace_webrtc_config WHERE namespace_cluster_id = ?`, cluster.ID)
|
||||||
|
|
||||||
// 9. Update cluster-state.json to remove WebRTC info
|
// 9. Update cluster-state.json to remove WebRTC info
|
||||||
cm.updateClusterStateWithWebRTC(ctx, cluster, clusterNodes, nil, nil, "", "")
|
cm.updateClusterStateWithWebRTC(ctx, cluster, clusterNodes, nil, nil, "", "", "")
|
||||||
|
|
||||||
// 10. Restart namespace gateways without WebRTC config so they unregister WebRTC routes
|
// 10. Restart namespace gateways without WebRTC config so they unregister WebRTC routes
|
||||||
portBlocks, err := cm.portAllocator.GetAllPortBlocks(ctx, cluster.ID)
|
portBlocks, err := cm.portAllocator.GetAllPortBlocks(ctx, cluster.ID)
|
||||||
@ -292,7 +298,7 @@ func (cm *ClusterManager) DisableWebRTC(ctx context.Context, namespaceName strin
|
|||||||
for i := range portBlocks {
|
for i := range portBlocks {
|
||||||
nodePortBlocks[portBlocks[i].NodeID] = &portBlocks[i]
|
nodePortBlocks[portBlocks[i].NodeID] = &portBlocks[i]
|
||||||
}
|
}
|
||||||
cm.restartGatewaysWithWebRTC(ctx, cluster, clusterNodes, nodePortBlocks, nil, "", "")
|
cm.restartGatewaysWithWebRTC(ctx, cluster, clusterNodes, nodePortBlocks, nil, "", "", "")
|
||||||
} else {
|
} else {
|
||||||
cm.logger.Warn("Failed to get port blocks for gateway restart after WebRTC disable", zap.Error(err))
|
cm.logger.Warn("Failed to get port blocks for gateway restart after WebRTC disable", zap.Error(err))
|
||||||
}
|
}
|
||||||
@ -498,6 +504,7 @@ func (cm *ClusterManager) spawnTURNRemote(ctx context.Context, nodeIP string, cf
|
|||||||
"turn_relay_start": cfg.RelayPortStart,
|
"turn_relay_start": cfg.RelayPortStart,
|
||||||
"turn_relay_end": cfg.RelayPortEnd,
|
"turn_relay_end": cfg.RelayPortEnd,
|
||||||
"turn_domain": cfg.TURNDomain,
|
"turn_domain": cfg.TURNDomain,
|
||||||
|
"turn_stealth_domain": cfg.StealthDomain,
|
||||||
})
|
})
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
@ -558,7 +565,7 @@ func (cm *ClusterManager) updateClusterStateWithWebRTC(
|
|||||||
nodes []clusterNodeInfo,
|
nodes []clusterNodeInfo,
|
||||||
sfuBlocks map[string]*WebRTCPortBlock,
|
sfuBlocks map[string]*WebRTCPortBlock,
|
||||||
turnBlocks map[string]*WebRTCPortBlock,
|
turnBlocks map[string]*WebRTCPortBlock,
|
||||||
turnDomain, turnSecret string,
|
turnDomain, turnStealthDomain, turnSecret string,
|
||||||
) {
|
) {
|
||||||
// Get existing port blocks for base state
|
// Get existing port blocks for base state
|
||||||
portBlocks, err := cm.portAllocator.GetAllPortBlocks(ctx, cluster.ID)
|
portBlocks, err := cm.portAllocator.GetAllPortBlocks(ctx, cluster.ID)
|
||||||
@ -635,6 +642,7 @@ func (cm *ClusterManager) updateClusterStateWithWebRTC(
|
|||||||
}
|
}
|
||||||
// Persist TURN domain and secret so gateways can be restored on cold start
|
// Persist TURN domain and secret so gateways can be restored on cold start
|
||||||
state.TURNDomain = turnDomain
|
state.TURNDomain = turnDomain
|
||||||
|
state.TURNStealthDomain = turnStealthDomain
|
||||||
state.TURNSharedSecret = turnSecret
|
state.TURNSharedSecret = turnSecret
|
||||||
|
|
||||||
if node.NodeID == cm.localNodeID {
|
if node.NodeID == cm.localNodeID {
|
||||||
@ -671,7 +679,7 @@ func (cm *ClusterManager) restartGatewaysWithWebRTC(
|
|||||||
nodes []clusterNodeInfo,
|
nodes []clusterNodeInfo,
|
||||||
portBlocks map[string]*PortBlock,
|
portBlocks map[string]*PortBlock,
|
||||||
sfuBlocks map[string]*WebRTCPortBlock,
|
sfuBlocks map[string]*WebRTCPortBlock,
|
||||||
turnDomain, turnSecret string,
|
turnDomain, turnStealthDomain, turnSecret string,
|
||||||
) {
|
) {
|
||||||
// Build Olric server addresses from port blocks + node IPs
|
// Build Olric server addresses from port blocks + node IPs
|
||||||
var olricServers []string
|
var olricServers []string
|
||||||
@ -715,6 +723,7 @@ func (cm *ClusterManager) restartGatewaysWithWebRTC(
|
|||||||
WebRTCEnabled: webrtcEnabled,
|
WebRTCEnabled: webrtcEnabled,
|
||||||
SFUPort: sfuPort,
|
SFUPort: sfuPort,
|
||||||
TURNDomain: turnDomain,
|
TURNDomain: turnDomain,
|
||||||
|
TURNStealthDomain: turnStealthDomain,
|
||||||
TURNSecret: turnSecret,
|
TURNSecret: turnSecret,
|
||||||
// Bugboard #837 follow-up: preserve the secrets key on WebRTC
|
// Bugboard #837 follow-up: preserve the secrets key on WebRTC
|
||||||
// restarts so enabling WebRTC doesn't drop secrets management.
|
// restarts so enabling WebRTC doesn't drop secrets management.
|
||||||
@ -766,6 +775,7 @@ func (cm *ClusterManager) restartGatewayRemote(ctx context.Context, nodeIP strin
|
|||||||
"gateway_webrtc_enabled": cfg.WebRTCEnabled,
|
"gateway_webrtc_enabled": cfg.WebRTCEnabled,
|
||||||
"gateway_sfu_port": cfg.SFUPort,
|
"gateway_sfu_port": cfg.SFUPort,
|
||||||
"gateway_turn_domain": cfg.TURNDomain,
|
"gateway_turn_domain": cfg.TURNDomain,
|
||||||
|
"gateway_turn_stealth_domain": cfg.TURNStealthDomain,
|
||||||
"gateway_turn_secret": cfg.TURNSecret,
|
"gateway_turn_secret": cfg.TURNSecret,
|
||||||
// Bugboard #837 follow-up: preserve the secrets key on WebRTC restarts.
|
// Bugboard #837 follow-up: preserve the secrets key on WebRTC restarts.
|
||||||
"gateway_secrets_encryption_key": cfg.SecretsEncryptionKey,
|
"gateway_secrets_encryption_key": cfg.SecretsEncryptionKey,
|
||||||
|
|||||||
@ -537,6 +537,7 @@ func (cm *ClusterManager) ReplaceClusterNode(ctx context.Context, cluster *Names
|
|||||||
gwCfg.SFUPort = sfuBlock.SFUSignalingPort
|
gwCfg.SFUPort = sfuBlock.SFUSignalingPort
|
||||||
gwCfg.TURNDomain = fmt.Sprintf("turn.ns-%s.%s", cluster.NamespaceName, cm.baseDomain)
|
gwCfg.TURNDomain = fmt.Sprintf("turn.ns-%s.%s", cluster.NamespaceName, cm.baseDomain)
|
||||||
gwCfg.TURNSecret = webrtcCfg.TURNSharedSecret
|
gwCfg.TURNSecret = webrtcCfg.TURNSharedSecret
|
||||||
|
gwCfg.TURNStealthDomain = cm.stealthDomainFor(cluster.NamespaceName, webrtcCfg)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1080,6 +1081,7 @@ func (cm *ClusterManager) addNodeToCluster(
|
|||||||
gwCfg.SFUPort = sfuBlock.SFUSignalingPort
|
gwCfg.SFUPort = sfuBlock.SFUSignalingPort
|
||||||
gwCfg.TURNDomain = fmt.Sprintf("turn.ns-%s.%s", cluster.NamespaceName, cm.baseDomain)
|
gwCfg.TURNDomain = fmt.Sprintf("turn.ns-%s.%s", cluster.NamespaceName, cm.baseDomain)
|
||||||
gwCfg.TURNSecret = webrtcCfg.TURNSharedSecret
|
gwCfg.TURNSecret = webrtcCfg.TURNSharedSecret
|
||||||
|
gwCfg.TURNStealthDomain = cm.stealthDomainFor(cluster.NamespaceName, webrtcCfg)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -353,6 +353,78 @@ func (drm *DNSRecordManager) DeleteTURNRecords(ctx context.Context, namespaceNam
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// stealthDNSNamespace builds the dns_records ownership tag under which a
// namespace's stealth TURNS records are stored. The tag uses its own prefix
// (not "namespace-turn:"), so a delete-by-tag of one record set never removes
// the other.
func stealthDNSNamespace(namespaceName string) string {
	const tagPrefix = "namespace-turn-stealth:"
	return tagPrefix + namespaceName
}
|
||||||
|
|
||||||
|
// CreateStealthTURNRecords creates DNS A records for the stealth TURNS host
|
||||||
|
// (feat-124): <stealthHost> -> TURN node IPs. The hostname is the neutral
|
||||||
|
// cdn-<hash>.<base-domain> label from turn.StealthHostForNamespace — it lives
|
||||||
|
// directly under the base domain (NOT under ns-<namespace>) so the SNI string
|
||||||
|
// never identifies the app.
|
||||||
|
func (drm *DNSRecordManager) CreateStealthTURNRecords(ctx context.Context, namespaceName, stealthHost string, turnIPs []string) error {
|
||||||
|
internalCtx := client.WithInternalAuth(ctx)
|
||||||
|
|
||||||
|
if stealthHost == "" {
|
||||||
|
return &ClusterError{Message: "no stealth host provided for DNS records"}
|
||||||
|
}
|
||||||
|
if len(turnIPs) == 0 {
|
||||||
|
return &ClusterError{Message: "no TURN IPs provided for stealth DNS records"}
|
||||||
|
}
|
||||||
|
|
||||||
|
fqdn := stealthHost + "."
|
||||||
|
|
||||||
|
drm.logger.Info("Creating stealth TURNS DNS records",
|
||||||
|
zap.String("namespace", namespaceName),
|
||||||
|
zap.String("fqdn", fqdn),
|
||||||
|
zap.Strings("turn_ips", turnIPs),
|
||||||
|
)
|
||||||
|
|
||||||
|
deleteQuery := `DELETE FROM dns_records WHERE namespace = ?`
|
||||||
|
_, _ = drm.db.Exec(internalCtx, deleteQuery, stealthDNSNamespace(namespaceName))
|
||||||
|
|
||||||
|
now := time.Now()
|
||||||
|
for _, ip := range turnIPs {
|
||||||
|
insertQuery := `
|
||||||
|
INSERT INTO dns_records (
|
||||||
|
fqdn, record_type, value, ttl, namespace, created_by, created_at, updated_at
|
||||||
|
) VALUES (?, ?, ?, ?, ?, ?, ?, ?)
|
||||||
|
`
|
||||||
|
_, err := drm.db.Exec(internalCtx, insertQuery,
|
||||||
|
fqdn, "A", ip, 60,
|
||||||
|
stealthDNSNamespace(namespaceName),
|
||||||
|
"cluster-manager",
|
||||||
|
now, now,
|
||||||
|
)
|
||||||
|
if err != nil {
|
||||||
|
return &ClusterError{
|
||||||
|
Message: fmt.Sprintf("failed to create stealth TURNS DNS record %s -> %s", fqdn, ip),
|
||||||
|
Cause: err,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// DeleteStealthTURNRecords deletes a namespace's stealth TURNS DNS records.
|
||||||
|
func (drm *DNSRecordManager) DeleteStealthTURNRecords(ctx context.Context, namespaceName string) error {
|
||||||
|
internalCtx := client.WithInternalAuth(ctx)
|
||||||
|
|
||||||
|
deleteQuery := `DELETE FROM dns_records WHERE namespace = ?`
|
||||||
|
_, err := drm.db.Exec(internalCtx, deleteQuery, stealthDNSNamespace(namespaceName))
|
||||||
|
if err != nil {
|
||||||
|
return &ClusterError{
|
||||||
|
Message: "failed to delete stealth TURNS DNS records",
|
||||||
|
Cause: err,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
// EnableNamespaceRecord marks a specific IP's record as active (for recovery)
|
// EnableNamespaceRecord marks a specific IP's record as active (for recovery)
|
||||||
func (drm *DNSRecordManager) EnableNamespaceRecord(ctx context.Context, namespaceName, ip string) error {
|
func (drm *DNSRecordManager) EnableNamespaceRecord(ctx context.Context, namespaceName, ip string) error {
|
||||||
internalCtx := client.WithInternalAuth(ctx)
|
internalCtx := client.WithInternalAuth(ctx)
|
||||||
|
|||||||
@ -55,7 +55,7 @@ func TestGatewayWebRTCInSync_matchingBlock_returnsTrue(t *testing.T) {
|
|||||||
|
|
||||||
func TestGatewayWebRTCInSync_eachFieldDriftDetected(t *testing.T) {
|
func TestGatewayWebRTCInSync_eachFieldDriftDetected(t *testing.T) {
|
||||||
// Any single drifted field must trigger a restart. Pins that the
|
// Any single drifted field must trigger a restart. Pins that the
|
||||||
// comparison covers all four webrtc fields (a future refactor that
|
// comparison covers all five webrtc fields (a future refactor that
|
||||||
// drops one would silently let that field drift forever).
|
// drops one would silently let that field drift forever).
|
||||||
base := gateway.GatewayYAMLWebRTC{
|
base := gateway.GatewayYAMLWebRTC{
|
||||||
Enabled: true, SFUPort: 30000,
|
Enabled: true, SFUPort: 30000,
|
||||||
@ -69,6 +69,7 @@ func TestGatewayWebRTCInSync_eachFieldDriftDetected(t *testing.T) {
|
|||||||
{"sfu port changed", func(w *gateway.GatewayYAMLWebRTC) { w.SFUPort = 30001 }},
|
{"sfu port changed", func(w *gateway.GatewayYAMLWebRTC) { w.SFUPort = 30001 }},
|
||||||
{"turn domain changed", func(w *gateway.GatewayYAMLWebRTC) { w.TURNDomain = "turn.other" }},
|
{"turn domain changed", func(w *gateway.GatewayYAMLWebRTC) { w.TURNDomain = "turn.other" }},
|
||||||
{"turn secret rotated", func(w *gateway.GatewayYAMLWebRTC) { w.TURNSecret = "rotated" }},
|
{"turn secret rotated", func(w *gateway.GatewayYAMLWebRTC) { w.TURNSecret = "rotated" }},
|
||||||
|
{"stealth domain changed", func(w *gateway.GatewayYAMLWebRTC) { w.TURNStealthDomain = "cdn-deadbeef0000.orama-devnet.network" }},
|
||||||
}
|
}
|
||||||
for _, tc := range mutations {
|
for _, tc := range mutations {
|
||||||
t.Run(tc.name, func(t *testing.T) {
|
t.Run(tc.name, func(t *testing.T) {
|
||||||
@ -190,3 +191,25 @@ func TestReconcileGateway_missingConfigReturnsErrorNotRestart(t *testing.T) {
|
|||||||
t.Error("missing config must return an error (don't blind-restart a healthy gateway)")
|
t.Error("missing config must return an error (don't blind-restart a healthy gateway)")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestGatewayWebRTCInSync_stealthEnableDetectedAsDrift(t *testing.T) {
|
||||||
|
// feat-124: enabling stealth must drift an otherwise-matching gateway so
|
||||||
|
// the reconciler rewrites its yaml with turn_stealth_domain and restarts
|
||||||
|
// it — that's how turn.credentials starts advertising turns:<host>:443.
|
||||||
|
onDisk := gateway.GatewayYAMLWebRTC{
|
||||||
|
Enabled: true, SFUPort: 30000,
|
||||||
|
TURNDomain: "turn.ns-anchat-test.orama-devnet.network", TURNSecret: "the-secret",
|
||||||
|
}
|
||||||
|
desired := desiredEnabled()
|
||||||
|
desired.TURNStealthDomain = "cdn-abc123def456.orama-devnet.network"
|
||||||
|
if gatewayWebRTCInSync(onDisk, desired) {
|
||||||
|
t.Error("stealth enable not detected as drift — gateway would never advertise the stealth URI")
|
||||||
|
}
|
||||||
|
|
||||||
|
// And once the yaml carries it, the same desired config is in-sync (no
|
||||||
|
// restart loop).
|
||||||
|
onDisk.TURNStealthDomain = desired.TURNStealthDomain
|
||||||
|
if !gatewayWebRTCInSync(onDisk, desired) {
|
||||||
|
t.Error("matching stealth domain reported as drift — restart loop")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|||||||
@ -11,11 +11,11 @@ import "testing"
|
|||||||
// port is per-node (0 on a gateway-only node). Pins both the drift
|
// port is per-node (0 on a gateway-only node). Pins both the drift
|
||||||
// fallback and the non-SFU-gateway case.
|
// fallback and the non-SFU-gateway case.
|
||||||
|
|
||||||
// dbFetch signature: () -> (turnSecret, turnDomain string, sfuPort int).
|
// dbFetch signature: () -> (turnSecret, turnDomain, stealthDomain string, sfuPort int).
|
||||||
func dbNone() (string, string, int) { return "", "", 0 }
|
func dbNone() (string, string, string, int) { return "", "", "", 0 }
|
||||||
|
|
||||||
func dbFull(secret, domain string, sfuPort int) func() (string, string, int) {
|
func dbFull(secret, domain string, sfuPort int) func() (string, string, string, int) {
|
||||||
return func() (string, string, int) { return secret, domain, sfuPort }
|
return func() (string, string, string, int) { return secret, domain, "", sfuPort }
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestChooseRestoreWebRTC_stateFileCompleteWins(t *testing.T) {
|
func TestChooseRestoreWebRTC_stateFileCompleteWins(t *testing.T) {
|
||||||
@ -23,8 +23,8 @@ func TestChooseRestoreWebRTC_stateFileCompleteWins(t *testing.T) {
|
|||||||
// (the lazy dbFetch must not be called — saves a query on the hot
|
// (the lazy dbFetch must not be called — saves a query on the hot
|
||||||
// restart path).
|
// restart path).
|
||||||
dbCalled := false
|
dbCalled := false
|
||||||
got := chooseRestoreWebRTC(true, 7800, "turn.ns-x.dbrs.space", "state-secret",
|
got := chooseRestoreWebRTC(true, 7800, "turn.ns-x.dbrs.space", "state-secret", "",
|
||||||
func() (string, string, int) { dbCalled = true; return dbNone() })
|
func() (string, string, string, int) { dbCalled = true; return dbNone() })
|
||||||
|
|
||||||
if dbCalled {
|
if dbCalled {
|
||||||
t.Error("DB fetch was called even though the state file had the TURN secret (should short-circuit)")
|
t.Error("DB fetch was called even though the state file had the TURN secret (should short-circuit)")
|
||||||
@ -41,7 +41,7 @@ func TestChooseRestoreWebRTC_staleStateFallsBackToDB(t *testing.T) {
|
|||||||
// The bug-25 drift case: state file has NO webrtc (stale — written
|
// The bug-25 drift case: state file has NO webrtc (stale — written
|
||||||
// before enable), DB says enabled WITH an SFU port on this node. MUST
|
// before enable), DB says enabled WITH an SFU port on this node. MUST
|
||||||
// fall back to the DB and re-materialize the full block.
|
// fall back to the DB and re-materialize the full block.
|
||||||
got := chooseRestoreWebRTC(false, 0, "", "",
|
got := chooseRestoreWebRTC(false, 0, "", "", "",
|
||||||
dbFull("db-secret", "turn.ns-anchat-test.dbrs.space", 7801))
|
dbFull("db-secret", "turn.ns-anchat-test.dbrs.space", 7801))
|
||||||
|
|
||||||
if !got.enabled {
|
if !got.enabled {
|
||||||
@ -65,7 +65,7 @@ func TestChooseRestoreWebRTC_nonSFUGatewayGetsTURNOnly(t *testing.T) {
|
|||||||
// secret (so /v1/webrtc/turn/credentials registers + works) while
|
// secret (so /v1/webrtc/turn/credentials registers + works) while
|
||||||
// sfuPort stays 0 (signal/rooms don't register). This is exactly node
|
// sfuPort stays 0 (signal/rooms don't register). This is exactly node
|
||||||
// 57's situation — pre-fix it resolved to disabled and 404'd.
|
// 57's situation — pre-fix it resolved to disabled and 404'd.
|
||||||
got := chooseRestoreWebRTC(false, 0, "", "",
|
got := chooseRestoreWebRTC(false, 0, "", "", "",
|
||||||
dbFull("db-secret", "turn.ns-anchat-test.dbrs.space", 0)) // sfuPort 0 = no local SFU
|
dbFull("db-secret", "turn.ns-anchat-test.dbrs.space", 0)) // sfuPort 0 = no local SFU
|
||||||
|
|
||||||
if !got.enabled {
|
if !got.enabled {
|
||||||
@ -84,8 +84,8 @@ func TestChooseRestoreWebRTC_stateHasTURNButNoSFU(t *testing.T) {
|
|||||||
// false / port 0. Must use the state TURN secret with sfuPort=0 and
|
// false / port 0. Must use the state TURN secret with sfuPort=0 and
|
||||||
// NOT consult the DB (TURN secret present = complete enough).
|
// NOT consult the DB (TURN secret present = complete enough).
|
||||||
dbCalled := false
|
dbCalled := false
|
||||||
got := chooseRestoreWebRTC(false, 0, "turn.ns-x.dbrs.space", "state-secret",
|
got := chooseRestoreWebRTC(false, 0, "turn.ns-x.dbrs.space", "state-secret", "",
|
||||||
func() (string, string, int) { dbCalled = true; return dbNone() })
|
func() (string, string, string, int) { dbCalled = true; return dbNone() })
|
||||||
|
|
||||||
if dbCalled {
|
if dbCalled {
|
||||||
t.Error("DB fetch called even though state file had the TURN secret")
|
t.Error("DB fetch called even though state file had the TURN secret")
|
||||||
@ -98,7 +98,7 @@ func TestChooseRestoreWebRTC_stateHasTURNButNoSFU(t *testing.T) {
|
|||||||
func TestChooseRestoreWebRTC_bothEmptyDisabled(t *testing.T) {
|
func TestChooseRestoreWebRTC_bothEmptyDisabled(t *testing.T) {
|
||||||
// Namespace genuinely without WebRTC: state empty, DB returns nothing.
|
// Namespace genuinely without WebRTC: state empty, DB returns nothing.
|
||||||
// Must return disabled so we don't register broken webrtc routes.
|
// Must return disabled so we don't register broken webrtc routes.
|
||||||
got := chooseRestoreWebRTC(false, 0, "", "", dbNone)
|
got := chooseRestoreWebRTC(false, 0, "", "", "", dbNone)
|
||||||
if got.enabled {
|
if got.enabled {
|
||||||
t.Errorf("want disabled when neither source has WebRTC; got %+v", got)
|
t.Errorf("want disabled when neither source has WebRTC; got %+v", got)
|
||||||
}
|
}
|
||||||
@ -109,8 +109,8 @@ func TestChooseRestoreWebRTC_dbNoSecretStaysDisabled(t *testing.T) {
|
|||||||
// provisioned / shouldn't happen). The TURN secret is the
|
// provisioned / shouldn't happen). The TURN secret is the
|
||||||
// enablement marker; without it we treat it as not-configured-for-
|
// enablement marker; without it we treat it as not-configured-for-
|
||||||
// TURN, but an SFU port alone still enables SFU routes.
|
// TURN, but an SFU port alone still enables SFU routes.
|
||||||
got := chooseRestoreWebRTC(false, 0, "", "",
|
got := chooseRestoreWebRTC(false, 0, "", "", "",
|
||||||
func() (string, string, int) { return "", "turn.db", 9000 })
|
func() (string, string, string, int) { return "", "turn.db", "", 9000 })
|
||||||
// dbFetch only runs when state secret is empty; here it returns no
|
// dbFetch only runs when state secret is empty; here it returns no
|
||||||
// secret, so the `if dbSecret != ""` guard means NOTHING is taken
|
// secret, so the `if dbSecret != ""` guard means NOTHING is taken
|
||||||
// from the DB → disabled. (An SFU-only-no-TURN namespace is not a
|
// from the DB → disabled. (An SFU-only-no-TURN namespace is not a
|
||||||
@ -119,3 +119,39 @@ func TestChooseRestoreWebRTC_dbNoSecretStaysDisabled(t *testing.T) {
|
|||||||
t.Errorf("DB returned no TURN secret: want disabled; got %+v", got)
|
t.Errorf("DB returned no TURN secret: want disabled; got %+v", got)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// --- feat-124 stealth domain restore precedence ---
|
||||||
|
|
||||||
|
func TestChooseRestoreWebRTC_stealthFromStateFile(t *testing.T) {
|
||||||
|
// Stealth toggles rewrite cluster state, so a fresh state file carries
|
||||||
|
// the stealth domain and must win without a DB call.
|
||||||
|
got := chooseRestoreWebRTC(true, 7800, "turn.ns-x.dbrs.space", "state-secret", "cdn-abc123def456.dbrs.space",
|
||||||
|
func() (string, string, string, int) {
|
||||||
|
t.Error("DB fetch called even though state file was complete")
|
||||||
|
return dbNone()
|
||||||
|
})
|
||||||
|
if got.stealthDomain != "cdn-abc123def456.dbrs.space" {
|
||||||
|
t.Errorf("stealthDomain = %q; want state-file value", got.stealthDomain)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestChooseRestoreWebRTC_stealthFromDBOnStaleState(t *testing.T) {
|
||||||
|
// Stale state (no TURN secret) + DB has stealth enabled → stealth domain
|
||||||
|
// re-materializes from the DB alongside the rest of the WebRTC block.
|
||||||
|
got := chooseRestoreWebRTC(false, 0, "", "", "",
|
||||||
|
func() (string, string, string, int) {
|
||||||
|
return "db-secret", "turn.ns-x.dbrs.space", "cdn-abc123def456.dbrs.space", 7801
|
||||||
|
})
|
||||||
|
if !got.enabled || got.stealthDomain != "cdn-abc123def456.dbrs.space" {
|
||||||
|
t.Errorf("want stealth domain from DB on stale state; got %+v", got)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestChooseRestoreWebRTC_noStealthStaysEmpty(t *testing.T) {
|
||||||
|
// Stealth disabled everywhere → empty stealthDomain (gateway advertises
|
||||||
|
// the baseline 3-rung ladder only).
|
||||||
|
got := chooseRestoreWebRTC(true, 7800, "turn.ns-x.dbrs.space", "state-secret", "", dbNone)
|
||||||
|
if got.stealthDomain != "" {
|
||||||
|
t.Errorf("stealthDomain = %q; want empty when stealth is disabled", got.stealthDomain)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|||||||
@ -238,6 +238,7 @@ func (s *SystemdSpawner) SpawnGateway(ctx context.Context, namespace, nodeID str
|
|||||||
SFUPort: cfg.SFUPort,
|
SFUPort: cfg.SFUPort,
|
||||||
TURNDomain: cfg.TURNDomain,
|
TURNDomain: cfg.TURNDomain,
|
||||||
TURNSecret: cfg.TURNSecret,
|
TURNSecret: cfg.TURNSecret,
|
||||||
|
TURNStealthDomain: cfg.TURNStealthDomain,
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -343,7 +344,8 @@ func gatewayWebRTCInSync(onDisk gateway.GatewayYAMLWebRTC, cfg gateway.InstanceC
|
|||||||
return onDisk.Enabled == cfg.WebRTCEnabled &&
|
return onDisk.Enabled == cfg.WebRTCEnabled &&
|
||||||
onDisk.SFUPort == cfg.SFUPort &&
|
onDisk.SFUPort == cfg.SFUPort &&
|
||||||
onDisk.TURNSecret == cfg.TURNSecret &&
|
onDisk.TURNSecret == cfg.TURNSecret &&
|
||||||
onDisk.TURNDomain == cfg.TURNDomain
|
onDisk.TURNDomain == cfg.TURNDomain &&
|
||||||
|
onDisk.TURNStealthDomain == cfg.TURNStealthDomain
|
||||||
}
|
}
|
||||||
|
|
||||||
// gatewayConfigInSync reports whether the full reconcile-relevant config on
|
// gatewayConfigInSync reports whether the full reconcile-relevant config on
|
||||||
@ -516,6 +518,68 @@ type TURNInstanceConfig struct {
|
|||||||
RelayPortStart int // Start of relay port range
|
RelayPortStart int // Start of relay port range
|
||||||
RelayPortEnd int // End of relay port range
|
RelayPortEnd int // End of relay port range
|
||||||
TURNDomain string // TURN domain for Let's Encrypt cert (e.g., "turn.ns-myapp.orama-devnet.network")
|
TURNDomain string // TURN domain for Let's Encrypt cert (e.g., "turn.ns-myapp.orama-devnet.network")
|
||||||
|
// StealthDomain is the neutral stealth TURNS host (feat-124). When set,
|
||||||
|
// the TURN server carries a second Let's Encrypt cert for this name and
|
||||||
|
// serves it to TLS clients whose SNI matches — the path the SNI router
|
||||||
|
// forwards from :443. Stealth NEVER falls back to a self-signed cert: a
|
||||||
|
// cert clients reject is indistinguishable from being blocked.
|
||||||
|
StealthDomain string
|
||||||
|
}
|
||||||
|
|
||||||
|
const (
	// acmeInternalEndpoint is the gateway's internal ACME endpoint; the
	// Caddyfile blocks generated for TURN certs point the orama DNS provider
	// at this URL.
	acmeInternalEndpoint = "http://localhost:6001/v1/internal/acme"

	// turnCertProvisionTimeout caps how long a TURN spawn waits for Caddy to
	// provision a Let's Encrypt cert. After it expires the primary domain
	// falls back to self-signed, while the stealth domain fails the spawn.
	turnCertProvisionTimeout = 2 * time.Minute
)
|
||||||
|
|
||||||
|
// resolveTURNSCert resolves the TURNS cert/key pair for a domain.
|
||||||
|
//
|
||||||
|
// Let's Encrypt via Caddy is tried FIRST whenever a domain is set — the call
|
||||||
|
// is idempotent and instant when the cert is already in Caddy's storage. This
|
||||||
|
// ordering also self-heals nodes stuck on the self-signed fallback from an
|
||||||
|
// earlier failed provisioning (live devnet finding, feat-124): the old code
|
||||||
|
// never retried Caddy once a self-signed pair existed on disk, so strict TLS
|
||||||
|
// clients kept failing turns: validation forever.
|
||||||
|
//
|
||||||
|
// allowSelfSigned controls the fallback: the primary TURN domain may fall
|
||||||
|
// back to (or reuse) a self-signed pair at <configDir>/turn-{cert,key}.pem so
|
||||||
|
// baseline TURN stays up, while the stealth domain must hard-fail instead.
|
||||||
|
func (s *SystemdSpawner) resolveTURNSCert(namespace, domain, publicIP, configDir string, allowSelfSigned bool) (string, string, error) {
|
||||||
|
if domain != "" {
|
||||||
|
caddyCert, caddyKey, err := provisionTURNCertViaCaddy(domain, acmeInternalEndpoint, turnCertProvisionTimeout)
|
||||||
|
if err == nil {
|
||||||
|
s.logger.Info("Using Let's Encrypt cert from Caddy for TURNS",
|
||||||
|
zap.String("namespace", namespace),
|
||||||
|
zap.String("domain", domain),
|
||||||
|
zap.String("cert_path", caddyCert))
|
||||||
|
return caddyCert, caddyKey, nil
|
||||||
|
}
|
||||||
|
if !allowSelfSigned {
|
||||||
|
return "", "", fmt.Errorf("failed to provision Let's Encrypt cert for stealth TURNS domain %s (no self-signed fallback — clients must be able to validate it): %w", domain, err)
|
||||||
|
}
|
||||||
|
s.logger.Warn("Let's Encrypt cert provisioning failed, falling back to self-signed",
|
||||||
|
zap.String("namespace", namespace),
|
||||||
|
zap.String("domain", domain),
|
||||||
|
zap.Error(err))
|
||||||
|
}
|
||||||
|
if !allowSelfSigned {
|
||||||
|
return "", "", fmt.Errorf("no domain configured for TURNS cert in namespace %s", namespace)
|
||||||
|
}
|
||||||
|
|
||||||
|
certPath := filepath.Join(configDir, "turn-cert.pem")
|
||||||
|
keyPath := filepath.Join(configDir, "turn-key.pem")
|
||||||
|
if _, err := os.Stat(certPath); os.IsNotExist(err) {
|
||||||
|
if err := turn.GenerateSelfSignedCert(certPath, keyPath, publicIP); err != nil {
|
||||||
|
return "", "", fmt.Errorf("failed to generate TURNS self-signed cert for namespace %s: %w", namespace, err)
|
||||||
|
}
|
||||||
|
s.logger.Info("Generated TURNS self-signed certificate",
|
||||||
|
zap.String("namespace", namespace),
|
||||||
|
zap.String("cert_path", certPath))
|
||||||
|
}
|
||||||
|
return certPath, keyPath, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// SpawnTURN starts a TURN instance using systemd
|
// SpawnTURN starts a TURN instance using systemd
|
||||||
@ -534,42 +598,47 @@ func (s *SystemdSpawner) SpawnTURN(ctx context.Context, namespace, nodeID string
|
|||||||
|
|
||||||
configPath := filepath.Join(configDir, fmt.Sprintf("turn-%s.yaml", nodeID))
|
configPath := filepath.Join(configDir, fmt.Sprintf("turn-%s.yaml", nodeID))
|
||||||
|
|
||||||
// Provision TLS cert for TURNS — try Let's Encrypt via Caddy first, fall back to self-signed
|
// Provision TLS cert for TURNS — Let's Encrypt via Caddy first (idempotent,
|
||||||
certPath := filepath.Join(configDir, "turn-cert.pem")
|
// also upgrades nodes stuck on the self-signed fallback), self-signed as
|
||||||
keyPath := filepath.Join(configDir, "turn-key.pem")
|
// the primary-domain fallback only.
|
||||||
|
var certPath, keyPath string
|
||||||
if cfg.TURNSListenAddr != "" {
|
if cfg.TURNSListenAddr != "" {
|
||||||
if _, err := os.Stat(certPath); os.IsNotExist(err) {
|
var certErr error
|
||||||
// Try Let's Encrypt via Caddy first
|
certPath, keyPath, certErr = s.resolveTURNSCert(namespace, cfg.TURNDomain, cfg.PublicIP, configDir, true)
|
||||||
if cfg.TURNDomain != "" {
|
if certErr != nil {
|
||||||
acmeEndpoint := "http://localhost:6001/v1/internal/acme"
|
s.logger.Warn("Failed to resolve TURNS cert, TURNS will be disabled",
|
||||||
caddyCert, caddyKey, provErr := provisionTURNCertViaCaddy(cfg.TURNDomain, acmeEndpoint, 2*time.Minute)
|
|
||||||
if provErr == nil {
|
|
||||||
certPath = caddyCert
|
|
||||||
keyPath = caddyKey
|
|
||||||
s.logger.Info("Using Let's Encrypt cert from Caddy for TURNS",
|
|
||||||
zap.String("namespace", namespace),
|
zap.String("namespace", namespace),
|
||||||
zap.String("domain", cfg.TURNDomain),
|
zap.Error(certErr))
|
||||||
zap.String("cert_path", certPath))
|
cfg.TURNSListenAddr = "" // Disable TURNS if no cert is available
|
||||||
} else {
|
|
||||||
s.logger.Warn("Let's Encrypt cert provisioning failed, falling back to self-signed",
|
|
||||||
zap.String("namespace", namespace),
|
|
||||||
zap.String("domain", cfg.TURNDomain),
|
|
||||||
zap.Error(provErr))
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// Fallback: generate self-signed cert if no cert is available yet
|
|
||||||
if _, statErr := os.Stat(certPath); os.IsNotExist(statErr) {
|
// Stealth TURNS cert (feat-124): requires a working TURNS listener and a
|
||||||
if err := turn.GenerateSelfSignedCert(certPath, keyPath, cfg.PublicIP); err != nil {
|
// CA-valid cert — hard error, never a silent downgrade, because the
|
||||||
s.logger.Warn("Failed to generate TURNS self-signed cert, TURNS will be disabled",
|
// operator explicitly enabled stealth and a half-working stealth endpoint
|
||||||
zap.String("namespace", namespace),
|
// is invisible until a censored-region user fails to connect.
|
||||||
zap.Error(err))
|
var stealthCertPath, stealthKeyPath string
|
||||||
cfg.TURNSListenAddr = "" // Disable TURNS if cert generation fails
|
if cfg.StealthDomain != "" {
|
||||||
} else {
|
// Security: the stealth domain arrives over the spawn protocol (mesh
|
||||||
s.logger.Info("Generated TURNS self-signed certificate",
|
// peers gated only by the static internal-auth header). Before it
|
||||||
zap.String("namespace", namespace),
|
// reaches the Caddyfile/ACME sink, pin it to the deterministic
|
||||||
zap.String("cert_path", certPath))
|
// derivation so a forged value can't drive cert issuance for an
|
||||||
|
// attacker-chosen name. cfg.Realm is the base domain on every TURN
|
||||||
|
// spawn site. (provisionTURNCertViaCaddy adds a DNS-name allowlist as
|
||||||
|
// defense-in-depth.)
|
||||||
|
if cfg.Realm != "" {
|
||||||
|
want := turn.StealthHostForNamespace(cfg.Namespace, cfg.Realm)
|
||||||
|
if cfg.StealthDomain != want {
|
||||||
|
return fmt.Errorf("stealth domain %q does not match the derived host %q for namespace %s — refusing to provision", cfg.StealthDomain, want, cfg.Namespace)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
if cfg.TURNSListenAddr == "" {
|
||||||
|
return fmt.Errorf("stealth TURNS for namespace %s requires an active TURNS listener (no TLS cert/listener available)", namespace)
|
||||||
|
}
|
||||||
|
var stealthErr error
|
||||||
|
stealthCertPath, stealthKeyPath, stealthErr = s.resolveTURNSCert(namespace, cfg.StealthDomain, cfg.PublicIP, configDir, false)
|
||||||
|
if stealthErr != nil {
|
||||||
|
return fmt.Errorf("failed to provision stealth TURNS cert for namespace %s: %w", namespace, stealthErr)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -588,6 +657,11 @@ func (s *SystemdSpawner) SpawnTURN(ctx context.Context, namespace, nodeID string
|
|||||||
turnConfig.TLSCertPath = certPath
|
turnConfig.TLSCertPath = certPath
|
||||||
turnConfig.TLSKeyPath = keyPath
|
turnConfig.TLSKeyPath = keyPath
|
||||||
}
|
}
|
||||||
|
if stealthCertPath != "" {
|
||||||
|
turnConfig.StealthDomain = cfg.StealthDomain
|
||||||
|
turnConfig.TLSStealthCertPath = stealthCertPath
|
||||||
|
turnConfig.TLSStealthKeyPath = stealthKeyPath
|
||||||
|
}
|
||||||
|
|
||||||
configBytes, err := yaml.Marshal(turnConfig)
|
configBytes, err := yaml.Marshal(turnConfig)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
|||||||
@ -5,10 +5,20 @@ import (
|
|||||||
"os"
|
"os"
|
||||||
"os/exec"
|
"os/exec"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
|
"regexp"
|
||||||
"strings"
|
"strings"
|
||||||
"time"
|
"time"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
// dnsNamePattern matches a conservative lowercase DNS hostname. It exists to
|
||||||
|
// keep an operator/spawn-supplied domain from breaking out of the Caddyfile
|
||||||
|
// block it is interpolated into (a value containing '{', '}', or a newline
|
||||||
|
// could otherwise inject arbitrary Caddy directives) and to refuse cert
|
||||||
|
// provisioning for non-hostname junk. Security: defense-in-depth at the
|
||||||
|
// Caddyfile sink; the caller also pins the stealth domain to its deterministic
|
||||||
|
// derivation (systemd_spawner.go SpawnTURN).
|
||||||
|
var dnsNamePattern = regexp.MustCompile(`^[a-z0-9]([a-z0-9-]*[a-z0-9])?(\.[a-z0-9]([a-z0-9-]*[a-z0-9])?)+$`)
|
||||||
|
|
||||||
const (
|
const (
|
||||||
caddyfilePath = "/etc/caddy/Caddyfile"
|
caddyfilePath = "/etc/caddy/Caddyfile"
|
||||||
|
|
||||||
@ -25,6 +35,12 @@ const (
|
|||||||
// If Caddy is not available or cert provisioning times out, returns an error
|
// If Caddy is not available or cert provisioning times out, returns an error
|
||||||
// so the caller can fall back to a self-signed cert.
|
// so the caller can fall back to a self-signed cert.
|
||||||
func provisionTURNCertViaCaddy(domain, acmeEndpoint string, timeout time.Duration) (certPath, keyPath string, err error) {
|
func provisionTURNCertViaCaddy(domain, acmeEndpoint string, timeout time.Duration) (certPath, keyPath string, err error) {
|
||||||
|
// Refuse anything that isn't a clean DNS name before it reaches the
|
||||||
|
// Caddyfile write — blocks Caddyfile-injection via crafted domains.
|
||||||
|
if !dnsNamePattern.MatchString(domain) {
|
||||||
|
return "", "", fmt.Errorf("refusing to provision TURNS cert for non-DNS-name domain %q", domain)
|
||||||
|
}
|
||||||
|
|
||||||
// Check if cert already exists from a previous provisioning
|
// Check if cert already exists from a previous provisioning
|
||||||
certPath, keyPath = caddyCertPaths(domain)
|
certPath, keyPath = caddyCertPaths(domain)
|
||||||
if _, err := os.Stat(certPath); err == nil {
|
if _, err := os.Stat(certPath); err == nil {
|
||||||
|
|||||||
108
core/pkg/namespace/turn_stealth_cert_test.go
Normal file
108
core/pkg/namespace/turn_stealth_cert_test.go
Normal file
@ -0,0 +1,108 @@
|
|||||||
|
package namespace
|
||||||
|
|
||||||
|
import (
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
"strings"
|
||||||
|
"testing"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"go.uber.org/zap"
|
||||||
|
)
|
||||||
|
|
||||||
|
// feat-124 — resolveTURNSCert semantics.
|
||||||
|
//
|
||||||
|
// On machines without a Caddyfile (tests, dev laptops) the Let's Encrypt
|
||||||
|
// branch fails fast with "failed to read Caddyfile", exercising exactly the
|
||||||
|
// fallback decision this function owns: primary domains degrade to a
|
||||||
|
// self-signed pair, the stealth domain must hard-fail instead.
|
||||||
|
|
||||||
|
func testSpawner(t *testing.T) *SystemdSpawner {
|
||||||
|
t.Helper()
|
||||||
|
return &SystemdSpawner{logger: zap.NewNop()}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestResolveTURNSCert_primaryFallsBackToSelfSigned(t *testing.T) {
|
||||||
|
s := testSpawner(t)
|
||||||
|
dir := t.TempDir()
|
||||||
|
|
||||||
|
certPath, keyPath, err := s.resolveTURNSCert("ns-test", "turn.ns-test.example.com", "203.0.113.7", dir, true)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("expected self-signed fallback, got error: %v", err)
|
||||||
|
}
|
||||||
|
if certPath != filepath.Join(dir, "turn-cert.pem") || keyPath != filepath.Join(dir, "turn-key.pem") {
|
||||||
|
t.Errorf("unexpected fallback paths: %s / %s", certPath, keyPath)
|
||||||
|
}
|
||||||
|
if _, statErr := os.Stat(certPath); statErr != nil {
|
||||||
|
t.Errorf("self-signed cert not written: %v", statErr)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestResolveTURNSCert_existingSelfSignedReused(t *testing.T) {
|
||||||
|
s := testSpawner(t)
|
||||||
|
dir := t.TempDir()
|
||||||
|
|
||||||
|
first, _, err := s.resolveTURNSCert("ns-test", "", "203.0.113.7", dir, true)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("first resolve: %v", err)
|
||||||
|
}
|
||||||
|
info1, err := os.Stat(first)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("stat first cert: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
second, _, err := s.resolveTURNSCert("ns-test", "", "203.0.113.7", dir, true)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("second resolve: %v", err)
|
||||||
|
}
|
||||||
|
info2, err := os.Stat(second)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("stat second cert: %v", err)
|
||||||
|
}
|
||||||
|
if first != second || info1.ModTime() != info2.ModTime() {
|
||||||
|
t.Error("existing self-signed pair was regenerated instead of reused")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestResolveTURNSCert_stealthNeverFallsBackToSelfSigned(t *testing.T) {
|
||||||
|
s := testSpawner(t)
|
||||||
|
dir := t.TempDir()
|
||||||
|
|
||||||
|
_, _, err := s.resolveTURNSCert("ns-test", "cdn-abc123def456.example.com", "203.0.113.7", dir, false)
|
||||||
|
if err == nil {
|
||||||
|
t.Fatal("stealth cert resolution must hard-fail without Let's Encrypt — a self-signed stealth cert is indistinguishable from being blocked")
|
||||||
|
}
|
||||||
|
if !strings.Contains(err.Error(), "cdn-abc123def456.example.com") {
|
||||||
|
t.Errorf("error must name the stealth domain for the operator; got: %v", err)
|
||||||
|
}
|
||||||
|
if _, statErr := os.Stat(filepath.Join(dir, "turn-cert.pem")); !os.IsNotExist(statErr) {
|
||||||
|
t.Error("stealth failure must not write a self-signed pair")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestResolveTURNSCert_noDomainNoFallbackErrors(t *testing.T) {
|
||||||
|
s := testSpawner(t)
|
||||||
|
_, _, err := s.resolveTURNSCert("ns-test", "", "203.0.113.7", t.TempDir(), false)
|
||||||
|
if err == nil {
|
||||||
|
t.Fatal("empty domain with self-signed disallowed must error")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Security (feat-124): the Caddyfile sink must refuse any domain that isn't a
|
||||||
|
// clean DNS name, so a crafted value can't break out of the generated block
|
||||||
|
// and inject Caddy directives.
|
||||||
|
func TestProvisionTURNCertViaCaddy_rejectsNonDNSName(t *testing.T) {
|
||||||
|
bad := []string{
|
||||||
|
"example.com {\n reverse_proxy evil:1234\n}\n#",
|
||||||
|
"has space.com",
|
||||||
|
"UPPER.example.com",
|
||||||
|
"nodots",
|
||||||
|
"trailing-.example.com",
|
||||||
|
"",
|
||||||
|
}
|
||||||
|
for _, d := range bad {
|
||||||
|
if _, _, err := provisionTURNCertViaCaddy(d, "http://localhost:6001/v1/internal/acme", time.Second); err == nil {
|
||||||
|
t.Errorf("provisionTURNCertViaCaddy(%q) accepted a non-DNS-name domain", d)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
@ -249,6 +249,8 @@ var (
|
|||||||
ErrRecoveryInProgress = &ClusterError{Message: "recovery already in progress for this cluster"}
|
ErrRecoveryInProgress = &ClusterError{Message: "recovery already in progress for this cluster"}
|
||||||
ErrWebRTCAlreadyEnabled = &ClusterError{Message: "WebRTC is already enabled for this namespace"}
|
ErrWebRTCAlreadyEnabled = &ClusterError{Message: "WebRTC is already enabled for this namespace"}
|
||||||
ErrWebRTCNotEnabled = &ClusterError{Message: "WebRTC is not enabled for this namespace"}
|
ErrWebRTCNotEnabled = &ClusterError{Message: "WebRTC is not enabled for this namespace"}
|
||||||
|
ErrWebRTCStealthAlreadyEnabled = &ClusterError{Message: "WebRTC stealth is already enabled for this namespace"}
|
||||||
|
ErrWebRTCStealthNotEnabled = &ClusterError{Message: "WebRTC stealth is not enabled for this namespace"}
|
||||||
ErrNoWebRTCPortsAvailable = &ClusterError{Message: "no WebRTC ports available on node"}
|
ErrNoWebRTCPortsAvailable = &ClusterError{Message: "no WebRTC ports available on node"}
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -262,6 +264,10 @@ type WebRTCConfig struct {
|
|||||||
TURNCredentialTTL int `json:"turn_credential_ttl" db:"turn_credential_ttl"`
|
TURNCredentialTTL int `json:"turn_credential_ttl" db:"turn_credential_ttl"`
|
||||||
SFUNodeCount int `json:"sfu_node_count" db:"sfu_node_count"`
|
SFUNodeCount int `json:"sfu_node_count" db:"sfu_node_count"`
|
||||||
TURNNodeCount int `json:"turn_node_count" db:"turn_node_count"`
|
TURNNodeCount int `json:"turn_node_count" db:"turn_node_count"`
|
||||||
|
// StealthEnabled gates the censorship-resistant TURNS:443 path (feat-124):
|
||||||
|
// stealth cert on the TURN servers, SNI route on :443, and the
|
||||||
|
// `turns:<stealth-host>:443` rung in the turn.credentials URI ladder.
|
||||||
|
StealthEnabled bool `json:"stealth_enabled" db:"stealth_enabled"`
|
||||||
EnabledBy string `json:"enabled_by" db:"enabled_by"`
|
EnabledBy string `json:"enabled_by" db:"enabled_by"`
|
||||||
EnabledAt time.Time `json:"enabled_at" db:"enabled_at"`
|
EnabledAt time.Time `json:"enabled_at" db:"enabled_at"`
|
||||||
DisabledAt *time.Time `json:"disabled_at,omitempty" db:"disabled_at"`
|
DisabledAt *time.Time `json:"disabled_at,omitempty" db:"disabled_at"`
|
||||||
|
|||||||
@ -828,6 +828,7 @@ func (e *Engine) registerHostModule(ctx context.Context) error {
|
|||||||
NewFunctionBuilder().WithFunc(e.hWSBroadcast).Export("ws_broadcast").
|
NewFunctionBuilder().WithFunc(e.hWSBroadcast).Export("ws_broadcast").
|
||||||
NewFunctionBuilder().WithFunc(e.hEphemeralStateSet).Export("ephemeral_state_set").
|
NewFunctionBuilder().WithFunc(e.hEphemeralStateSet).Export("ephemeral_state_set").
|
||||||
NewFunctionBuilder().WithFunc(e.hEphemeralStateClear).Export("ephemeral_state_clear").
|
NewFunctionBuilder().WithFunc(e.hEphemeralStateClear).Export("ephemeral_state_clear").
|
||||||
|
NewFunctionBuilder().WithFunc(e.hEphemeralStateList).Export("ephemeral_state_list").
|
||||||
NewFunctionBuilder().WithFunc(e.hFunctionInvoke).Export("function_invoke").
|
NewFunctionBuilder().WithFunc(e.hFunctionInvoke).Export("function_invoke").
|
||||||
NewFunctionBuilder().WithFunc(e.hFunctionInvokeAsync).Export("function_invoke_async").
|
NewFunctionBuilder().WithFunc(e.hFunctionInvokeAsync).Export("function_invoke_async").
|
||||||
NewFunctionBuilder().WithFunc(e.hLogInfo).Export("log_info").
|
NewFunctionBuilder().WithFunc(e.hLogInfo).Export("log_info").
|
||||||
@ -1463,6 +1464,33 @@ func (e *Engine) hEphemeralStateClear(ctx context.Context, mod api.Module,
|
|||||||
return 1
|
return 1
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// hEphemeralStateList is the WASM-callable wrapper for EphemeralStateList —
|
||||||
|
// the bugboard #710 reconnect catch-up read.
|
||||||
|
//
|
||||||
|
// ABI: ephemeral_state_list(topicPtr, topicLen uint32) -> uint64 packed
|
||||||
|
// (ptr<<32 | len) pointing to a JSON envelope in guest memory:
|
||||||
|
//
|
||||||
|
// {"entries":[{"key":..,"client_id":..,"payload":<base64>,"expires_in_ms":..}, …]}
|
||||||
|
//
|
||||||
|
// Returns 0 on failure (empty topic, no invocation context, ephemeral state
|
||||||
|
// unavailable, or a guest-memory error). Unlike set/clear, no WS client is
|
||||||
|
// required — the read is namespace-scoped via the invocation context.
|
||||||
|
func (e *Engine) hEphemeralStateList(ctx context.Context, mod api.Module,
|
||||||
|
topicPtr, topicLen uint32) uint64 {
|
||||||
|
topic, ok := e.executor.ReadFromGuest(mod, topicPtr, topicLen)
|
||||||
|
if !ok {
|
||||||
|
return 0
|
||||||
|
}
|
||||||
|
out, err := e.hostServices.EphemeralStateList(ctx, string(topic))
|
||||||
|
if err != nil {
|
||||||
|
e.logger.Warn("host function ephemeral_state_list failed",
|
||||||
|
zap.String("topic", string(topic)),
|
||||||
|
zap.Error(err))
|
||||||
|
return 0
|
||||||
|
}
|
||||||
|
return e.executor.WriteToGuest(ctx, mod, out)
|
||||||
|
}
|
||||||
|
|
||||||
// hPushSend is the WASM-callable wrapper for PushSend.
|
// hPushSend is the WASM-callable wrapper for PushSend.
|
||||||
// Inputs:
|
// Inputs:
|
||||||
//
|
//
|
||||||
|
|||||||
@ -4,6 +4,7 @@ import (
|
|||||||
"context"
|
"context"
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"sort"
|
||||||
"sync"
|
"sync"
|
||||||
"time"
|
"time"
|
||||||
)
|
)
|
||||||
@ -47,26 +48,29 @@ const (
|
|||||||
ephemeralSweepInterval = 10 * time.Second
|
ephemeralSweepInterval = 10 * time.Second
|
||||||
)
|
)
|
||||||
|
|
||||||
// EphemeralEventKind discriminates the synthetic events published on a topic.
|
// Synthetic-event discriminator values carried in the `_orama` field. The
|
||||||
type EphemeralEventKind string
|
// `_orama` control-frame namespace is the contract agreed with app teams on
|
||||||
|
// bugboard #710 (#458/#505/#849/#901) — the same dispatch pattern clients
|
||||||
|
// already use for the auth.refresh control frame from #321.
|
||||||
const (
|
const (
|
||||||
EphemeralEventSet EphemeralEventKind = "set"
|
EphemeralEventSet = "ephemeral.set"
|
||||||
EphemeralEventClear EphemeralEventKind = "clear"
|
EphemeralEventClear = "ephemeral.clear"
|
||||||
)
|
)
|
||||||
|
|
||||||
// EphemeralEvent is the wire shape published on the topic when ephemeral state
|
// EphemeralEvent is the wire shape published on the topic when ephemeral state
|
||||||
// is set, cleared, or auto-cleared on disconnect/expiry. Subscribers key off
|
// is set, cleared, or auto-cleared on disconnect/expiry. Subscribers dispatch
|
||||||
// Kind + Key to update their local view. Payload is only populated for "set".
|
// on the `_orama` discriminator + Key to update their local view. Payload is
|
||||||
|
// only populated for "ephemeral.set".
|
||||||
type EphemeralEvent struct {
|
type EphemeralEvent struct {
|
||||||
Type string `json:"__ephemeral"` // always "state"
|
Type string `json:"_orama"` // "ephemeral.set" | "ephemeral.clear"
|
||||||
Kind EphemeralEventKind `json:"kind"` // set | clear
|
Topic string `json:"topic"` // the topic the state lives on (self-describing for sub-routers)
|
||||||
Key string `json:"key"` // app-chosen key
|
Key string `json:"key"` // app-chosen key
|
||||||
ClientID string `json:"client_id"` // owning WS client
|
ClientID string `json:"client_id"` // owning WS client
|
||||||
// Payload is the opaque app-chosen blob (may be JSON, protobuf, or
|
// Payload is the opaque app-chosen blob (may be JSON, protobuf, or
|
||||||
// arbitrary bytes), present only for "set". encoding/json base64-encodes
|
// arbitrary bytes), present only for "ephemeral.set". encoding/json
|
||||||
// a []byte on the wire, so subscribers base64-decode "payload" to recover
|
// base64-encodes a []byte on the wire, so subscribers base64-decode
|
||||||
// the original bytes — mirroring how pubsub_publish_batch carries data.
|
// "payload" to recover the original bytes — mirroring how
|
||||||
|
// pubsub_publish_batch carries data.
|
||||||
Payload []byte `json:"payload,omitempty"`
|
Payload []byte `json:"payload,omitempty"`
|
||||||
Reason string `json:"reason,omitempty"` // clear only: explicit|disconnect|expired
|
Reason string `json:"reason,omitempty"` // clear only: explicit|disconnect|expired
|
||||||
}
|
}
|
||||||
@ -192,8 +196,8 @@ func (s *EphemeralStore) Set(ctx context.Context, namespace, clientID, topic, ke
|
|||||||
s.mu.Unlock()
|
s.mu.Unlock()
|
||||||
|
|
||||||
evt := EphemeralEvent{
|
evt := EphemeralEvent{
|
||||||
Type: "state",
|
Type: EphemeralEventSet,
|
||||||
Kind: EphemeralEventSet,
|
Topic: topic,
|
||||||
Key: key,
|
Key: key,
|
||||||
ClientID: clientID,
|
ClientID: clientID,
|
||||||
Payload: payloadCopy,
|
Payload: payloadCopy,
|
||||||
@ -225,14 +229,60 @@ func (s *EphemeralStore) Clear(ctx context.Context, namespace, clientID, topic,
|
|||||||
s.mu.Unlock()
|
s.mu.Unlock()
|
||||||
|
|
||||||
return s.publishEvent(ctx, namespace, topic, EphemeralEvent{
|
return s.publishEvent(ctx, namespace, topic, EphemeralEvent{
|
||||||
Type: "state",
|
Type: EphemeralEventClear,
|
||||||
Kind: EphemeralEventClear,
|
Topic: topic,
|
||||||
Key: key,
|
Key: key,
|
||||||
ClientID: clientID,
|
ClientID: clientID,
|
||||||
Reason: "explicit",
|
Reason: "explicit",
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// EphemeralListEntry is one live entry returned by List — the reconnect
|
||||||
|
// catch-up shape for the ephemeral_state_list host fn. ExpiresInMs is relative
|
||||||
|
// (remaining TTL) so callers don't need a synchronized clock.
|
||||||
|
type EphemeralListEntry struct {
|
||||||
|
Key string `json:"key"`
|
||||||
|
ClientID string `json:"client_id"`
|
||||||
|
Payload []byte `json:"payload,omitempty"`
|
||||||
|
ExpiresInMs int64 `json:"expires_in_ms"`
|
||||||
|
}
|
||||||
|
|
||||||
|
// List returns the live (non-expired) entries on a (namespace, topic), sorted
|
||||||
|
// by key for deterministic output. The reconnect catch-up path (bugboard #710
|
||||||
|
// acceptance): a client that just (re)subscribed reads the current state once,
|
||||||
|
// then tracks the ephemeral.set/ephemeral.clear event stream. Read-only — no
|
||||||
|
// ownership requirement, no WS client needed.
|
||||||
|
func (s *EphemeralStore) List(namespace, topic string) []EphemeralListEntry {
|
||||||
|
now := s.now()
|
||||||
|
|
||||||
|
s.mu.Lock()
|
||||||
|
entries := make([]EphemeralListEntry, 0)
|
||||||
|
for sk, entry := range s.values {
|
||||||
|
if sk.namespace != namespace || sk.topic != topic {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if !now.Before(entry.expiresAt) {
|
||||||
|
// now >= expiresAt: hide it. Intentionally one tick stricter than
|
||||||
|
// sweepExpired (which removes only when now.After(expiresAt)) — a
|
||||||
|
// reconnect catch-up must never surface state that is at/past its
|
||||||
|
// deadline, even if the backstop sweeper hasn't run yet.
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
payloadCopy := make([]byte, len(entry.payload))
|
||||||
|
copy(payloadCopy, entry.payload)
|
||||||
|
entries = append(entries, EphemeralListEntry{
|
||||||
|
Key: entry.key,
|
||||||
|
ClientID: entry.clientID,
|
||||||
|
Payload: payloadCopy,
|
||||||
|
ExpiresInMs: entry.expiresAt.Sub(now).Milliseconds(),
|
||||||
|
})
|
||||||
|
}
|
||||||
|
s.mu.Unlock()
|
||||||
|
|
||||||
|
sort.Slice(entries, func(i, j int) bool { return entries[i].Key < entries[j].Key })
|
||||||
|
return entries
|
||||||
|
}
|
||||||
|
|
||||||
// ClearClient removes every entry owned by clientID and publishes a clear
|
// ClearClient removes every entry owned by clientID and publishes a clear
|
||||||
// event for each (reason "disconnect"). Called from the WS disconnect hook —
|
// event for each (reason "disconnect"). Called from the WS disconnect hook —
|
||||||
// the primary, zero-lag cleanup path. Safe to call for an unknown client.
|
// the primary, zero-lag cleanup path. Safe to call for an unknown client.
|
||||||
@ -261,8 +311,8 @@ func (s *EphemeralStore) clearClientWithReason(ctx context.Context, clientID, re
|
|||||||
|
|
||||||
for _, entry := range toClear {
|
for _, entry := range toClear {
|
||||||
_ = s.publishEvent(ctx, entry.namespace, entry.topic, EphemeralEvent{
|
_ = s.publishEvent(ctx, entry.namespace, entry.topic, EphemeralEvent{
|
||||||
Type: "state",
|
Type: EphemeralEventClear,
|
||||||
Kind: EphemeralEventClear,
|
Topic: entry.topic,
|
||||||
Key: entry.key,
|
Key: entry.key,
|
||||||
ClientID: clientID,
|
ClientID: clientID,
|
||||||
Reason: reason,
|
Reason: reason,
|
||||||
@ -292,7 +342,7 @@ func (s *EphemeralStore) publishEvent(ctx context.Context, namespace, topic stri
|
|||||||
return fmt.Errorf("ephemeral state: marshal event: %w", err)
|
return fmt.Errorf("ephemeral state: marshal event: %w", err)
|
||||||
}
|
}
|
||||||
if err := s.publish(ctx, namespace, topic, data); err != nil {
|
if err := s.publish(ctx, namespace, topic, data); err != nil {
|
||||||
return fmt.Errorf("ephemeral state: publish %s event: %w", evt.Kind, err)
|
return fmt.Errorf("ephemeral state: publish %s event: %w", evt.Type, err)
|
||||||
}
|
}
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
@ -335,8 +385,8 @@ func (s *EphemeralStore) sweepExpired(ctx context.Context) {
|
|||||||
|
|
||||||
for _, entry := range expired {
|
for _, entry := range expired {
|
||||||
_ = s.publishEvent(ctx, entry.namespace, entry.topic, EphemeralEvent{
|
_ = s.publishEvent(ctx, entry.namespace, entry.topic, EphemeralEvent{
|
||||||
Type: "state",
|
Type: EphemeralEventClear,
|
||||||
Kind: EphemeralEventClear,
|
Topic: entry.topic,
|
||||||
Key: entry.key,
|
Key: entry.key,
|
||||||
ClientID: entry.clientID,
|
ClientID: entry.clientID,
|
||||||
Reason: "expired",
|
Reason: "expired",
|
||||||
|
|||||||
@ -40,12 +40,12 @@ func (c *capturePublisher) snapshot() []capturedEvent {
|
|||||||
return out
|
return out
|
||||||
}
|
}
|
||||||
|
|
||||||
func (c *capturePublisher) countKind(kind EphemeralEventKind) int {
|
func (c *capturePublisher) countKind(eventType string) int {
|
||||||
c.mu.Lock()
|
c.mu.Lock()
|
||||||
defer c.mu.Unlock()
|
defer c.mu.Unlock()
|
||||||
n := 0
|
n := 0
|
||||||
for _, e := range c.events {
|
for _, e := range c.events {
|
||||||
if e.event.Kind == kind {
|
if e.event.Type == eventType {
|
||||||
n++
|
n++
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -114,7 +114,7 @@ func TestEphemeralStore_SetThenDisconnect(t *testing.T) {
|
|||||||
t.Errorf("disconnect clear events = %d, want 2", got)
|
t.Errorf("disconnect clear events = %d, want 2", got)
|
||||||
}
|
}
|
||||||
for _, e := range pub.snapshot() {
|
for _, e := range pub.snapshot() {
|
||||||
if e.event.Kind == EphemeralEventClear && e.event.Reason != "disconnect" {
|
if e.event.Type == EphemeralEventClear && e.event.Reason != "disconnect" {
|
||||||
t.Errorf("clear reason = %q, want disconnect", e.event.Reason)
|
t.Errorf("clear reason = %q, want disconnect", e.event.Reason)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -149,7 +149,7 @@ func TestEphemeralStore_TTLExpiry(t *testing.T) {
|
|||||||
// A clear event with reason=expired must have been published.
|
// A clear event with reason=expired must have been published.
|
||||||
foundExpired := false
|
foundExpired := false
|
||||||
for _, e := range pub.snapshot() {
|
for _, e := range pub.snapshot() {
|
||||||
if e.event.Kind == EphemeralEventClear && e.event.Reason == "expired" {
|
if e.event.Type == EphemeralEventClear && e.event.Reason == "expired" {
|
||||||
foundExpired = true
|
foundExpired = true
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -293,3 +293,130 @@ func TestEphemeralStore_OwnershipTransfer(t *testing.T) {
|
|||||||
t.Errorf("new owner's disconnect did not clear, count=%d", s.keyCountForTest())
|
t.Errorf("new owner's disconnect did not clear, count=%d", s.keyCountForTest())
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TestEphemeralStore_wireFormatContract pins the EXACT JSON wire shape of the
|
||||||
|
// synthetic events — the `_orama` control-frame contract agreed with app teams
|
||||||
|
// on bugboard #710 (#458/#505/#849/#901). Client sub-routers dispatch on the
|
||||||
|
// `_orama` discriminator; renaming any of these fields is a breaking protocol
|
||||||
|
// change and must fail this test.
|
||||||
|
func TestEphemeralStore_wireFormatContract(t *testing.T) {
|
||||||
|
type raw struct {
|
||||||
|
Orama string `json:"_orama"`
|
||||||
|
Topic string `json:"topic"`
|
||||||
|
Key string `json:"key"`
|
||||||
|
ClientID string `json:"client_id"`
|
||||||
|
Payload []byte `json:"payload"`
|
||||||
|
Reason string `json:"reason"`
|
||||||
|
}
|
||||||
|
var got []raw
|
||||||
|
pub := func(_ context.Context, _, _ string, data []byte) error {
|
||||||
|
var r raw
|
||||||
|
if err := json.Unmarshal(data, &r); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
got = append(got, r)
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
s := newTestStore(pub)
|
||||||
|
ctx := context.Background()
|
||||||
|
|
||||||
|
if err := s.Set(ctx, "ns1", "client-A", "typing:room1", "user-7", []byte("blob"), 0); err != nil {
|
||||||
|
t.Fatalf("Set: %v", err)
|
||||||
|
}
|
||||||
|
s.ClearClient(ctx, "client-A")
|
||||||
|
|
||||||
|
if len(got) != 2 {
|
||||||
|
t.Fatalf("expected 2 events (set + disconnect clear), got %d", len(got))
|
||||||
|
}
|
||||||
|
set, clear := got[0], got[1]
|
||||||
|
if set.Orama != "ephemeral.set" {
|
||||||
|
t.Errorf(`set _orama = %q, want "ephemeral.set"`, set.Orama)
|
||||||
|
}
|
||||||
|
if set.Topic != "typing:room1" || set.Key != "user-7" || set.ClientID != "client-A" {
|
||||||
|
t.Errorf("set event fields wrong: %+v", set)
|
||||||
|
}
|
||||||
|
if string(set.Payload) != "blob" {
|
||||||
|
t.Errorf("set payload = %q, want blob", set.Payload)
|
||||||
|
}
|
||||||
|
if clear.Orama != "ephemeral.clear" {
|
||||||
|
t.Errorf(`clear _orama = %q, want "ephemeral.clear"`, clear.Orama)
|
||||||
|
}
|
||||||
|
if clear.Topic != "typing:room1" || clear.Key != "user-7" || clear.Reason != "disconnect" {
|
||||||
|
t.Errorf("clear event fields wrong: %+v", clear)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestEphemeralStoreList_returnsLiveEntriesSorted(t *testing.T) {
|
||||||
|
s := newTestStore(nil)
|
||||||
|
ctx := context.Background()
|
||||||
|
|
||||||
|
if err := s.Set(ctx, "ns1", "client-B", "presence:room1", "zeta", []byte("z"), 0); err != nil {
|
||||||
|
t.Fatalf("Set zeta: %v", err)
|
||||||
|
}
|
||||||
|
if err := s.Set(ctx, "ns1", "client-A", "presence:room1", "alpha", []byte("a"), 0); err != nil {
|
||||||
|
t.Fatalf("Set alpha: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
entries := s.List("ns1", "presence:room1")
|
||||||
|
if len(entries) != 2 {
|
||||||
|
t.Fatalf("List returned %d entries, want 2", len(entries))
|
||||||
|
}
|
||||||
|
if entries[0].Key != "alpha" || entries[1].Key != "zeta" {
|
||||||
|
t.Errorf("entries not sorted by key: %q, %q", entries[0].Key, entries[1].Key)
|
||||||
|
}
|
||||||
|
if entries[0].ClientID != "client-A" || string(entries[0].Payload) != "a" {
|
||||||
|
t.Errorf("entry fields wrong: %+v", entries[0])
|
||||||
|
}
|
||||||
|
if entries[0].ExpiresInMs <= 0 {
|
||||||
|
t.Errorf("ExpiresInMs must be positive for a live entry, got %d", entries[0].ExpiresInMs)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestEphemeralStoreList_excludesExpiredAndOtherScopes(t *testing.T) {
|
||||||
|
s := newTestStore(nil)
|
||||||
|
ctx := context.Background()
|
||||||
|
base := time.Now()
|
||||||
|
s.now = func() time.Time { return base }
|
||||||
|
|
||||||
|
if err := s.Set(ctx, "ns1", "c", "t", "live", []byte("p"), 60_000); err != nil {
|
||||||
|
t.Fatalf("Set live: %v", err)
|
||||||
|
}
|
||||||
|
if err := s.Set(ctx, "ns1", "c", "t", "dying", []byte("p"), 1000); err != nil {
|
||||||
|
t.Fatalf("Set dying: %v", err)
|
||||||
|
}
|
||||||
|
if err := s.Set(ctx, "ns2", "c", "t", "other-ns", []byte("p"), 60_000); err != nil {
|
||||||
|
t.Fatalf("Set other-ns: %v", err)
|
||||||
|
}
|
||||||
|
if err := s.Set(ctx, "ns1", "c", "t2", "other-topic", []byte("p"), 60_000); err != nil {
|
||||||
|
t.Fatalf("Set other-topic: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Advance past "dying"'s TTL but do NOT sweep — List must hide it anyway.
|
||||||
|
s.now = func() time.Time { return base.Add(2 * time.Second) }
|
||||||
|
|
||||||
|
entries := s.List("ns1", "t")
|
||||||
|
if len(entries) != 1 || entries[0].Key != "live" {
|
||||||
|
t.Fatalf("List = %+v, want exactly the single live ns1/t entry", entries)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestEphemeralStoreList_emptyTopicReturnsEmpty(t *testing.T) {
|
||||||
|
s := newTestStore(nil)
|
||||||
|
if entries := s.List("ns1", "nothing-here"); len(entries) != 0 {
|
||||||
|
t.Errorf("List on empty topic = %+v, want empty", entries)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestEphemeralStoreList_snapshotIsDefensiveCopy(t *testing.T) {
|
||||||
|
s := newTestStore(nil)
|
||||||
|
ctx := context.Background()
|
||||||
|
if err := s.Set(ctx, "ns1", "c", "t", "k", []byte("orig"), 0); err != nil {
|
||||||
|
t.Fatalf("Set: %v", err)
|
||||||
|
}
|
||||||
|
entries := s.List("ns1", "t")
|
||||||
|
entries[0].Payload[0] = 'X'
|
||||||
|
fresh := s.List("ns1", "t")
|
||||||
|
if string(fresh[0].Payload) != "orig" {
|
||||||
|
t.Error("List payload is not a defensive copy; store was mutated")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|||||||
@ -146,6 +146,10 @@ func (m *mockHostServices) EphemeralStateClear(ctx context.Context, topic, key s
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (m *mockHostServices) EphemeralStateList(ctx context.Context, topic string) ([]byte, error) {
|
||||||
|
return []byte(`{"entries":[]}`), nil
|
||||||
|
}
|
||||||
|
|
||||||
func (m *mockHostServices) WSSend(ctx context.Context, clientID string, data []byte) error {
|
func (m *mockHostServices) WSSend(ctx context.Context, clientID string, data []byte) error {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|||||||
@ -220,6 +220,34 @@ func (h *HostFunctions) EphemeralStateClear(ctx context.Context, topic, key stri
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ephemeralListEnvelope is the JSON shape returned by EphemeralStateList —
|
||||||
|
// an object (not a bare array) so fields can be added without breaking
|
||||||
|
// existing WASM callers.
|
||||||
|
type ephemeralListEnvelope struct {
|
||||||
|
Entries []serverless.EphemeralListEntry `json:"entries"`
|
||||||
|
}
|
||||||
|
|
||||||
|
// EphemeralStateList returns the live ephemeral entries on a topic in the
|
||||||
|
// invocation's namespace (bugboard #710 reconnect catch-up). Read-only: no
|
||||||
|
// WS client required, so HTTP-invoked functions can serve snapshots too.
|
||||||
|
func (h *HostFunctions) EphemeralStateList(ctx context.Context, topic string) ([]byte, error) {
|
||||||
|
if h.ephemeralStore == nil {
|
||||||
|
return nil, &serverless.HostFunctionError{Function: "ephemeral_state_list", Cause: fmt.Errorf("ephemeral state not available on this gateway")}
|
||||||
|
}
|
||||||
|
if topic == "" {
|
||||||
|
return nil, &serverless.HostFunctionError{Function: "ephemeral_state_list", Cause: fmt.Errorf("topic is required")}
|
||||||
|
}
|
||||||
|
cur := h.currentInvocationContext(ctx)
|
||||||
|
if cur == nil {
|
||||||
|
return nil, &serverless.HostFunctionError{Function: "ephemeral_state_list", Cause: fmt.Errorf("no invocation context")}
|
||||||
|
}
|
||||||
|
out, err := json.Marshal(ephemeralListEnvelope{Entries: h.ephemeralStore.List(cur.Namespace, topic)})
|
||||||
|
if err != nil {
|
||||||
|
return nil, &serverless.HostFunctionError{Function: "ephemeral_state_list", Cause: fmt.Errorf("marshal entries: %w", err)}
|
||||||
|
}
|
||||||
|
return out, nil
|
||||||
|
}
|
||||||
|
|
||||||
// WSSend sends data to a specific WebSocket client.
|
// WSSend sends data to a specific WebSocket client.
|
||||||
func (h *HostFunctions) WSSend(ctx context.Context, clientID string, data []byte) error {
|
func (h *HostFunctions) WSSend(ctx context.Context, clientID string, data []byte) error {
|
||||||
if h.wsManager == nil {
|
if h.wsManager == nil {
|
||||||
|
|||||||
@ -259,6 +259,10 @@ func (m *MockHostServices) EphemeralStateClear(ctx context.Context, topic, key s
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// EphemeralStateList is the mock implementation: it ignores ctx and topic and
// always returns an envelope with an empty "entries" array and a nil error.
func (m *MockHostServices) EphemeralStateList(ctx context.Context, topic string) ([]byte, error) {
	return []byte(`{"entries":[]}`), nil
}
|
||||||
|
|
||||||
func (m *MockHostServices) WSSend(ctx context.Context, clientID string, data []byte) error {
|
func (m *MockHostServices) WSSend(ctx context.Context, clientID string, data []byte) error {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|||||||
@ -595,6 +595,14 @@ type HostServices interface {
|
|||||||
// non-owned key is a no-op. Errors only on no-WS-client / empty topic-key.
|
// non-owned key is a no-op. Errors only on no-WS-client / empty topic-key.
|
||||||
EphemeralStateClear(ctx context.Context, topic, key string) error
|
EphemeralStateClear(ctx context.Context, topic, key string) error
|
||||||
|
|
||||||
|
// EphemeralStateList returns the live entries on a topic in the current
|
||||||
|
// invocation's namespace as a JSON envelope:
|
||||||
|
// {"entries":[{"key":..,"client_id":..,"payload":<base64>,"expires_in_ms":..}, …]}
|
||||||
|
// The reconnect catch-up read (bugboard #710 acceptance): unlike
|
||||||
|
// Set/Clear it does NOT require a WS client in context — any function
|
||||||
|
// invocation may read. Errors on empty topic or no invocation context.
|
||||||
|
EphemeralStateList(ctx context.Context, topic string) ([]byte, error)
|
||||||
|
|
||||||
// WebSocket operations (only valid in WS context)
|
// WebSocket operations (only valid in WS context)
|
||||||
WSSend(ctx context.Context, clientID string, data []byte) error
|
WSSend(ctx context.Context, clientID string, data []byte) error
|
||||||
WSBroadcast(ctx context.Context, topic string, data []byte) error
|
WSBroadcast(ctx context.Context, topic string, data []byte) error
|
||||||
|
|||||||
129
core/pkg/sniproxy/discoverer.go
Normal file
129
core/pkg/sniproxy/discoverer.go
Normal file
@ -0,0 +1,129 @@
|
|||||||
|
package sniproxy
|
||||||
|
|
||||||
|
import (
|
||||||
|
"strings"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"go.uber.org/zap"
|
||||||
|
)
|
||||||
|
|
||||||
|
// discoveryWarnInterval is the minimum spacing between "discovery scan failed"
// warnings, so a persistently-unreadable namespaces directory cannot flood the
// journal.
const discoveryWarnInterval = 5 * time.Minute
|
||||||
|
|
||||||
|
// StaticRoutes returns the operator-set routes parsed from the SNI router's own
// config file plus the fallback backend. The discoverer merges these with the
// auto-discovered TURN routes; static routes win on an SNI conflict. A non-nil
// err aborts the merge and leaves the Router untouched.
type StaticRoutes func() (routes []Route, fallback Backend, err error)
|
||||||
|
|
||||||
|
// TURNRouteDiscoverer periodically rescans the namespaces directory for
// per-namespace TURNS listeners, merges the discovered routes with the static
// routes from the config file (static wins on conflict), and atomically
// installs the result on the Router.
//
// A transient failure (unreadable namespaces dir, or a bad static-config read)
// logs a rate-limited warning and KEEPS the previously-installed routes — a
// filesystem hiccup must never blackhole live :443 traffic.
type TURNRouteDiscoverer struct {
	// cfg holds the scan settings (namespaces dir, base domain, rescan interval).
	cfg TURNDiscoveryConfig
	// static is called on every Apply to obtain the config-file routes and fallback.
	static StaticRoutes
	// router receives the merged route table via Replace.
	router *Router
	// logger is replaced with a no-op logger by the constructor when nil is passed.
	logger *zap.Logger

	// lastWarn is the time of the last emitted warning, used by warn for rate
	// limiting. It is only touched by the Run goroutine after the synchronous
	// startup Apply, so it needs no lock.
	lastWarn time.Time
}
|
||||||
|
|
||||||
|
// NewTURNRouteDiscoverer constructs a discoverer. static reads the operator's
|
||||||
|
// config-file routes + fallback; router receives the merged Replace calls.
|
||||||
|
func NewTURNRouteDiscoverer(cfg TURNDiscoveryConfig, static StaticRoutes, router *Router, logger *zap.Logger) *TURNRouteDiscoverer {
|
||||||
|
if logger == nil {
|
||||||
|
logger = zap.NewNop()
|
||||||
|
}
|
||||||
|
return &TURNRouteDiscoverer{cfg: cfg, static: static, router: router, logger: logger}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Apply performs one scan+merge and installs the result atomically. On any
|
||||||
|
// transient error it returns the error and leaves the Router untouched so the
|
||||||
|
// caller can decide whether to fail startup (Apply) or keep stale routes (Run).
|
||||||
|
func (d *TURNRouteDiscoverer) Apply() error {
|
||||||
|
staticRoutes, fallback, err := d.static()
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
discovered, err := DiscoverTURNRoutes(d.cfg, d.logger)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
merged := mergeRoutes(staticRoutes, discovered)
|
||||||
|
d.router.Replace(merged, fallback)
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Run scans immediately, then every rescan interval until stop is closed. A
|
||||||
|
// failed scan keeps the current routes and logs a rate-limited warning.
|
||||||
|
func (d *TURNRouteDiscoverer) Run(stop <-chan struct{}) {
|
||||||
|
if err := d.Apply(); err != nil {
|
||||||
|
d.warn("initial TURN route discovery failed; serving config-file routes only", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
interval := d.cfg.RescanInterval
|
||||||
|
if interval <= 0 {
|
||||||
|
interval = DefaultDiscoveryRescanInterval
|
||||||
|
}
|
||||||
|
ticker := time.NewTicker(interval)
|
||||||
|
defer ticker.Stop()
|
||||||
|
|
||||||
|
for {
|
||||||
|
select {
|
||||||
|
case <-stop:
|
||||||
|
return
|
||||||
|
case <-ticker.C:
|
||||||
|
if err := d.Apply(); err != nil {
|
||||||
|
d.warn("TURN route discovery failed; keeping current routes", err)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// warn logs at most once per discoveryWarnInterval to avoid journal flooding
|
||||||
|
// when the namespaces directory is persistently unreadable.
|
||||||
|
func (d *TURNRouteDiscoverer) warn(msg string, err error) {
|
||||||
|
now := time.Now()
|
||||||
|
if now.Sub(d.lastWarn) < discoveryWarnInterval {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
d.lastWarn = now
|
||||||
|
d.logger.Warn(msg,
|
||||||
|
zap.String("namespaces_dir", d.cfg.NamespacesDir),
|
||||||
|
zap.Error(err))
|
||||||
|
}
|
||||||
|
|
||||||
|
// mergeRoutes combines static and discovered routes with static taking
|
||||||
|
// precedence on an SNI-match conflict. Static routes keep their original order
|
||||||
|
// and precede discovered ones, matching Router.Pick's first-match semantics.
|
||||||
|
func mergeRoutes(static, discovered []Route) []Route {
|
||||||
|
seen := make(map[string]struct{}, len(static))
|
||||||
|
merged := make([]Route, 0, len(static)+len(discovered))
|
||||||
|
for _, r := range static {
|
||||||
|
seen[matchKey(r.Match)] = struct{}{}
|
||||||
|
merged = append(merged, r)
|
||||||
|
}
|
||||||
|
for _, r := range discovered {
|
||||||
|
if _, conflict := seen[matchKey(r.Match)]; conflict {
|
||||||
|
continue // static wins
|
||||||
|
}
|
||||||
|
merged = append(merged, r)
|
||||||
|
}
|
||||||
|
return merged
|
||||||
|
}
|
||||||
|
|
||||||
|
// matchKey lower-cases an SNI match so two matches that differ only in case
// compare equal during conflict detection (matching is case-insensitive,
// mirroring Router.Pick / matchSNI).
func matchKey(match string) string {
	normalized := strings.ToLower(match)
	return normalized
}
|
||||||
143
core/pkg/sniproxy/discoverer_test.go
Normal file
143
core/pkg/sniproxy/discoverer_test.go
Normal file
@ -0,0 +1,143 @@
|
|||||||
|
package sniproxy
|
||||||
|
|
||||||
|
import (
|
||||||
|
"errors"
|
||||||
|
"path/filepath"
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/DeBrosOfficial/network/pkg/turn"
|
||||||
|
)
|
||||||
|
|
||||||
|
// TestTURNRouteDiscoverer_staticRouteWinsMerge verifies that when a discovered
|
||||||
|
// stealth route collides with a static config route on the same SNI, the static
|
||||||
|
// route's backend is the one that ends up in the router (static wins).
|
||||||
|
func TestTURNRouteDiscoverer_staticRouteWinsMerge(t *testing.T) {
|
||||||
|
dir := t.TempDir()
|
||||||
|
const base = "example.com"
|
||||||
|
writeTURNConfig(t, dir, "anchat", "node-1", "0.0.0.0:5349")
|
||||||
|
|
||||||
|
stealthHost := turn.StealthHostForNamespace("anchat", base)
|
||||||
|
fallback := Backend{Name: "caddy", Network: "tcp", Addr: "127.0.0.1:8443"}
|
||||||
|
|
||||||
|
// Static config pins the very same stealth host to a DIFFERENT backend.
|
||||||
|
static := func() ([]Route, Backend, error) {
|
||||||
|
return []Route{
|
||||||
|
{Match: stealthHost, Backend: Backend{Name: "static-override", Network: "tcp", Addr: "127.0.0.1:9999"}},
|
||||||
|
}, fallback, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
router := NewRouter(Backend{})
|
||||||
|
d := NewTURNRouteDiscoverer(TURNDiscoveryConfig{NamespacesDir: dir, BaseDomain: base}, static, router, nil)
|
||||||
|
if err := d.Apply(); err != nil {
|
||||||
|
t.Fatalf("Apply failed: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Pick must return the static backend, not the discovered one.
|
||||||
|
got := router.Pick(stealthHost)
|
||||||
|
if got.Addr != "127.0.0.1:9999" {
|
||||||
|
t.Errorf("static route should win: got backend %q, want 127.0.0.1:9999", got.Addr)
|
||||||
|
}
|
||||||
|
|
||||||
|
// The non-conflicting discovered alias must still be present.
|
||||||
|
alias := router.Pick("turn.ns-anchat." + base)
|
||||||
|
if alias.Addr != "127.0.0.1:5349" {
|
||||||
|
t.Errorf("discovered alias route missing/wrong: got %q", alias.Addr)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Fallback preserved from static source.
|
||||||
|
if router.Fallback().Addr != "127.0.0.1:8443" {
|
||||||
|
t.Errorf("fallback not preserved: got %q", router.Fallback().Addr)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestTURNRouteDiscoverer_transientErrorKeepsPreviousRoutes verifies that once
|
||||||
|
// routes are installed, a subsequent Apply whose scan fails (namespaces dir
|
||||||
|
// removed) returns an error and leaves the previously-installed routes intact —
|
||||||
|
// a transient filesystem error must never blackhole :443.
|
||||||
|
func TestTURNRouteDiscoverer_transientErrorKeepsPreviousRoutes(t *testing.T) {
|
||||||
|
parent := t.TempDir()
|
||||||
|
nsDir := filepath.Join(parent, "namespaces")
|
||||||
|
const base = "example.com"
|
||||||
|
writeTURNConfig(t, nsDir, "anchat", "node-1", "0.0.0.0:5349")
|
||||||
|
|
||||||
|
fallback := Backend{Name: "caddy", Network: "tcp", Addr: "127.0.0.1:8443"}
|
||||||
|
static := func() ([]Route, Backend, error) { return nil, fallback, nil }
|
||||||
|
|
||||||
|
router := NewRouter(Backend{})
|
||||||
|
d := NewTURNRouteDiscoverer(TURNDiscoveryConfig{NamespacesDir: nsDir, BaseDomain: base}, static, router, nil)
|
||||||
|
|
||||||
|
// First Apply succeeds and installs the anchat routes.
|
||||||
|
if err := d.Apply(); err != nil {
|
||||||
|
t.Fatalf("first Apply failed: %v", err)
|
||||||
|
}
|
||||||
|
before := len(router.Routes())
|
||||||
|
if before != 2 {
|
||||||
|
t.Fatalf("expected 2 routes after first apply, got %d", before)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Make the namespaces dir unreadable by pointing the discoverer at a now-
|
||||||
|
// removed path (simulate transient read failure).
|
||||||
|
d.cfg.NamespacesDir = filepath.Join(parent, "gone")
|
||||||
|
|
||||||
|
err := d.Apply()
|
||||||
|
if err == nil {
|
||||||
|
t.Fatalf("expected Apply to error on missing namespaces dir")
|
||||||
|
}
|
||||||
|
|
||||||
|
// Routes must be unchanged — the failed scan kept the previous table.
|
||||||
|
after := router.Routes()
|
||||||
|
if len(after) != before {
|
||||||
|
t.Errorf("routes changed on transient error: had %d, now %d", before, len(after))
|
||||||
|
}
|
||||||
|
stealthHost := turn.StealthHostForNamespace("anchat", base)
|
||||||
|
if router.Pick(stealthHost).Addr != "127.0.0.1:5349" {
|
||||||
|
t.Errorf("previously-installed stealth route lost after transient error")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestTURNRouteDiscoverer_staticSourceErrorKeepsRoutes verifies a failing static
|
||||||
|
// source (e.g. a bad config-file edit) also leaves the router untouched.
|
||||||
|
func TestTURNRouteDiscoverer_staticSourceErrorKeepsRoutes(t *testing.T) {
|
||||||
|
dir := t.TempDir()
|
||||||
|
const base = "example.com"
|
||||||
|
writeTURNConfig(t, dir, "anchat", "node-1", "0.0.0.0:5349")
|
||||||
|
|
||||||
|
fallback := Backend{Name: "caddy", Network: "tcp", Addr: "127.0.0.1:8443"}
|
||||||
|
good := func() ([]Route, Backend, error) { return nil, fallback, nil }
|
||||||
|
|
||||||
|
router := NewRouter(Backend{})
|
||||||
|
d := NewTURNRouteDiscoverer(TURNDiscoveryConfig{NamespacesDir: dir, BaseDomain: base}, good, router, nil)
|
||||||
|
if err := d.Apply(); err != nil {
|
||||||
|
t.Fatalf("first Apply failed: %v", err)
|
||||||
|
}
|
||||||
|
before := len(router.Routes())
|
||||||
|
|
||||||
|
// Swap in a static source that errors (simulates a malformed config file).
|
||||||
|
d.static = func() ([]Route, Backend, error) { return nil, Backend{}, errors.New("bad config") }
|
||||||
|
if err := d.Apply(); err == nil {
|
||||||
|
t.Fatalf("expected Apply to error on static source failure")
|
||||||
|
}
|
||||||
|
if len(router.Routes()) != before {
|
||||||
|
t.Errorf("routes changed on static-source error: had %d, now %d", before, len(router.Routes()))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestMergeRoutes_staticPrecedesDiscovered checks first-match ordering: static
|
||||||
|
// routes precede discovered ones in the merged slice.
|
||||||
|
func TestMergeRoutes_staticPrecedesDiscovered(t *testing.T) {
|
||||||
|
static := []Route{{Match: "a.example.com", Backend: Backend{Addr: "127.0.0.1:1"}}}
|
||||||
|
discovered := []Route{
|
||||||
|
{Match: "a.example.com", Backend: Backend{Addr: "127.0.0.1:2"}}, // conflict, dropped
|
||||||
|
{Match: "b.example.com", Backend: Backend{Addr: "127.0.0.1:3"}},
|
||||||
|
}
|
||||||
|
merged := mergeRoutes(static, discovered)
|
||||||
|
if len(merged) != 2 {
|
||||||
|
t.Fatalf("expected 2 merged routes (1 static + 1 non-conflicting), got %d: %+v", len(merged), merged)
|
||||||
|
}
|
||||||
|
if merged[0].Match != "a.example.com" || merged[0].Backend.Addr != "127.0.0.1:1" {
|
||||||
|
t.Errorf("static route should be first and unchanged: %+v", merged[0])
|
||||||
|
}
|
||||||
|
if merged[1].Match != "b.example.com" {
|
||||||
|
t.Errorf("non-conflicting discovered route missing: %+v", merged)
|
||||||
|
}
|
||||||
|
}
|
||||||
185
core/pkg/sniproxy/discovery.go
Normal file
185
core/pkg/sniproxy/discovery.go
Normal file
@ -0,0 +1,185 @@
|
|||||||
|
package sniproxy
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"net"
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
"sort"
|
||||||
|
"strings"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/DeBrosOfficial/network/pkg/turn"
|
||||||
|
"go.uber.org/zap"
|
||||||
|
"gopkg.in/yaml.v3"
|
||||||
|
)
|
||||||
|
|
||||||
|
// DefaultDiscoveryRescanInterval is the default cadence at which the TURN route
// discoverer rescans the namespaces directory; it is used whenever the
// configured RescanInterval is zero or negative. SNI route changes (a namespace
// gaining or losing its TURNS listener) are infrequent, so 30s of detection
// latency is acceptable and keeps load on the filesystem negligible.
const DefaultDiscoveryRescanInterval = 30 * time.Second
|
||||||
|
|
||||||
|
// turnConfigGlob matches the per-node TURN config files the namespace spawner
// writes under "<namespaces_dir>/<namespace>/configs/turn-<nodeID>.yaml". It is
// joined onto the namespace directory, so it is relative to that directory.
const turnConfigGlob = "configs/turn-*.yaml"
|
||||||
|
|
||||||
|
// stealthBackendNamePrefix is prepended to the namespace to form the Name of a
// discovered TURN backend, labelling it in logs/metrics.
const stealthBackendNamePrefix = "turn-stealth-"
|
||||||
|
|
||||||
|
// Each namespace that exposes a TURNS listener is reachable under two SNI
// hostname shapes, both of which the router forwards to that listener:
//   - the bland hashed host from turn.StealthHostForNamespace (DPI-resistant)
//   - a human-readable "turn.ns-<namespace>.<base_domain>" alias (operator UX)
|
||||||
|
|
||||||
|
// TURNDiscoveryConfig configures the namespaces scan that derives per-namespace
// stealth-TURN routes. NamespacesDir and BaseDomain are required (see
// Validate); RescanInterval is optional, and a zero value selects
// DefaultDiscoveryRescanInterval.
type TURNDiscoveryConfig struct {
	// NamespacesDir is the directory holding one subdirectory per namespace,
	// each containing a "configs/turn-*.yaml" written by the namespace spawner
	// (e.g. "/opt/orama/.orama/data/namespaces").
	NamespacesDir string `yaml:"namespaces_dir"`

	// BaseDomain is the cluster's base domain (e.g. "orama-devnet.network"),
	// used to derive the stealth and "turn.ns-*" SNI hostnames.
	BaseDomain string `yaml:"base_domain"`

	// RescanInterval is how often the namespaces directory is rescanned. Zero
	// selects DefaultDiscoveryRescanInterval.
	RescanInterval time.Duration `yaml:"rescan_interval"`
}
|
||||||
|
|
||||||
|
// Validate reports configuration errors. It does not touch the filesystem; a
|
||||||
|
// missing NamespacesDir at scan time is a transient error handled by the
|
||||||
|
// discoverer (previous routes are kept), not a config error.
|
||||||
|
func (c *TURNDiscoveryConfig) Validate() []string {
|
||||||
|
var errs []string
|
||||||
|
if c.NamespacesDir == "" {
|
||||||
|
errs = append(errs, "turn_discovery.namespaces_dir: required")
|
||||||
|
}
|
||||||
|
if c.BaseDomain == "" {
|
||||||
|
errs = append(errs, "turn_discovery.base_domain: required")
|
||||||
|
}
|
||||||
|
return errs
|
||||||
|
}
|
||||||
|
|
||||||
|
// DiscoverTURNRoutes scans cfg.NamespacesDir for per-namespace TURN configs and
|
||||||
|
// returns two routes per namespace that exposes a TURNS listener:
|
||||||
|
//
|
||||||
|
// - turn.StealthHostForNamespace(namespace, baseDomain) -> 127.0.0.1:<tls-port>
|
||||||
|
// - "turn.ns-<namespace>.<baseDomain>" -> 127.0.0.1:<tls-port>
|
||||||
|
//
|
||||||
|
// Namespaces whose TURN config has an empty turns_listen_addr (TURNS disabled)
|
||||||
|
// are skipped. A turn-*.yaml that cannot be read or parsed is skipped with a
|
||||||
|
// per-file warning, but the scan continues for the rest — one bad file must not
|
||||||
|
// hide every other namespace's routes.
|
||||||
|
//
|
||||||
|
// A failure to read the namespaces directory itself returns an error so callers
|
||||||
|
// can keep the previously-installed routes rather than wiping them on a
|
||||||
|
// transient filesystem error.
|
||||||
|
func DiscoverTURNRoutes(cfg TURNDiscoveryConfig, logger *zap.Logger) ([]Route, error) {
|
||||||
|
if logger == nil {
|
||||||
|
logger = zap.NewNop()
|
||||||
|
}
|
||||||
|
|
||||||
|
entries, err := os.ReadDir(cfg.NamespacesDir)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("read namespaces dir %s: %w", cfg.NamespacesDir, err)
|
||||||
|
}
|
||||||
|
|
||||||
|
var routes []Route
|
||||||
|
for _, entry := range entries {
|
||||||
|
if !entry.IsDir() {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
nsRoutes := discoverNamespaceRoutes(cfg, entry.Name(), logger)
|
||||||
|
routes = append(routes, nsRoutes...)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Deterministic order keeps Router.Replace idempotent and tests stable.
|
||||||
|
sort.Slice(routes, func(i, j int) bool { return routes[i].Match < routes[j].Match })
|
||||||
|
return routes, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// discoverNamespaceRoutes resolves the stealth + alias routes for a single
|
||||||
|
// namespace directory. Returns nil when the namespace has no TURNS listener or
|
||||||
|
// its config is unreadable/unparseable (logged, not fatal).
|
||||||
|
func discoverNamespaceRoutes(cfg TURNDiscoveryConfig, nsDir string, logger *zap.Logger) []Route {
|
||||||
|
glob := filepath.Join(cfg.NamespacesDir, nsDir, turnConfigGlob)
|
||||||
|
matches, err := filepath.Glob(glob)
|
||||||
|
if err != nil {
|
||||||
|
// Glob only errors on a malformed pattern, which turnConfigGlob is not;
|
||||||
|
// guard anyway so a future edit can't silently swallow it.
|
||||||
|
logger.Warn("turn-config glob failed",
|
||||||
|
zap.String("namespace_dir", nsDir), zap.Error(err))
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, configPath := range matches {
|
||||||
|
namespace, tlsPort, ok := parseTURNConfig(configPath, logger)
|
||||||
|
if !ok {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
backend := Backend{
|
||||||
|
Name: stealthBackendNamePrefix + namespace,
|
||||||
|
Network: "tcp",
|
||||||
|
Addr: net.JoinHostPort("127.0.0.1", tlsPort),
|
||||||
|
}
|
||||||
|
return []Route{
|
||||||
|
{Match: turn.StealthHostForNamespace(namespace, cfg.BaseDomain), Backend: backend},
|
||||||
|
{Match: fmt.Sprintf("turn.ns-%s.%s", namespace, cfg.BaseDomain), Backend: backend},
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// parseTURNConfig reads a turn-*.yaml and returns its namespace and TURNS port.
|
||||||
|
// ok is false (with a warning) when the file is unreadable/unparseable, when it
|
||||||
|
// names no namespace, or when TURNS is disabled (empty turns_listen_addr).
|
||||||
|
func parseTURNConfig(path string, logger *zap.Logger) (namespace, tlsPort string, ok bool) {
|
||||||
|
data, err := os.ReadFile(path)
|
||||||
|
if err != nil {
|
||||||
|
logger.Warn("read turn config failed", zap.String("path", path), zap.Error(err))
|
||||||
|
return "", "", false
|
||||||
|
}
|
||||||
|
|
||||||
|
var c turn.Config
|
||||||
|
if err := yaml.Unmarshal(data, &c); err != nil {
|
||||||
|
logger.Warn("parse turn config failed", zap.String("path", path), zap.Error(err))
|
||||||
|
return "", "", false
|
||||||
|
}
|
||||||
|
|
||||||
|
if c.Namespace == "" {
|
||||||
|
logger.Warn("turn config has empty namespace", zap.String("path", path))
|
||||||
|
return "", "", false
|
||||||
|
}
|
||||||
|
if strings.TrimSpace(c.TURNSListenAddr) == "" {
|
||||||
|
// TURNS disabled for this namespace — no stealth route, not an error.
|
||||||
|
return "", "", false
|
||||||
|
}
|
||||||
|
|
||||||
|
port, err := portFromListenAddr(c.TURNSListenAddr)
|
||||||
|
if err != nil {
|
||||||
|
logger.Warn("turn config has invalid turns_listen_addr",
|
||||||
|
zap.String("path", path),
|
||||||
|
zap.String("turns_listen_addr", c.TURNSListenAddr),
|
||||||
|
zap.Error(err))
|
||||||
|
return "", "", false
|
||||||
|
}
|
||||||
|
return c.Namespace, port, true
|
||||||
|
}
|
||||||
|
|
||||||
|
// portFromListenAddr returns the port component of a "host:port" TURNS listen
// address (e.g. "0.0.0.0:5349" -> "5349"). The host part is discarded because
// the router always dials 127.0.0.1. An address that does not split into
// host and port, or whose port is empty, yields an error.
func portFromListenAddr(addr string) (string, error) {
	_, p, splitErr := net.SplitHostPort(addr)
	switch {
	case splitErr != nil:
		return "", fmt.Errorf("split host:port: %w", splitErr)
	case p == "":
		return "", fmt.Errorf("empty port in %q", addr)
	default:
		return p, nil
	}
}
|
||||||
167
core/pkg/sniproxy/discovery_test.go
Normal file
167
core/pkg/sniproxy/discovery_test.go
Normal file
@ -0,0 +1,167 @@
|
|||||||
|
package sniproxy
|
||||||
|
|
||||||
|
import (
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/DeBrosOfficial/network/pkg/turn"
|
||||||
|
)
|
||||||
|
|
||||||
|
// writeTURNConfig is a test helper that reproduces the on-disk layout the
// namespace spawner creates: <namespacesDir>/<namespace>/configs/turn-<nodeID>.yaml.
// The file holds only the namespace and turns_listen_addr keys. Any filesystem
// failure aborts the calling test.
func writeTURNConfig(t *testing.T, namespacesDir, namespace, nodeID, turnsAddr string) {
	t.Helper()

	cfgDir := filepath.Join(namespacesDir, namespace, "configs")
	if mkErr := os.MkdirAll(cfgDir, 0755); mkErr != nil {
		t.Fatalf("mkdir configs failed: %v", mkErr)
	}

	body := "namespace: \"" + namespace + "\"\n" +
		"turns_listen_addr: \"" + turnsAddr + "\"\n"
	target := filepath.Join(cfgDir, "turn-"+nodeID+".yaml")
	if wrErr := os.WriteFile(target, []byte(body), 0644); wrErr != nil {
		t.Fatalf("write turn config failed: %v", wrErr)
	}
}
|
||||||
|
|
||||||
|
// TestDiscoverTURNRoutes_scansFixtureDir verifies that two namespaces each with
|
||||||
|
// a TURNS listener yield two routes apiece (stealth host + turn.ns-* alias),
|
||||||
|
// while a namespace with an empty turns_listen_addr is skipped entirely.
|
||||||
|
func TestDiscoverTURNRoutes_scansFixtureDir(t *testing.T) {
|
||||||
|
dir := t.TempDir()
|
||||||
|
const base = "orama-devnet.network"
|
||||||
|
|
||||||
|
writeTURNConfig(t, dir, "anchat", "node-1", "0.0.0.0:5349")
|
||||||
|
writeTURNConfig(t, dir, "video", "node-1", "0.0.0.0:5350")
|
||||||
|
// TURNS disabled — must produce no routes.
|
||||||
|
writeTURNConfig(t, dir, "noturns", "node-1", "")
|
||||||
|
|
||||||
|
routes, err := DiscoverTURNRoutes(TURNDiscoveryConfig{
|
||||||
|
NamespacesDir: dir,
|
||||||
|
BaseDomain: base,
|
||||||
|
}, nil)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("DiscoverTURNRoutes failed: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// 2 namespaces with TURNS × 2 routes each = 4.
|
||||||
|
if len(routes) != 4 {
|
||||||
|
t.Fatalf("expected 4 routes, got %d: %+v", len(routes), routes)
|
||||||
|
}
|
||||||
|
|
||||||
|
got := map[string]string{}
|
||||||
|
for _, r := range routes {
|
||||||
|
got[r.Match] = r.Backend.Addr
|
||||||
|
}
|
||||||
|
|
||||||
|
// anchat: backend port 5349, stealth host + alias.
|
||||||
|
anchatStealth := turn.StealthHostForNamespace("anchat", base)
|
||||||
|
if got[anchatStealth] != "127.0.0.1:5349" {
|
||||||
|
t.Errorf("anchat stealth route missing/wrong: %q -> %q", anchatStealth, got[anchatStealth])
|
||||||
|
}
|
||||||
|
if got["turn.ns-anchat."+base] != "127.0.0.1:5349" {
|
||||||
|
t.Errorf("anchat alias route missing/wrong: got %q", got["turn.ns-anchat."+base])
|
||||||
|
}
|
||||||
|
|
||||||
|
// video: backend port 5350.
|
||||||
|
videoStealth := turn.StealthHostForNamespace("video", base)
|
||||||
|
if got[videoStealth] != "127.0.0.1:5350" {
|
||||||
|
t.Errorf("video stealth route missing/wrong: %q -> %q", videoStealth, got[videoStealth])
|
||||||
|
}
|
||||||
|
if got["turn.ns-video."+base] != "127.0.0.1:5350" {
|
||||||
|
t.Errorf("video alias route missing/wrong: got %q", got["turn.ns-video."+base])
|
||||||
|
}
|
||||||
|
|
||||||
|
// The disabled namespace must not appear under any of its hostnames.
|
||||||
|
if _, ok := got["turn.ns-noturns."+base]; ok {
|
||||||
|
t.Errorf("noturns namespace should be skipped (empty turns_listen_addr)")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestDiscoverTURNRoutes_emptyTURNSAddrSkipped is a focused check that a single
|
||||||
|
// namespace with an empty turns_listen_addr produces zero routes (no error).
|
||||||
|
func TestDiscoverTURNRoutes_emptyTURNSAddrSkipped(t *testing.T) {
|
||||||
|
dir := t.TempDir()
|
||||||
|
writeTURNConfig(t, dir, "noturns", "node-1", "")
|
||||||
|
|
||||||
|
routes, err := DiscoverTURNRoutes(TURNDiscoveryConfig{
|
||||||
|
NamespacesDir: dir,
|
||||||
|
BaseDomain: "example.com",
|
||||||
|
}, nil)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("DiscoverTURNRoutes failed: %v", err)
|
||||||
|
}
|
||||||
|
if len(routes) != 0 {
|
||||||
|
t.Errorf("expected 0 routes for TURNS-disabled namespace, got %d: %+v", len(routes), routes)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestDiscoverTURNRoutes_unreadableDirReturnsError verifies a missing namespaces
|
||||||
|
// directory is a transient error (so callers keep previous routes), not a silent
|
||||||
|
// empty result.
|
||||||
|
func TestDiscoverTURNRoutes_unreadableDirReturnsError(t *testing.T) {
|
||||||
|
missing := filepath.Join(t.TempDir(), "does-not-exist")
|
||||||
|
|
||||||
|
routes, err := DiscoverTURNRoutes(TURNDiscoveryConfig{
|
||||||
|
NamespacesDir: missing,
|
||||||
|
BaseDomain: "example.com",
|
||||||
|
}, nil)
|
||||||
|
if err == nil {
|
||||||
|
t.Fatalf("expected an error for unreadable namespaces dir, got nil (routes=%+v)", routes)
|
||||||
|
}
|
||||||
|
if routes != nil {
|
||||||
|
t.Errorf("expected nil routes on error, got %+v", routes)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestDiscoverTURNRoutes_malformedFileSkipped verifies one unparseable
|
||||||
|
// turn-*.yaml is skipped while a sibling valid namespace still yields routes
|
||||||
|
// (one bad file must not hide the rest).
|
||||||
|
func TestDiscoverTURNRoutes_malformedFileSkipped(t *testing.T) {
|
||||||
|
dir := t.TempDir()
|
||||||
|
const base = "example.com"
|
||||||
|
|
||||||
|
writeTURNConfig(t, dir, "good", "node-1", "0.0.0.0:5349")
|
||||||
|
|
||||||
|
badDir := filepath.Join(dir, "bad", "configs")
|
||||||
|
if err := os.MkdirAll(badDir, 0755); err != nil {
|
||||||
|
t.Fatalf("mkdir bad configs failed: %v", err)
|
||||||
|
}
|
||||||
|
if err := os.WriteFile(filepath.Join(badDir, "turn-node-1.yaml"), []byte(":\n not: [valid"), 0644); err != nil {
|
||||||
|
t.Fatalf("write malformed config failed: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
routes, err := DiscoverTURNRoutes(TURNDiscoveryConfig{
|
||||||
|
NamespacesDir: dir,
|
||||||
|
BaseDomain: base,
|
||||||
|
}, nil)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("DiscoverTURNRoutes failed: %v", err)
|
||||||
|
}
|
||||||
|
if len(routes) != 2 {
|
||||||
|
t.Fatalf("expected 2 routes from the good namespace, got %d: %+v", len(routes), routes)
|
||||||
|
}
|
||||||
|
goodStealth := turn.StealthHostForNamespace("good", base)
|
||||||
|
found := false
|
||||||
|
for _, r := range routes {
|
||||||
|
if r.Match == goodStealth {
|
||||||
|
found = true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if !found {
|
||||||
|
t.Errorf("good namespace stealth route missing despite malformed sibling")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestTURNDiscoveryConfig_Validate covers the required-field validation.
|
||||||
|
func TestTURNDiscoveryConfig_Validate(t *testing.T) {
|
||||||
|
if errs := (&TURNDiscoveryConfig{NamespacesDir: "/x", BaseDomain: "example.com"}).Validate(); len(errs) != 0 {
|
||||||
|
t.Errorf("valid config reported errors: %v", errs)
|
||||||
|
}
|
||||||
|
if errs := (&TURNDiscoveryConfig{BaseDomain: "example.com"}).Validate(); len(errs) == 0 {
|
||||||
|
t.Errorf("missing namespaces_dir should be invalid")
|
||||||
|
}
|
||||||
|
if errs := (&TURNDiscoveryConfig{NamespacesDir: "/x"}).Validate(); len(errs) == 0 {
|
||||||
|
t.Errorf("missing base_domain should be invalid")
|
||||||
|
}
|
||||||
|
}
|
||||||
@ -36,6 +36,27 @@ type Config struct {
|
|||||||
|
|
||||||
// Namespace this TURN instance belongs to
|
// Namespace this TURN instance belongs to
|
||||||
Namespace string `yaml:"namespace"`
|
Namespace string `yaml:"namespace"`
|
||||||
|
|
||||||
|
// StealthDomain is the neutral, CDN-bland SNI hostname this server also
|
||||||
|
// answers TURNS for (e.g. "cdn-a1b2c3d4e5f6.orama-devnet.network").
|
||||||
|
//
|
||||||
|
// The stealth endpoint is an SNI-router passthrough, NOT a separate TURN
|
||||||
|
// server: a router on :443 reads only the TLS ClientHello SNI and forwards
|
||||||
|
// the raw bytes for this hostname to this same TURNS listener. TLS is still
|
||||||
|
// terminated here, by this TURN server, which therefore presents two certs
|
||||||
|
// (the primary TURN domain and StealthDomain) selected by ClientHello SNI.
|
||||||
|
// When empty, the stealth endpoint is disabled and behavior is unchanged.
|
||||||
|
StealthDomain string `yaml:"stealth_domain,omitempty"`
|
||||||
|
|
||||||
|
// TLSStealthCertPath is the path to the TLS certificate PEM file presented
|
||||||
|
// for StealthDomain. The SNI router only forwards bytes; this TURN server
|
||||||
|
// terminates the TLS handshake, so it needs the stealth domain's cert here.
|
||||||
|
TLSStealthCertPath string `yaml:"tls_stealth_cert_path,omitempty"`
|
||||||
|
|
||||||
|
// TLSStealthKeyPath is the path to the TLS private key PEM file for the
|
||||||
|
// StealthDomain certificate (TURN terminates TLS for the router-forwarded
|
||||||
|
// stealth connections).
|
||||||
|
TLSStealthKeyPath string `yaml:"tls_stealth_key_path,omitempty"`
|
||||||
}
|
}
|
||||||
|
|
||||||
// Validate checks the TURN configuration for errors
|
// Validate checks the TURN configuration for errors
|
||||||
|
|||||||
@ -15,6 +15,11 @@ import (
|
|||||||
"go.uber.org/zap"
|
"go.uber.org/zap"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
// stealthConfigFieldCount is the number of stealth TLS config fields that must
|
||||||
|
// be set together (StealthDomain, TLSStealthCertPath, TLSStealthKeyPath). Any
|
||||||
|
// other count is a partial config and fails server startup.
|
||||||
|
const stealthConfigFieldCount = 3
|
||||||
|
|
||||||
// Server wraps a Pion TURN server with namespace-scoped HMAC-SHA1 authentication.
|
// Server wraps a Pion TURN server with namespace-scoped HMAC-SHA1 authentication.
|
||||||
type Server struct {
|
type Server struct {
|
||||||
config *Config
|
config *Config
|
||||||
@ -24,8 +29,9 @@ type Server struct {
|
|||||||
tcpListener net.Listener // Plain TCP listener on primary port (3478)
|
tcpListener net.Listener // Plain TCP listener on primary port (3478)
|
||||||
tlsListener net.Listener // TLS TCP listener for TURNS (port 5349)
|
tlsListener net.Listener // TLS TCP listener for TURNS (port 5349)
|
||||||
|
|
||||||
certReloader *certReloader // hot-reloads the TURNS cert; nil when TURNS disabled
|
certReloader *certReloader // hot-reloads the primary TURNS cert; nil when TURNS disabled
|
||||||
certStop chan struct{} // closed to stop the cert-reload watcher goroutine
|
stealthCertReloader *certReloader // hot-reloads the stealth-SNI cert; nil when stealth disabled
|
||||||
|
certStop chan struct{} // closed to stop the cert-reload watcher goroutine(s)
|
||||||
}
|
}
|
||||||
|
|
||||||
// NewServer creates and starts a TURN server.
|
// NewServer creates and starts a TURN server.
|
||||||
@ -94,8 +100,18 @@ func NewServer(cfg *Config, logger *zap.Logger) (*Server, error) {
|
|||||||
s.closeListeners()
|
s.closeListeners()
|
||||||
return nil, fmt.Errorf("failed to load TLS cert/key: %w", err)
|
return nil, fmt.Errorf("failed to load TLS cert/key: %w", err)
|
||||||
}
|
}
|
||||||
|
s.certReloader = reloader
|
||||||
|
|
||||||
|
// Stealth SNI: when configured, terminate TLS for a second (neutral)
|
||||||
|
// hostname using its own hot-reloading cert. The SNI router forwards the
|
||||||
|
// raw stealth-domain bytes to this listener; selection is by ServerName.
|
||||||
|
if err := s.loadStealthCertReloader(cfg); err != nil {
|
||||||
|
s.closeListeners()
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
tlsConfig := &tls.Config{
|
tlsConfig := &tls.Config{
|
||||||
GetCertificate: reloader.GetCertificate,
|
GetCertificate: newGetCertificate(cfg.StealthDomain, reloader, s.stealthCertReloader),
|
||||||
MinVersion: tls.VersionTLS12,
|
MinVersion: tls.VersionTLS12,
|
||||||
}
|
}
|
||||||
tlsListener, err := tls.Listen("tcp", cfg.TURNSListenAddr, tlsConfig)
|
tlsListener, err := tls.Listen("tcp", cfg.TURNSListenAddr, tlsConfig)
|
||||||
@ -104,9 +120,11 @@ func NewServer(cfg *Config, logger *zap.Logger) (*Server, error) {
|
|||||||
return nil, fmt.Errorf("failed to listen on %s: %w", cfg.TURNSListenAddr, err)
|
return nil, fmt.Errorf("failed to listen on %s: %w", cfg.TURNSListenAddr, err)
|
||||||
}
|
}
|
||||||
s.tlsListener = tlsListener
|
s.tlsListener = tlsListener
|
||||||
s.certReloader = reloader
|
|
||||||
s.certStop = make(chan struct{})
|
s.certStop = make(chan struct{})
|
||||||
go reloader.watch(turnCertReloadInterval, s.certStop)
|
go reloader.watch(turnCertReloadInterval, s.certStop)
|
||||||
|
if s.stealthCertReloader != nil {
|
||||||
|
go s.stealthCertReloader.watch(turnCertReloadInterval, s.certStop)
|
||||||
|
}
|
||||||
|
|
||||||
listenerConfigs = append(listenerConfigs, pionTurn.ListenerConfig{
|
listenerConfigs = append(listenerConfigs, pionTurn.ListenerConfig{
|
||||||
Listener: tlsListener,
|
Listener: tlsListener,
|
||||||
@ -150,6 +168,62 @@ func NewServer(cfg *Config, logger *zap.Logger) (*Server, error) {
|
|||||||
return s, nil
|
return s, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// loadStealthCertReloader sets up the second cert reloader used for the stealth
|
||||||
|
// SNI hostname, storing it on s.stealthCertReloader. The three stealth fields
|
||||||
|
// (StealthDomain, TLSStealthCertPath, TLSStealthKeyPath) are all-or-nothing: a
|
||||||
|
// partial config is an operator mistake and fails startup rather than silently
|
||||||
|
// running without the stealth endpoint. When none are set, stealth is disabled
|
||||||
|
// and the primary TLS path is byte-for-byte unchanged.
|
||||||
|
func (s *Server) loadStealthCertReloader(cfg *Config) error {
|
||||||
|
set := 0
|
||||||
|
if cfg.StealthDomain != "" {
|
||||||
|
set++
|
||||||
|
}
|
||||||
|
if cfg.TLSStealthCertPath != "" {
|
||||||
|
set++
|
||||||
|
}
|
||||||
|
if cfg.TLSStealthKeyPath != "" {
|
||||||
|
set++
|
||||||
|
}
|
||||||
|
if set == 0 {
|
||||||
|
return nil // stealth disabled
|
||||||
|
}
|
||||||
|
if set != stealthConfigFieldCount {
|
||||||
|
var missing []string
|
||||||
|
if cfg.StealthDomain == "" {
|
||||||
|
missing = append(missing, "stealth_domain")
|
||||||
|
}
|
||||||
|
if cfg.TLSStealthCertPath == "" {
|
||||||
|
missing = append(missing, "tls_stealth_cert_path")
|
||||||
|
}
|
||||||
|
if cfg.TLSStealthKeyPath == "" {
|
||||||
|
missing = append(missing, "tls_stealth_key_path")
|
||||||
|
}
|
||||||
|
return fmt.Errorf("turn: partial stealth config — set all of [stealth_domain, tls_stealth_cert_path, tls_stealth_key_path] or none; missing: %s", strings.Join(missing, ", "))
|
||||||
|
}
|
||||||
|
|
||||||
|
reloader, err := newCertReloader(cfg.TLSStealthCertPath, cfg.TLSStealthKeyPath, s.logger)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("failed to load stealth TLS cert/key (cert=%s, key=%s): %w", cfg.TLSStealthCertPath, cfg.TLSStealthKeyPath, err)
|
||||||
|
}
|
||||||
|
s.stealthCertReloader = reloader
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// newGetCertificate builds the tls.Config.GetCertificate callback. When the
|
||||||
|
// ClientHello ServerName equals stealthDomain (case-insensitively), it serves
|
||||||
|
// the stealth cert; every other case — including empty SNI and the primary TURN
|
||||||
|
// domain — serves the primary cert, preserving the pre-stealth behavior. When
|
||||||
|
// stealth is disabled (stealthReloader nil) it is exactly primary.GetCertificate.
|
||||||
|
func newGetCertificate(stealthDomain string, primary, stealth *certReloader) func(*tls.ClientHelloInfo) (*tls.Certificate, error) {
|
||||||
|
return func(hello *tls.ClientHelloInfo) (*tls.Certificate, error) {
|
||||||
|
if stealth != nil && hello != nil && strings.EqualFold(hello.ServerName, stealthDomain) {
|
||||||
|
return stealth.GetCertificate(hello)
|
||||||
|
}
|
||||||
|
return primary.GetCertificate(hello)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// authHandler validates HMAC-SHA1 credentials.
|
// authHandler validates HMAC-SHA1 credentials.
|
||||||
// Username format: {expiry_unix}:{namespace}
|
// Username format: {expiry_unix}:{namespace}
|
||||||
// Password: base64(HMAC-SHA1(shared_secret, username))
|
// Password: base64(HMAC-SHA1(shared_secret, username))
|
||||||
@ -239,6 +313,8 @@ func (s *Server) closeListeners() {
|
|||||||
s.tlsListener.Close()
|
s.tlsListener.Close()
|
||||||
s.tlsListener = nil
|
s.tlsListener = nil
|
||||||
}
|
}
|
||||||
|
s.certReloader = nil
|
||||||
|
s.stealthCertReloader = nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// GenerateCredentials creates time-limited HMAC-SHA1 TURN credentials.
|
// GenerateCredentials creates time-limited HMAC-SHA1 TURN credentials.
|
||||||
|
|||||||
26
core/pkg/turn/stealth.go
Normal file
26
core/pkg/turn/stealth.go
Normal file
@ -0,0 +1,26 @@
|
|||||||
|
package turn
|
||||||
|
|
||||||
|
import (
|
||||||
|
"crypto/sha256"
|
||||||
|
"encoding/hex"
|
||||||
|
"fmt"
|
||||||
|
)
|
||||||
|
|
||||||
|
// stealthHostHashBytes is the number of leading bytes of the namespace's
// SHA-256 digest that are hex-encoded into the stealth hostname label
// (6 bytes -> 12 hex characters). The short prefix keeps the label CDN-bland
// while making cross-namespace collisions negligible at platform scale.
const stealthHostHashBytes = 6

// StealthHostForNamespace returns the censorship-resistant TURNS hostname for
// a namespace, shaped as "cdn-<12-hex-of-sha256(namespace)>.<baseDomain>".
//
// Design (feat-124):
//   - the label never contains the namespace itself, because an SNI string
//     such as "cdn.ns-anchat-test.…" would hand DPI the exact app to block;
//   - the value is a pure function of its inputs, so every component (cluster
//     manager, namespace gateway, SNI router, DNS) derives the same host with
//     no extra coordination;
//   - it is unique per namespace, because the SNI router maps it to that
//     namespace's TURN-TLS backend.
//
// baseDomain is appended verbatim after a single dot.
func StealthHostForNamespace(namespace, baseDomain string) string {
	digest := sha256.Sum256([]byte(namespace))
	label := hex.EncodeToString(digest[:stealthHostHashBytes])
	return fmt.Sprintf("cdn-%s.%s", label, baseDomain)
}
|
||||||
201
core/pkg/turn/stealth_server_test.go
Normal file
201
core/pkg/turn/stealth_server_test.go
Normal file
@ -0,0 +1,201 @@
|
|||||||
|
package turn
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bytes"
|
||||||
|
"crypto/tls"
|
||||||
|
"path/filepath"
|
||||||
|
"strings"
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"go.uber.org/zap"
|
||||||
|
)
|
||||||
|
|
||||||
|
// feat-124: the stealth TURNS endpoint is an SNI-router passthrough — the TURN
|
||||||
|
// server terminates TLS for both the primary TURN domain and a neutral stealth
|
||||||
|
// domain, selecting the cert by ClientHello SNI. These pin: per-SNI selection
|
||||||
|
// (incl. empty SNI, case-insensitivity), partial-config startup failure, and
|
||||||
|
// the missing stealth-cert startup failure (no silent fallback).
|
||||||
|
|
||||||
|
const (
|
||||||
|
stealthTestDomain = "cdn-a1b2c3d4e5f6.orama-devnet.network"
|
||||||
|
turnTestDomain = "turn.orama-devnet.network"
|
||||||
|
)
|
||||||
|
|
||||||
|
func writeNamedCert(t *testing.T, dir, name string) (certPath, keyPath string) {
|
||||||
|
t.Helper()
|
||||||
|
certPath = filepath.Join(dir, name+".pem")
|
||||||
|
keyPath = filepath.Join(dir, name+".key.pem")
|
||||||
|
if err := GenerateSelfSignedCert(certPath, keyPath, "127.0.0.1"); err != nil {
|
||||||
|
t.Fatalf("GenerateSelfSignedCert(%s): %v", name, err)
|
||||||
|
}
|
||||||
|
return certPath, keyPath
|
||||||
|
}
|
||||||
|
|
||||||
|
// certLeafForSNI invokes getCert with a ClientHello carrying serverName and
// returns the first (leaf) entry of the served certificate chain. The calling
// test is aborted when getCert errors or serves an empty certificate.
func certLeafForSNI(t *testing.T, getCert func(*tls.ClientHelloInfo) (*tls.Certificate, error), serverName string) []byte {
	t.Helper()

	hello := &tls.ClientHelloInfo{ServerName: serverName}
	served, err := getCert(hello)
	switch {
	case err != nil:
		t.Fatalf("GetCertificate(%q): %v", serverName, err)
	case served == nil || len(served.Certificate) == 0:
		t.Fatalf("GetCertificate(%q) returned an empty certificate", serverName)
	}
	return served.Certificate[0]
}
|
||||||
|
|
||||||
|
func TestGetCertificate_stealthSNISelectsStealthCert(t *testing.T) {
|
||||||
|
dir := t.TempDir()
|
||||||
|
primaryCert, primaryKey := writeNamedCert(t, dir, "primary")
|
||||||
|
stealthCert, stealthKey := writeNamedCert(t, dir, "stealth")
|
||||||
|
|
||||||
|
primary, err := newCertReloader(primaryCert, primaryKey, zap.NewNop())
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("newCertReloader(primary): %v", err)
|
||||||
|
}
|
||||||
|
stealth, err := newCertReloader(stealthCert, stealthKey, zap.NewNop())
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("newCertReloader(stealth): %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
getCert := newGetCertificate(stealthTestDomain, primary, stealth)
|
||||||
|
|
||||||
|
wantPrimary := leafDER(t, primary)
|
||||||
|
wantStealth := leafDER(t, stealth)
|
||||||
|
if bytes.Equal(wantPrimary, wantStealth) {
|
||||||
|
t.Fatal("test setup error: primary and stealth certs must be distinct")
|
||||||
|
}
|
||||||
|
|
||||||
|
tests := []struct {
|
||||||
|
name string
|
||||||
|
serverName string
|
||||||
|
want []byte
|
||||||
|
}{
|
||||||
|
{"stealth SNI selects stealth cert", stealthTestDomain, wantStealth},
|
||||||
|
{"stealth SNI is case-insensitive", strings.ToUpper(stealthTestDomain), wantStealth},
|
||||||
|
{"turn domain SNI selects primary cert", turnTestDomain, wantPrimary},
|
||||||
|
{"empty SNI selects primary cert", "", wantPrimary},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, tt := range tests {
|
||||||
|
t.Run(tt.name, func(t *testing.T) {
|
||||||
|
got := certLeafForSNI(t, getCert, tt.serverName)
|
||||||
|
if !bytes.Equal(got, tt.want) {
|
||||||
|
t.Errorf("ServerName=%q served the wrong certificate", tt.serverName)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestGetCertificate_stealthDisabledAlwaysPrimary(t *testing.T) {
|
||||||
|
dir := t.TempDir()
|
||||||
|
primaryCert, primaryKey := writeNamedCert(t, dir, "primary")
|
||||||
|
primary, err := newCertReloader(primaryCert, primaryKey, zap.NewNop())
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("newCertReloader(primary): %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Stealth disabled (nil reloader): every SNI — including a string that looks
|
||||||
|
// like a stealth host — must serve the primary cert unchanged.
|
||||||
|
getCert := newGetCertificate("", primary, nil)
|
||||||
|
want := leafDER(t, primary)
|
||||||
|
|
||||||
|
for _, serverName := range []string{"", turnTestDomain, stealthTestDomain} {
|
||||||
|
if got := certLeafForSNI(t, getCert, serverName); !bytes.Equal(got, want) {
|
||||||
|
t.Errorf("ServerName=%q must serve the primary cert when stealth is disabled", serverName)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func baseStealthConfig(t *testing.T) *Config {
|
||||||
|
t.Helper()
|
||||||
|
dir := t.TempDir()
|
||||||
|
primaryCert, primaryKey := writeNamedCert(t, dir, "primary")
|
||||||
|
return &Config{
|
||||||
|
ListenAddr: "127.0.0.1:0",
|
||||||
|
TURNSListenAddr: "127.0.0.1:0",
|
||||||
|
TLSCertPath: primaryCert,
|
||||||
|
TLSKeyPath: primaryKey,
|
||||||
|
PublicIP: "127.0.0.1",
|
||||||
|
Realm: "orama-devnet.network",
|
||||||
|
AuthSecret: "test-secret-key",
|
||||||
|
RelayPortStart: 49152,
|
||||||
|
RelayPortEnd: 50000,
|
||||||
|
Namespace: "test-ns",
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestServer_partialStealthConfigFails(t *testing.T) {
|
||||||
|
tests := []struct {
|
||||||
|
name string
|
||||||
|
mutate func(c *Config)
|
||||||
|
wantMissing []string
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
name: "only stealth_domain set",
|
||||||
|
mutate: func(c *Config) { c.StealthDomain = stealthTestDomain },
|
||||||
|
wantMissing: []string{"tls_stealth_cert_path", "tls_stealth_key_path"},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "domain and cert set, key missing",
|
||||||
|
mutate: func(c *Config) { c.StealthDomain = stealthTestDomain; c.TLSStealthCertPath = "/tmp/x.pem" },
|
||||||
|
wantMissing: []string{"tls_stealth_key_path"},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "only cert path set",
|
||||||
|
mutate: func(c *Config) { c.TLSStealthCertPath = "/tmp/x.pem" },
|
||||||
|
wantMissing: []string{"stealth_domain", "tls_stealth_key_path"},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, tt := range tests {
|
||||||
|
t.Run(tt.name, func(t *testing.T) {
|
||||||
|
cfg := baseStealthConfig(t)
|
||||||
|
tt.mutate(cfg)
|
||||||
|
|
||||||
|
srv, err := NewServer(cfg, zap.NewNop())
|
||||||
|
if err == nil {
|
||||||
|
srv.Close()
|
||||||
|
t.Fatal("expected startup to fail on partial stealth config")
|
||||||
|
}
|
||||||
|
for _, field := range tt.wantMissing {
|
||||||
|
if !strings.Contains(err.Error(), field) {
|
||||||
|
t.Errorf("error must name the missing field %q; got: %v", field, err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestServer_missingStealthCertFails(t *testing.T) {
|
||||||
|
cfg := baseStealthConfig(t)
|
||||||
|
cfg.StealthDomain = stealthTestDomain
|
||||||
|
cfg.TLSStealthCertPath = filepath.Join(t.TempDir(), "absent-cert.pem")
|
||||||
|
cfg.TLSStealthKeyPath = filepath.Join(t.TempDir(), "absent-key.pem")
|
||||||
|
|
||||||
|
srv, err := NewServer(cfg, zap.NewNop())
|
||||||
|
if err == nil {
|
||||||
|
srv.Close()
|
||||||
|
t.Fatal("expected startup to fail when the stealth cert file is absent")
|
||||||
|
}
|
||||||
|
if !strings.Contains(err.Error(), cfg.TLSStealthCertPath) {
|
||||||
|
t.Errorf("error must name the missing stealth cert path %q; got: %v", cfg.TLSStealthCertPath, err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestServer_fullStealthConfigStarts(t *testing.T) {
|
||||||
|
cfg := baseStealthConfig(t)
|
||||||
|
dir := t.TempDir()
|
||||||
|
stealthCert, stealthKey := writeNamedCert(t, dir, "stealth")
|
||||||
|
cfg.StealthDomain = stealthTestDomain
|
||||||
|
cfg.TLSStealthCertPath = stealthCert
|
||||||
|
cfg.TLSStealthKeyPath = stealthKey
|
||||||
|
|
||||||
|
srv, err := NewServer(cfg, zap.NewNop())
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("expected startup to succeed with full stealth config: %v", err)
|
||||||
|
}
|
||||||
|
defer srv.Close()
|
||||||
|
if srv.stealthCertReloader == nil {
|
||||||
|
t.Error("stealthCertReloader must be set when stealth is fully configured")
|
||||||
|
}
|
||||||
|
}
|
||||||
53
core/pkg/turn/stealth_test.go
Normal file
53
core/pkg/turn/stealth_test.go
Normal file
@ -0,0 +1,53 @@
|
|||||||
|
package turn
|
||||||
|
|
||||||
|
import (
|
||||||
|
"regexp"
|
||||||
|
"strings"
|
||||||
|
"testing"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestStealthHostForNamespace_deterministic(t *testing.T) {
|
||||||
|
a := StealthHostForNamespace("anchat-test", "orama-devnet.network")
|
||||||
|
b := StealthHostForNamespace("anchat-test", "orama-devnet.network")
|
||||||
|
if a != b {
|
||||||
|
t.Fatalf("not deterministic: %q vs %q", a, b)
|
||||||
|
}
|
||||||
|
if !strings.HasPrefix(a, "cdn-") || !strings.HasSuffix(a, ".orama-devnet.network") {
|
||||||
|
t.Errorf("unexpected shape: %q", a)
|
||||||
|
}
|
||||||
|
// label = "cdn-" + 12 hex chars
|
||||||
|
label := strings.SplitN(a, ".", 2)[0]
|
||||||
|
if len(label) != len("cdn-")+stealthHostHashBytes*2 {
|
||||||
|
t.Errorf("label %q has wrong length", label)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestStealthHostForNamespace_namespaceNotLeaked(t *testing.T) {
|
||||||
|
h := StealthHostForNamespace("anchat-test", "orama-devnet.network")
|
||||||
|
if strings.Contains(h, "anchat") {
|
||||||
|
t.Errorf("stealth host %q leaks the namespace name", h)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestStealthHostForNamespace_distinctPerNamespace(t *testing.T) {
|
||||||
|
a := StealthHostForNamespace("ns-a", "example.com")
|
||||||
|
b := StealthHostForNamespace("ns-b", "example.com")
|
||||||
|
if a == b {
|
||||||
|
t.Fatalf("different namespaces produced the same stealth host %q", a)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestStealthHostForNamespace_matchesDNSNameAllowlist guards the contract that
|
||||||
|
// the derived host always passes the Caddyfile DNS-name allowlist
|
||||||
|
// (pkg/namespace turn_cert.go dnsNamePattern) — a legitimate stealth domain
|
||||||
|
// must never be rejected by that defense-in-depth check. Mirrors the same
|
||||||
|
// conservative pattern here to avoid an import cycle.
|
||||||
|
func TestStealthHostForNamespace_matchesDNSNameAllowlist(t *testing.T) {
|
||||||
|
dnsName := regexp.MustCompile(`^[a-z0-9]([a-z0-9-]*[a-z0-9])?(\.[a-z0-9]([a-z0-9-]*[a-z0-9])?)+$`)
|
||||||
|
for _, ns := range []string{"anchat-test", "a", "ns-with-many-dashes", "x1y2z3"} {
|
||||||
|
h := StealthHostForNamespace(ns, "orama-devnet.network")
|
||||||
|
if !dnsName.MatchString(h) {
|
||||||
|
t.Errorf("derived stealth host %q for ns %q fails the DNS-name allowlist", h, ns)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
Loading…
x
Reference in New Issue
Block a user