feat(namespace): reuse Caddy wildcard certificate for stealth TURNS

- Implement `resolveStealthCert` to use existing `*.<baseDomain>` wildcard certificates instead of dynamic Caddyfile provisioning.
- Avoids EROFS errors caused by `ProtectSystem=strict` on the orama-node service.
- Add strict validation to ensure stealth hosts are single-label subdomains covered by the wildcard.
This commit is contained in:
anonpenguin23 2026-06-11 10:04:45 +03:00
parent 37daf28b5a
commit 8375d92109
3 changed files with 147 additions and 13 deletions

View File

@ -5,6 +5,7 @@ import (
"fmt" "fmt"
"os" "os"
"path/filepath" "path/filepath"
"strings"
"time" "time"
production "github.com/DeBrosOfficial/network/pkg/environments/production" production "github.com/DeBrosOfficial/network/pkg/environments/production"
@ -582,6 +583,53 @@ func (s *SystemdSpawner) resolveTURNSCert(namespace, domain, publicIP, configDir
return certPath, keyPath, nil return certPath, keyPath, nil
} }
// resolveStealthCert locates the TLS certificate and key used for the stealth
// TURNS host by pointing at the `*.<baseDomain>` wildcard certificate that
// Caddy already keeps in its storage directory (feat-124). Nothing is
// provisioned here; the function only validates its inputs and checks that
// the files exist.
//
// Why reuse instead of provisioning: the stealth host has the shape
// cdn-<hash>.<baseDomain>, i.e. exactly one label under the base domain, so
// the gateway's HTTPS wildcard already covers it. The alternative — appending
// to the Caddyfile at runtime — is not available to the orama-node service,
// which runs with ProtectSystem=strict as the orama user and gets EROFS when
// writing under /etc/caddy. That failure used to degrade into a self-signed
// cert which clients reject, which looks the same as being blocked. Renewal
// is left to Caddy, and the TURN cert reloader picks the renewed files up
// from storage.
//
// Returns (certPath, keyPath, nil) on success. Returns an error — and never
// falls back to a self-signed certificate — when baseDomain is empty, when
// stealthDomain is not a single-label subdomain of baseDomain, or when either
// the wildcard cert or its key cannot be stat'ed: a stealth endpoint whose
// cert cannot be validated is worse than having no stealth endpoint.
func (s *SystemdSpawner) resolveStealthCert(stealthDomain, baseDomain string) (string, string, error) {
	switch {
	case baseDomain == "":
		return "", "", fmt.Errorf("stealth cert: base domain required")
	case !isSingleLabelSubdomain(stealthDomain, baseDomain):
		return "", "", fmt.Errorf("stealth cert: %q is not a single-label subdomain of %q (the *.%s wildcard cert would not cover it)", stealthDomain, baseDomain, baseDomain)
	}

	wildcardCrt, wildcardKey := caddyWildcardCertPaths(baseDomain)

	// Check the certificate first, then the key, so the error names the
	// first file that is missing.
	_, crtErr := os.Stat(wildcardCrt)
	if crtErr != nil {
		return "", "", fmt.Errorf("stealth cert: Caddy wildcard cert for *.%s not found at %s (is the gateway HTTPS wildcard provisioned on this node?): %w", baseDomain, wildcardCrt, crtErr)
	}
	_, keyErr := os.Stat(wildcardKey)
	if keyErr != nil {
		return "", "", fmt.Errorf("stealth cert: Caddy wildcard key for *.%s not found at %s: %w", baseDomain, wildcardKey, keyErr)
	}

	s.logger.Info("Using Caddy wildcard cert for stealth TURNS",
		zap.String("stealth_domain", stealthDomain),
		zap.String("cert_path", wildcardCrt))

	return wildcardCrt, wildcardKey, nil
}
// isSingleLabelSubdomain reports whether host sits exactly one DNS label
// below base (for example "cdn-x.example.com" under "example.com"). That is
// precisely the set of names a `*.base` wildcard certificate covers: the
// wildcard matches one label, never zero and never several.
//
// It returns false when:
//   - base is empty or begins with a dot (the derived suffix would be "." or
//     start with "..", which would otherwise accept malformed hosts such as
//     "foo." or "a..example.com");
//   - host does not end in "." + base;
//   - the part of host in front of that suffix is empty (host == "."+base)
//     or itself contains a dot (two or more labels).
//
// The comparison is a plain byte comparison; callers are expected to pass
// both names in the same case.
func isSingleLabelSubdomain(host, base string) bool {
	// A degenerate base cannot have subdomains; reject it up front rather
	// than building a nonsensical suffix from it.
	if base == "" || strings.HasPrefix(base, ".") {
		return false
	}

	suffix := "." + base
	if !strings.HasSuffix(host, suffix) {
		return false
	}

	label := strings.TrimSuffix(host, suffix)
	return label != "" && !strings.Contains(label, ".")
}
// SpawnTURN starts a TURN instance using systemd // SpawnTURN starts a TURN instance using systemd
func (s *SystemdSpawner) SpawnTURN(ctx context.Context, namespace, nodeID string, cfg TURNInstanceConfig) error { func (s *SystemdSpawner) SpawnTURN(ctx context.Context, namespace, nodeID string, cfg TURNInstanceConfig) error {
s.logger.Info("Spawning TURN via systemd", s.logger.Info("Spawning TURN via systemd",
@ -620,25 +668,24 @@ func (s *SystemdSpawner) SpawnTURN(ctx context.Context, namespace, nodeID string
var stealthCertPath, stealthKeyPath string var stealthCertPath, stealthKeyPath string
if cfg.StealthDomain != "" { if cfg.StealthDomain != "" {
// Security: the stealth domain arrives over the spawn protocol (mesh // Security: the stealth domain arrives over the spawn protocol (mesh
// peers gated only by the static internal-auth header). Before it // peers gated only by the static internal-auth header). Pin it to the
// reaches the Caddyfile/ACME sink, pin it to the deterministic // deterministic derivation so a forged value can't select cert
// derivation so a forged value can't drive cert issuance for an // material for an attacker-chosen name. cfg.Realm is the base domain
// attacker-chosen name. cfg.Realm is the base domain on every TURN // on every TURN spawn site.
// spawn site. (provisionTURNCertViaCaddy adds a DNS-name allowlist as if cfg.Realm == "" {
// defense-in-depth.) return fmt.Errorf("stealth TURNS for namespace %s requires a base domain (realm) to locate the wildcard cert", namespace)
if cfg.Realm != "" { }
want := turn.StealthHostForNamespace(cfg.Namespace, cfg.Realm) want := turn.StealthHostForNamespace(cfg.Namespace, cfg.Realm)
if cfg.StealthDomain != want { if cfg.StealthDomain != want {
return fmt.Errorf("stealth domain %q does not match the derived host %q for namespace %s — refusing to provision", cfg.StealthDomain, want, cfg.Namespace) return fmt.Errorf("stealth domain %q does not match the derived host %q for namespace %s — refusing to provision", cfg.StealthDomain, want, cfg.Namespace)
}
} }
if cfg.TURNSListenAddr == "" { if cfg.TURNSListenAddr == "" {
return fmt.Errorf("stealth TURNS for namespace %s requires an active TURNS listener (no TLS cert/listener available)", namespace) return fmt.Errorf("stealth TURNS for namespace %s requires an active TURNS listener (no TLS cert/listener available)", namespace)
} }
var stealthErr error var stealthErr error
stealthCertPath, stealthKeyPath, stealthErr = s.resolveTURNSCert(namespace, cfg.StealthDomain, cfg.PublicIP, configDir, false) stealthCertPath, stealthKeyPath, stealthErr = s.resolveStealthCert(cfg.StealthDomain, cfg.Realm)
if stealthErr != nil { if stealthErr != nil {
return fmt.Errorf("failed to provision stealth TURNS cert for namespace %s: %w", namespace, stealthErr) return fmt.Errorf("failed to resolve stealth TURNS cert for namespace %s: %w", namespace, stealthErr)
} }
} }

View File

@ -25,10 +25,30 @@ const (
// Caddy stores ACME certs under this directory relative to its data dir. // Caddy stores ACME certs under this directory relative to its data dir.
caddyACMECertDir = "certificates/acme-v02.api.letsencrypt.org-directory" caddyACMECertDir = "certificates/acme-v02.api.letsencrypt.org-directory"
// caddyServiceStorageDir is where the Caddy systemd service (User=orama,
// HOME=/var/lib/caddy) actually persists its ACME certificates on a node.
// The orama-node service runs ProtectSystem=strict and cannot write
// /etc/caddy, so the runtime "append-to-Caddyfile" provisioning path
// (provisionTURNCertViaCaddy) fails with EROFS — TURNS cert material is
// instead reused from this directory (see caddyWildcardCertPaths).
caddyServiceStorageDir = "/var/lib/caddy/caddy"
turnCertBeginMarker = "# BEGIN TURN CERT: " turnCertBeginMarker = "# BEGIN TURN CERT: "
turnCertEndMarker = "# END TURN CERT: " turnCertEndMarker = "# END TURN CERT: "
) )
// caddyWildcardCertPaths builds the on-disk locations of the certificate and
// private key for the `*.<baseDomain>` wildcard inside the Caddy service's
// storage (caddyServiceStorageDir joined with caddyACMECertDir).
//
// Caddy stores a wildcard under a directory called `wildcard_.<baseDomain>`
// and gives the files the same stem with `.crt` / `.key` extensions. Since
// the gateway already provisions this wildcard for HTTPS, any single-label
// subdomain of the base domain — such as the stealth TURNS host
// `cdn-<hash>.<baseDomain>` — can use it with no per-domain provisioning.
//
// The function only computes paths; it does not check that the files exist.
func caddyWildcardCertPaths(baseDomain string) (certPath, keyPath string) {
	stem := "wildcard_." + baseDomain
	wildcardDir := filepath.Join(caddyServiceStorageDir, caddyACMECertDir, stem)

	certPath = filepath.Join(wildcardDir, stem+".crt")
	keyPath = filepath.Join(wildcardDir, stem+".key")
	return certPath, keyPath
}
// provisionTURNCertViaCaddy appends the TURN domain to the local Caddyfile, // provisionTURNCertViaCaddy appends the TURN domain to the local Caddyfile,
// reloads Caddy to trigger DNS-01 ACME certificate provisioning, and waits // reloads Caddy to trigger DNS-01 ACME certificate provisioning, and waits
// for the cert files to appear. Returns the cert/key paths on success. // for the cert files to appear. Returns the cert/key paths on success.

View File

@ -106,3 +106,70 @@ func TestProvisionTURNCertViaCaddy_rejectsNonDNSName(t *testing.T) {
} }
} }
} }
// feat-124 stealth cert reuse: rather than writing the Caddyfile (which the
// orama-node service cannot do under ProtectSystem=strict), the stealth TURNS
// host borrows Caddy's existing *.<base> wildcard cert. The tests below lock
// down the validation around that reuse.

// TestIsSingleLabelSubdomain checks that only hosts exactly one label below
// the base are accepted, and that multi-label hosts, the bare base, and
// hosts under a different base are refused.
func TestIsSingleLabelSubdomain(t *testing.T) {
	type probe struct {
		host, base string
		want       bool
	}
	table := []probe{
		{host: "cdn-a1b2c3d4e5f6.orama-devnet.network", base: "orama-devnet.network", want: true},
		{host: "turn.ns-anchat-test.orama-devnet.network", base: "orama-devnet.network", want: false}, // multi-label
		{host: "orama-devnet.network", base: "orama-devnet.network", want: false},                     // empty label
		{host: "cdn-x.other.network", base: "orama-devnet.network", want: false},                      // wrong base
		{host: "cdn-x.example.com", base: "example.com", want: true},
	}

	for i := range table {
		tc := table[i]
		got := isSingleLabelSubdomain(tc.host, tc.base)
		if got == tc.want {
			continue
		}
		t.Errorf("isSingleLabelSubdomain(%q, %q) = %v; want %v", tc.host, tc.base, got, tc.want)
	}
}
// TestCaddyWildcardCertPaths_shape pins the exact certificate path produced
// for a known base domain and checks that the key path ends in the matching
// wildcard .key file name.
func TestCaddyWildcardCertPaths_shape(t *testing.T) {
	const (
		base          = "orama-devnet.network"
		expectedCrt   = "/var/lib/caddy/caddy/certificates/acme-v02.api.letsencrypt.org-directory/wildcard_.orama-devnet.network/wildcard_.orama-devnet.network.crt"
		expectedKeyFn = "wildcard_.orama-devnet.network.key"
	)

	gotCrt, gotKey := caddyWildcardCertPaths(base)

	if gotCrt != expectedCrt {
		t.Errorf("cert path = %q; want %q", gotCrt, expectedCrt)
	}
	if !strings.HasSuffix(gotKey, expectedKeyFn) {
		t.Errorf("key path = %q; want a wildcard .key", gotKey)
	}
}
// TestResolveStealthCert_rejectsMultiLabelHost verifies that a host two
// labels below the base is refused. Such a host would need a *.ns-x.<base>
// certificate, which the *.<base> wildcard does not provide, so the resolver
// must fail instead of handing back a cert that will not match.
func TestResolveStealthCert_rejectsMultiLabelHost(t *testing.T) {
	const (
		base = "orama-devnet.network"
		host = "turn.ns-x." + base
	)

	spawner := testSpawner(t)
	_, _, err := spawner.resolveStealthCert(host, base)

	if err == nil {
		t.Fatal("multi-label host must be rejected (wildcard wouldn't cover it)")
	}
	if msg := err.Error(); !strings.Contains(msg, "single-label") {
		t.Errorf("error should explain the single-label requirement; got: %v", err)
	}
}
// TestResolveStealthCert_missingWildcardErrors verifies the hard-failure path
// when the wildcard certificate is absent. The host is a valid single-label
// subdomain, but the base uses the reserved .invalid TLD, so no cert is
// expected at the absolute Caddy storage path on a test machine. The resolver
// must return an error mentioning the wildcard and never substitute a
// self-signed certificate.
func TestResolveStealthCert_missingWildcardErrors(t *testing.T) {
	const (
		base = "test-nonexistent-base.invalid"
		host = "cdn-deadbeef0000." + base
	)

	spawner := testSpawner(t)
	_, _, err := spawner.resolveStealthCert(host, base)

	if err == nil {
		t.Fatal("missing wildcard cert must hard-fail")
	}
	if msg := err.Error(); !strings.Contains(msg, "wildcard") {
		t.Errorf("error should reference the missing wildcard cert; got: %v", err)
	}
}
// TestResolveStealthCert_emptyBaseErrors verifies that an empty base domain
// is rejected with an error, even when the stealth host itself looks valid.
func TestResolveStealthCert_emptyBaseErrors(t *testing.T) {
	spawner := testSpawner(t)

	_, _, err := spawner.resolveStealthCert("cdn-x.example.com", "")
	if err != nil {
		return
	}
	t.Fatal("empty base domain must error")
}