From 810094771ddf674de1824955a4067f71873e80e5 Mon Sep 17 00:00:00 2001 From: anonpenguin23 Date: Sat, 31 Jan 2026 07:09:09 +0200 Subject: [PATCH] Updated docs and fixed WG bugs and ip's bugs --- docs/CLEAN_NODE.md | 141 ++++++++++++++++++ pkg/cli/production/install/orchestrator.go | 23 ++- pkg/environments/production/installers.go | 4 +- .../production/installers/caddy.go | 54 +++---- pkg/environments/production/orchestrator.go | 17 ++- pkg/environments/production/services.go | 3 +- pkg/environments/production/wireguard.go | 7 +- pkg/node/dns_registration.go | 123 +++++++++++---- 8 files changed, 299 insertions(+), 73 deletions(-) create mode 100644 docs/CLEAN_NODE.md diff --git a/docs/CLEAN_NODE.md b/docs/CLEAN_NODE.md new file mode 100644 index 0000000..94da064 --- /dev/null +++ b/docs/CLEAN_NODE.md @@ -0,0 +1,141 @@ +# Clean Node — Full Reset Guide + +How to completely remove all Orama Network state from a VPS so it can be reinstalled fresh. + +## Quick Clean (Copy-Paste) + +Run this as root or with sudo on the target VPS: + +```bash +# 1. Stop and disable all services +sudo systemctl stop debros-node debros-ipfs debros-ipfs-cluster debros-olric coredns caddy 2>/dev/null +sudo systemctl disable debros-node debros-ipfs debros-ipfs-cluster debros-olric coredns caddy 2>/dev/null + +# 2. Remove systemd service files +sudo rm -f /etc/systemd/system/debros-*.service +sudo rm -f /etc/systemd/system/coredns.service +sudo rm -f /etc/systemd/system/caddy.service +sudo systemctl daemon-reload + +# 3. Tear down WireGuard +# Must stop the systemd unit first — wg-quick@wg0 is a oneshot with +# RemainAfterExit=yes, so it stays "active (exited)" even after the +# interface is removed. Without "stop", a future "systemctl start" is a no-op. +sudo systemctl stop wg-quick@wg0 2>/dev/null +sudo wg-quick down wg0 2>/dev/null +sudo systemctl disable wg-quick@wg0 2>/dev/null +sudo rm -f /etc/wireguard/wg0.conf + +# 4. 
Reset UFW firewall +sudo ufw --force reset +sudo ufw allow 22/tcp +sudo ufw --force enable + +# 5. Remove debros user and home directory +sudo userdel -r debros 2>/dev/null +sudo rm -rf /home/debros + +# 6. Remove sudoers files +sudo rm -f /etc/sudoers.d/debros-access +sudo rm -f /etc/sudoers.d/debros-deployments +sudo rm -f /etc/sudoers.d/debros-wireguard + +# 7. Remove CoreDNS config +sudo rm -rf /etc/coredns + +# 8. Remove Caddy config and data +sudo rm -rf /etc/caddy +sudo rm -rf /var/lib/caddy + +# 9. Remove deployment systemd services (dynamic) +sudo rm -f /etc/systemd/system/orama-deploy-*.service +sudo systemctl daemon-reload + +# 10. Clean temp files +sudo rm -f /tmp/orama /tmp/network-source.tar.gz /tmp/network-source.zip +sudo rm -rf /tmp/network-extract /tmp/coredns-build /tmp/caddy-build + +echo "Node cleaned. Ready for fresh install." +``` + +## What This Removes + +| Category | Paths | +|----------|-------| +| **User** | `debros` system user and `/home/debros/` | +| **App data** | `/home/debros/.orama/` (configs, secrets, logs, IPFS, RQLite, Olric) | +| **Source code** | `/home/debros/src/` | +| **Binaries** | `/home/debros/bin/orama-node`, `/home/debros/bin/gateway` | +| **Systemd** | `debros-*.service`, `coredns.service`, `caddy.service`, `orama-deploy-*.service` | +| **WireGuard** | `/etc/wireguard/wg0.conf`, `wg-quick@wg0` systemd unit | +| **Firewall** | All UFW rules (reset to default + SSH only) | +| **Sudoers** | `/etc/sudoers.d/debros-*` | +| **CoreDNS** | `/etc/coredns/Corefile` | +| **Caddy** | `/etc/caddy/Caddyfile`, `/var/lib/caddy/` (TLS certs) | +| **Temp files** | `/tmp/orama`, `/tmp/network-source.*`, build dirs | + +## What This Does NOT Remove + +These are shared system tools that may be used by other software. 
Remove manually if desired: + +| Binary | Path | Remove Command | +|--------|------|----------------| +| RQLite | `/usr/local/bin/rqlited` | `sudo rm /usr/local/bin/rqlited` | +| IPFS | `/usr/local/bin/ipfs` | `sudo rm /usr/local/bin/ipfs` | +| IPFS Cluster | `/usr/local/bin/ipfs-cluster-service` | `sudo rm /usr/local/bin/ipfs-cluster-service` | +| Olric | `/usr/local/bin/olric-server` | `sudo rm /usr/local/bin/olric-server` | +| CoreDNS | `/usr/local/bin/coredns` | `sudo rm /usr/local/bin/coredns` | +| Caddy | `/usr/bin/caddy` | `sudo rm /usr/bin/caddy` | +| xcaddy | `/usr/local/bin/xcaddy` | `sudo rm /usr/local/bin/xcaddy` | +| Go | `/usr/local/go/` | `sudo rm -rf /usr/local/go` | +| Orama CLI | `/usr/local/bin/orama` | `sudo rm /usr/local/bin/orama` | + +## Nuclear Clean (Remove Everything Including Binaries) + +```bash +# Run quick clean above first, then: +sudo rm -f /usr/local/bin/rqlited +sudo rm -f /usr/local/bin/ipfs +sudo rm -f /usr/local/bin/ipfs-cluster-service +sudo rm -f /usr/local/bin/olric-server +sudo rm -f /usr/local/bin/coredns +sudo rm -f /usr/local/bin/xcaddy +sudo rm -f /usr/bin/caddy +sudo rm -f /usr/local/bin/orama +``` + +## Multi-Node Clean + +To clean all nodes at once from your local machine: + +```bash +# Define your nodes +NODES=( + "ubuntu@141.227.165.168:password1" + "ubuntu@141.227.165.154:password2" + "ubuntu@141.227.156.51:password3" +) + +for entry in "${NODES[@]}"; do + IFS=: read -r userhost pass <<< "$entry" + echo "Cleaning $userhost..." 
+ sshpass -p "$pass" ssh -o StrictHostKeyChecking=no "$userhost" 'bash -s' << 'CLEAN' +sudo systemctl stop debros-node debros-ipfs debros-ipfs-cluster debros-olric coredns caddy 2>/dev/null +sudo systemctl disable debros-node debros-ipfs debros-ipfs-cluster debros-olric coredns caddy 2>/dev/null +sudo rm -f /etc/systemd/system/debros-*.service /etc/systemd/system/coredns.service /etc/systemd/system/caddy.service /etc/systemd/system/orama-deploy-*.service +sudo systemctl daemon-reload +sudo systemctl stop wg-quick@wg0 2>/dev/null +sudo wg-quick down wg0 2>/dev/null +sudo systemctl disable wg-quick@wg0 2>/dev/null +sudo rm -f /etc/wireguard/wg0.conf +sudo ufw --force reset && sudo ufw allow 22/tcp && sudo ufw --force enable +sudo userdel -r debros 2>/dev/null +sudo rm -rf /home/debros +sudo rm -f /etc/sudoers.d/debros-access /etc/sudoers.d/debros-deployments /etc/sudoers.d/debros-wireguard +sudo rm -rf /etc/coredns /etc/caddy /var/lib/caddy +sudo rm -f /tmp/orama /tmp/network-source.tar.gz /tmp/network-source.zip +sudo rm -rf /tmp/network-extract /tmp/coredns-build /tmp/caddy-build +echo "Done" +CLEAN +done +``` diff --git a/pkg/cli/production/install/orchestrator.go b/pkg/cli/production/install/orchestrator.go index e383c94..24f9380 100644 --- a/pkg/cli/production/install/orchestrator.go +++ b/pkg/cli/production/install/orchestrator.go @@ -199,15 +199,24 @@ func (o *Orchestrator) executeGenesisFlow() error { return fmt.Errorf("service creation failed: %w", err) } - // Phase 7: Seed DNS records + // Phase 7: Seed DNS records (with retry — migrations may still be running) if o.flags.Nameserver && o.flags.BaseDomain != "" { fmt.Printf("\n🌐 Phase 7: Seeding DNS records...\n") - fmt.Printf(" Waiting for RQLite to start (10s)...\n") - time.Sleep(10 * time.Second) - if err := o.setup.SeedDNSRecords(o.flags.BaseDomain, o.flags.VpsIP, o.peers); err != nil { - fmt.Fprintf(os.Stderr, " ⚠️ Warning: Failed to seed DNS records: %v\n", err) - } else { - fmt.Printf(" ✓ DNS records seeded\n") + var
seedErr error + for attempt := 1; attempt <= 6; attempt++ { + waitSec := 5 * attempt + fmt.Printf(" Waiting for RQLite + migrations (%ds, attempt %d/6)...\n", waitSec, attempt) + time.Sleep(time.Duration(waitSec) * time.Second) + seedErr = o.setup.SeedDNSRecords(o.flags.BaseDomain, o.flags.VpsIP, o.peers) + if seedErr == nil { + fmt.Printf(" ✓ DNS records seeded\n") + break + } + fmt.Fprintf(os.Stderr, " ⚠️ Attempt %d failed: %v\n", attempt, seedErr) + } + if seedErr != nil { + fmt.Fprintf(os.Stderr, " ⚠️ Warning: DNS seeding failed after all attempts.\n") + fmt.Fprintf(os.Stderr, " Records will self-heal via node heartbeat once running.\n") } } diff --git a/pkg/environments/production/installers.go b/pkg/environments/production/installers.go index 833bf7a..e8cd8b1 100644 --- a/pkg/environments/production/installers.go +++ b/pkg/environments/production/installers.go @@ -138,8 +138,8 @@ func (bi *BinaryInstaller) InstallCaddy() error { } // ConfigureCaddy creates Caddy configuration files -func (bi *BinaryInstaller) ConfigureCaddy(domain string, email string, acmeEndpoint string) error { - return bi.caddy.Configure(domain, email, acmeEndpoint) +func (bi *BinaryInstaller) ConfigureCaddy(domain string, email string, acmeEndpoint string, baseDomain string) error { + return bi.caddy.Configure(domain, email, acmeEndpoint, baseDomain) } // Mock system commands for testing (if needed) diff --git a/pkg/environments/production/installers/caddy.go b/pkg/environments/production/installers/caddy.go index f91c08a..1954758 100644 --- a/pkg/environments/production/installers/caddy.go +++ b/pkg/environments/production/installers/caddy.go @@ -6,6 +6,7 @@ import ( "os" "os/exec" "path/filepath" + "strings" ) const ( @@ -158,15 +159,17 @@ func (ci *CaddyInstaller) Install() error { return nil } -// Configure creates Caddy
configuration files. +// baseDomain is optional — if provided (and different from domain), Caddy will also +// serve traffic for the base domain and its wildcard (e.g., *.dbrs.space). +func (ci *CaddyInstaller) Configure(domain string, email string, acmeEndpoint string, baseDomain string) error { configDir := "/etc/caddy" if err := os.MkdirAll(configDir, 0755); err != nil { return fmt.Errorf("failed to create config directory: %w", err) } // Create Caddyfile - caddyfile := ci.generateCaddyfile(domain, email, acmeEndpoint) + caddyfile := ci.generateCaddyfile(domain, email, acmeEndpoint, baseDomain) if err := os.WriteFile(filepath.Join(configDir, "Caddyfile"), []byte(caddyfile), 0644); err != nil { return fmt.Errorf("failed to write Caddyfile: %w", err) } @@ -364,32 +367,31 @@ require ( ` } -// generateCaddyfile creates the Caddyfile configuration -func (ci *CaddyInstaller) generateCaddyfile(domain, email, acmeEndpoint string) string { - return fmt.Sprintf(`{ - email %s -} - -*.%s { - tls { +// generateCaddyfile creates the Caddyfile configuration. +// If baseDomain is provided and different from domain, Caddy also serves +// the base domain and its wildcard (e.g., *.dbrs.space alongside *.node1.dbrs.space). 
+func (ci *CaddyInstaller) generateCaddyfile(domain, email, acmeEndpoint, baseDomain string) string { + tlsBlock := fmt.Sprintf(` tls { dns orama { endpoint %s } - } - reverse_proxy localhost:6001 -} + }`, acmeEndpoint) -%s { - tls { - dns orama { - endpoint %s - } - } - reverse_proxy localhost:6001 -} + var sb strings.Builder + sb.WriteString(fmt.Sprintf("{\n email %s\n}\n", email)) -:80 { - reverse_proxy localhost:6001 -} -`, email, domain, acmeEndpoint, domain, acmeEndpoint) + // Node domain blocks (e.g., node1.dbrs.space, *.node1.dbrs.space) + sb.WriteString(fmt.Sprintf("\n*.%s {\n%s\n reverse_proxy localhost:6001\n}\n", domain, tlsBlock)) + sb.WriteString(fmt.Sprintf("\n%s {\n%s\n reverse_proxy localhost:6001\n}\n", domain, tlsBlock)) + + // Base domain blocks (e.g., dbrs.space, *.dbrs.space) — for app routing + if baseDomain != "" && baseDomain != domain { + sb.WriteString(fmt.Sprintf("\n*.%s {\n%s\n reverse_proxy localhost:6001\n}\n", baseDomain, tlsBlock)) + sb.WriteString(fmt.Sprintf("\n%s {\n%s\n reverse_proxy localhost:6001\n}\n", baseDomain, tlsBlock)) + } + + // HTTP fallback (handles plain HTTP and ACME challenges) + sb.WriteString("\n:80 {\n reverse_proxy localhost:6001\n}\n") + + return sb.String() } diff --git a/pkg/environments/production/orchestrator.go b/pkg/environments/production/orchestrator.go index a077eb3..b38399d 100644 --- a/pkg/environments/production/orchestrator.go +++ b/pkg/environments/production/orchestrator.go @@ -557,7 +557,7 @@ func (ps *ProductionSetup) Phase4GenerateConfigs(peerAddresses []string, vpsIP s } email := "admin@" + caddyDomain acmeEndpoint := "http://localhost:6001/v1/internal/acme" - if err := ps.binaryInstaller.ConfigureCaddy(caddyDomain, email, acmeEndpoint); err != nil { + if err := ps.binaryInstaller.ConfigureCaddy(caddyDomain, email, acmeEndpoint, baseDomain); err != nil { ps.logf(" ⚠️ Caddy config warning: %v", err) } else { ps.logf(" ✓ Caddy config generated") @@ -686,7 +686,8 @@ func (ps
*ProductionSetup) Phase5CreateSystemdServices(enableHTTPS bool) error { } } - // Start services in dependency order + // Restart services in dependency order (restart instead of start ensures + // services pick up new configs even if already running from a previous install) ps.logf(" Starting services...") // Start infrastructure first (IPFS, Olric, Anyone) - RQLite is managed internally by each node @@ -705,9 +706,9 @@ func (ps *ProductionSetup) Phase5CreateSystemdServices(enableHTTPS bool) error { infraServices = append(infraServices, "debros-anyone-relay.service") } } - + for _, svc := range infraServices { - if err := ps.serviceController.StartService(svc); err != nil { + if err := ps.serviceController.RestartService(svc); err != nil { ps.logf(" ⚠️ Failed to start %s: %v", svc, err) } else { ps.logf(" - %s started", svc) @@ -718,14 +719,14 @@ func (ps *ProductionSetup) Phase5CreateSystemdServices(enableHTTPS bool) error { time.Sleep(2 * time.Second) // Start IPFS Cluster - if err := ps.serviceController.StartService("debros-ipfs-cluster.service"); err != nil { + if err := ps.serviceController.RestartService("debros-ipfs-cluster.service"); err != nil { ps.logf(" ⚠️ Failed to start debros-ipfs-cluster.service: %v", err) } else { ps.logf(" - debros-ipfs-cluster.service started") } // Start node service (gateway is embedded in node, no separate service needed) - if err := ps.serviceController.StartService("debros-node.service"); err != nil { + if err := ps.serviceController.RestartService("debros-node.service"); err != nil { ps.logf(" ⚠️ Failed to start debros-node.service: %v", err) } else { ps.logf(" - debros-node.service started (with embedded gateway)") @@ -735,14 +736,14 @@ func (ps *ProductionSetup) Phase5CreateSystemdServices(enableHTTPS bool) error { // Caddy depends on debros-node.service (gateway on :6001), so start after node if ps.isNameserver { if _, err := os.Stat("/usr/local/bin/coredns"); err == nil { - if err :=
ps.serviceController.StartService("coredns.service"); err != nil { + if err := ps.serviceController.RestartService("coredns.service"); err != nil { ps.logf(" ⚠️ Failed to start coredns.service: %v", err) } else { ps.logf(" - coredns.service started") } } if _, err := os.Stat("/usr/bin/caddy"); err == nil { - if err := ps.serviceController.StartService("caddy.service"); err != nil { + if err := ps.serviceController.RestartService("caddy.service"); err != nil { ps.logf(" ⚠️ Failed to start caddy.service: %v", err) } else { ps.logf(" - caddy.service started") diff --git a/pkg/environments/production/services.go b/pkg/environments/production/services.go index 4f0641b..1fffcd3 100644 --- a/pkg/environments/production/services.go +++ b/pkg/environments/production/services.go @@ -216,8 +216,9 @@ func (ssg *SystemdServiceGenerator) GenerateNodeService() string { return fmt.Sprintf(`[Unit] Description=DeBros Network Node -After=debros-ipfs-cluster.service debros-olric.service +After=debros-ipfs-cluster.service debros-olric.service wg-quick@wg0.service Wants=debros-ipfs-cluster.service debros-olric.service +Requires=wg-quick@wg0.service [Service] Type=simple diff --git a/pkg/environments/production/wireguard.go b/pkg/environments/production/wireguard.go index 607ed3d..8bf4ce9 100644 --- a/pkg/environments/production/wireguard.go +++ b/pkg/environments/production/wireguard.go @@ -157,8 +157,11 @@ func (wp *WireGuardProvisioner) Enable() error { return fmt.Errorf("failed to enable wg-quick@wg0: %w\n%s", err, string(output)) } - // Start now - cmd = exec.Command("systemctl", "start", "wg-quick@wg0") + // Use restart instead of start. wg-quick@wg0 is a oneshot service with + // RemainAfterExit=yes, so "systemctl start" is a no-op if the service is + // already in "active (exited)" state (e.g. from a previous install that + // wasn't fully cleaned). "restart" always re-runs the ExecStart command.
+ cmd = exec.Command("systemctl", "restart", "wg-quick@wg0") if output, err := cmd.CombinedOutput(); err != nil { return fmt.Errorf("failed to start wg-quick@wg0: %w\n%s", err, string(output)) } diff --git a/pkg/node/dns_registration.go b/pkg/node/dns_registration.go index 9d93ee1..8adf276 100644 --- a/pkg/node/dns_registration.go +++ b/pkg/node/dns_registration.go @@ -118,12 +118,15 @@ func (n *Node) updateDNSHeartbeat(ctx context.Context) error { // ensureBaseDNSRecords ensures this node's IP is present in the base DNS records. // This provides self-healing: if records are missing (fresh install, DB reset), // the node recreates them on startup. Each node only manages its own IP entries. +// +// Records are created for BOTH the base domain (dbrs.space) and the node domain +// (node1.dbrs.space). The base domain records enable round-robin load balancing +// across all nodes. The node domain records enable direct node access. func (n *Node) ensureBaseDNSRecords(ctx context.Context) error { - domain := n.config.Node.Domain - if domain == "" { - domain = n.config.HTTPGateway.BaseDomain - } - if domain == "" { + baseDomain := n.config.HTTPGateway.BaseDomain + nodeDomain := n.config.Node.Domain + + if baseDomain == "" && nodeDomain == "" { return nil // No domain configured, skip } @@ -132,22 +135,32 @@ func (n *Node) ensureBaseDNSRecords(ctx context.Context) error { return fmt.Errorf("failed to determine node IP: %w", err) } - // Ensure trailing dot for FQDN format (as CoreDNS expects) - fqdn := domain + "." - wildcardFQDN := "*." + domain + "." - db := n.rqliteAdapter.GetSQLDB() + // Build list of A records to ensure + var records []struct { + fqdn string + value string + } + + // Base domain records (e.g., dbrs.space, *.dbrs.space) — for round-robin across all nodes + if baseDomain != "" { + records = append(records, + struct{ fqdn, value string }{baseDomain + ".", ipAddress}, + struct{ fqdn, value string }{"*." 
+ baseDomain + ".", ipAddress}, + ) + } + + // Node-specific records (e.g., node1.dbrs.space, *.node1.dbrs.space) — for direct node access + if nodeDomain != "" && nodeDomain != baseDomain { + records = append(records, + struct{ fqdn, value string }{nodeDomain + ".", ipAddress}, + struct{ fqdn, value string }{"*." + nodeDomain + ".", ipAddress}, + ) + } + // Insert root A record and wildcard A record for this node's IP // ON CONFLICT DO NOTHING avoids duplicates (UNIQUE on fqdn, record_type, value) - records := []struct { - fqdn string - value string - }{ - {fqdn, ipAddress}, - {wildcardFQDN, ipAddress}, - } - for _, r := range records { query := `INSERT INTO dns_records (fqdn, record_type, value, ttl, namespace, created_by, is_active, created_at, updated_at) VALUES (?, 'A', ?, 300, 'system', 'system', TRUE, datetime('now'), datetime('now')) @@ -158,12 +171,64 @@ func (n *Node) ensureBaseDNSRecords(ctx context.Context) error { } } - // Claim an NS slot if available (ns1, ns2, or ns3) - n.claimNameserverSlot(ctx, domain, ipAddress) + // Ensure SOA and NS records exist for the base domain (self-healing) + if baseDomain != "" { + n.ensureSOAAndNSRecords(ctx, baseDomain) + } + + // Claim an NS slot for the base domain (ns1/ns2/ns3) + if baseDomain != "" { + n.claimNameserverSlot(ctx, baseDomain, ipAddress) + } return nil } +// ensureSOAAndNSRecords creates SOA and NS records for the base domain if they don't exist. +// These are normally seeded during install Phase 7, but if that fails (e.g. migrations +// not yet run), the heartbeat self-heals them here. +func (n *Node) ensureSOAAndNSRecords(ctx context.Context, baseDomain string) { + db := n.rqliteAdapter.GetSQLDB() + fqdn := baseDomain + "." + + // Check if SOA exists + var count int + err := db.QueryRowContext(ctx, + `SELECT COUNT(*) FROM dns_records WHERE fqdn = ? 
AND record_type = 'SOA'`, fqdn, + ).Scan(&count) + if err != nil || count > 0 { + return // SOA exists or query failed, skip + } + + n.logger.ComponentInfo(logging.ComponentNode, "SOA/NS records missing, self-healing", + zap.String("domain", baseDomain)) + + // Create SOA record + soaValue := fmt.Sprintf("ns1.%s. admin.%s. %d 3600 1800 604800 300", + baseDomain, baseDomain, time.Now().Unix()) + if _, err := db.ExecContext(ctx, + `INSERT INTO dns_records (fqdn, record_type, value, ttl, namespace, created_by, is_active, created_at, updated_at) + VALUES (?, 'SOA', ?, 300, 'system', 'system', TRUE, datetime('now'), datetime('now')) + ON CONFLICT(fqdn, record_type, value) DO NOTHING`, + fqdn, soaValue, + ); err != nil { + n.logger.ComponentWarn(logging.ComponentNode, "Failed to create SOA record", zap.Error(err)) + } + + // Create NS records (ns1, ns2, ns3) + for i := 1; i <= 3; i++ { + nsValue := fmt.Sprintf("ns%d.%s.", i, baseDomain) + if _, err := db.ExecContext(ctx, + `INSERT INTO dns_records (fqdn, record_type, value, ttl, namespace, created_by, is_active, created_at, updated_at) + VALUES (?, 'NS', ?, 300, 'system', 'system', TRUE, datetime('now'), datetime('now')) + ON CONFLICT(fqdn, record_type, value) DO NOTHING`, + fqdn, nsValue, + ); err != nil { + n.logger.ComponentWarn(logging.ComponentNode, "Failed to create NS record", zap.Error(err)) + } + } +} + // claimNameserverSlot attempts to claim an available NS hostname (ns1/ns2/ns3) for this node. // If the node already has a slot, it updates the IP. If no slot is available, it does nothing. 
func (n *Node) claimNameserverSlot(ctx context.Context, domain, ipAddress string) { @@ -236,11 +301,11 @@ func (n *Node) cleanupStaleNodeRecords(ctx context.Context) { return } - domain := n.config.Node.Domain - if domain == "" { - domain = n.config.HTTPGateway.BaseDomain + baseDomain := n.config.HTTPGateway.BaseDomain + if baseDomain == "" { + baseDomain = n.config.Node.Domain } - if domain == "" { + if baseDomain == "" { return } @@ -255,8 +320,12 @@ func (n *Node) cleanupStaleNodeRecords(ctx context.Context) { } defer rows.Close() - fqdn := domain + "." - wildcardFQDN := "*." + domain + "." + // Build all FQDNs to clean: base domain + node domain + var fqdnsToClean []string + fqdnsToClean = append(fqdnsToClean, baseDomain+".", "*."+baseDomain+".") + if n.config.Node.Domain != "" && n.config.Node.Domain != baseDomain { + fqdnsToClean = append(fqdnsToClean, n.config.Node.Domain+".", "*."+n.config.Node.Domain+".") + } for rows.Next() { var nodeID, ip string @@ -270,7 +339,7 @@ func (n *Node) cleanupStaleNodeRecords(ctx context.Context) { } // Remove the dead node's A records from round-robin - for _, f := range []string{fqdn, wildcardFQDN} { + for _, f := range fqdnsToClean { if _, err := db.ExecContext(ctx, `DELETE FROM dns_records WHERE fqdn = ? AND record_type = 'A' AND value = ? AND namespace = 'system'`, f, ip); err != nil { n.logger.ComponentWarn(logging.ComponentNode, "Failed to remove stale DNS record", zap.String("fqdn", f), zap.String("ip", ip), zap.Error(err)) @@ -284,7 +353,7 @@ func (n *Node) cleanupStaleNodeRecords(ctx context.Context) { // Remove glue records for this node's IP (ns1.domain., ns2.domain., ns3.domain.) for _, ns := range []string{"ns1", "ns2", "ns3"} { - nsFQDN := ns + "." + domain + "." + nsFQDN := ns + "." + baseDomain + "." if _, err := db.ExecContext(ctx, `DELETE FROM dns_records WHERE fqdn = ? AND record_type = 'A' AND value = ? AND namespace = 'system'`, nsFQDN, ip,