diff --git a/pkg/gateway/dependencies.go b/pkg/gateway/dependencies.go index d3f1eda..c583ad9 100644 --- a/pkg/gateway/dependencies.go +++ b/pkg/gateway/dependencies.go @@ -14,6 +14,7 @@ import ( "strings" "time" + "github.com/DeBrosOfficial/network/migrations" "github.com/DeBrosOfficial/network/pkg/client" "github.com/DeBrosOfficial/network/pkg/config" "github.com/DeBrosOfficial/network/pkg/gateway/auth" @@ -155,6 +156,18 @@ func initializeRQLite(logger *logging.ColoredLogger, cfg *Config, deps *Dependen zap.Duration("timeout", deps.ORMHTTP.Timeout), ) + // Apply embedded migrations to ensure schema is up-to-date. + // This is critical for namespace gateways whose RQLite instances + // don't get migrations from the main cluster RQLiteManager. + migCtx, migCancel := context.WithTimeout(context.Background(), 30*time.Second) + defer migCancel() + if err := rqlite.ApplyEmbeddedMigrations(migCtx, db, migrations.FS, logger.Logger); err != nil { + logger.ComponentWarn(logging.ComponentGeneral, "Failed to apply embedded migrations to gateway RQLite", + zap.Error(err)) + } else { + logger.ComponentInfo(logging.ComponentGeneral, "Embedded migrations applied to gateway RQLite") + } + return nil } diff --git a/pkg/namespace/cluster_manager.go b/pkg/namespace/cluster_manager.go index 0c0e15a..cc6903a 100644 --- a/pkg/namespace/cluster_manager.go +++ b/pkg/namespace/cluster_manager.go @@ -576,12 +576,33 @@ func (cm *ClusterManager) sendStopRequest(ctx context.Context, nodeIP, action, n } } -// createDNSRecords creates DNS records for the namespace gateway +// createDNSRecords creates DNS records for the namespace gateway. +// Only nameserver nodes get DNS A records, because only they run Caddy +// and can serve TLS for ns-{namespace}.{baseDomain} subdomains. func (cm *ClusterManager) createDNSRecords(ctx context.Context, cluster *NamespaceCluster, nodes []NodeCapacity, portBlocks []*PortBlock) error { - // Create A records for ns-{namespace}.{baseDomain} pointing to all 3 nodes fqdn := fmt.Sprintf("ns-%s.%s.", cluster.NamespaceName, cm.baseDomain) + // Query nameserver node IDs so we only add DNS records for nodes that can serve TLS + type nsRow struct { + NodeID string `db:"node_id"` + } + var nameservers []nsRow + _ = cm.db.Query(ctx, &nameservers, `SELECT node_id FROM dns_nameservers`) + nsSet := make(map[string]bool, len(nameservers)) + for _, ns := range nameservers { + nsSet[ns.NodeID] = true + } + + recordCount := 0 for i, node := range nodes { + if len(nsSet) > 0 && !nsSet[node.NodeID] { + cm.logger.Info("Skipping DNS record for non-nameserver node", + zap.String("node_id", node.NodeID), + zap.String("ip", node.IPAddress), + ) + continue + } + query := ` INSERT INTO dns_records (fqdn, record_type, value, ttl, namespace, created_by) VALUES (?, 'A', ?, 300, ?, 'system') @@ -599,10 +620,11 @@ func (cm *ClusterManager) createDNSRecords(ctx context.Context, cluster *Namespa zap.String("ip", node.IPAddress), zap.Int("gateway_port", portBlocks[i].GatewayHTTPPort), ) + recordCount++ } } - cm.logEvent(ctx, cluster.ID, EventDNSCreated, "", fmt.Sprintf("DNS records created for %s", fqdn), nil) + cm.logEvent(ctx, cluster.ID, EventDNSCreated, "", fmt.Sprintf("DNS records created for %s (%d records)", fqdn, recordCount), nil) return nil } diff --git a/pkg/node/dns_registration.go b/pkg/node/dns_registration.go index 197af09..ceba888 100644 --- a/pkg/node/dns_registration.go +++ b/pkg/node/dns_registration.go @@ -2,6 +2,7 @@ package node import ( "context" + "database/sql" "fmt" "net" "os" @@ -140,6 +141,11 @@ func (n *Node) ensureBaseDNSRecords(ctx context.Context) error { db := n.rqliteAdapter.GetSQLDB() + // Clean up any private IP A records left by old code versions. + // Old code could insert WireGuard IPs (10.0.0.x) into dns_records. + // This self-heals on every heartbeat cycle. + cleanupPrivateIPRecords(ctx, db, n.logger) + // Build list of A records to ensure var records []struct { fqdn string @@ -466,3 +472,22 @@ func (n *Node) getNodeIPAddress() (string, error) { } return localAddr.IP.String(), nil } + +// cleanupPrivateIPRecords deletes any A records with private/loopback IPs from dns_records. +// Old code versions could insert WireGuard IPs (10.0.0.x) into the table. This runs on +// every heartbeat to self-heal. +func cleanupPrivateIPRecords(ctx context.Context, db *sql.DB, logger *logging.ColoredLogger) { + query := `DELETE FROM dns_records WHERE record_type = 'A' AND namespace = 'system' + AND (value LIKE '10.%' OR value LIKE '172.16.%' OR value LIKE '172.17.%' OR value LIKE '172.18.%' + OR value LIKE '172.19.%' OR value LIKE '172.2_.%' OR value LIKE '172.30.%' OR value LIKE '172.31.%' + OR value LIKE '192.168.%' OR value = '127.0.0.1')` + result, err := db.ExecContext(ctx, query) + if err != nil { + logger.ComponentWarn(logging.ComponentNode, "Failed to clean up private IP DNS records", zap.Error(err)) + return + } + if rows, _ := result.RowsAffected(); rows > 0 { + logger.ComponentInfo(logging.ComponentNode, "Cleaned up private IP DNS records", + zap.Int64("deleted", rows)) + } +} diff --git a/pkg/olric/instance_spawner.go b/pkg/olric/instance_spawner.go index be89d57..7e321d8 100644 --- a/pkg/olric/instance_spawner.go +++ b/pkg/olric/instance_spawner.go @@ -503,8 +503,12 @@ func (oi *OlricInstance) IsHealthy(ctx context.Context) (bool, error) { return true, nil } -// DSN returns the connection address for this Olric instance +// DSN returns the connection address for this Olric instance. +// Uses the bind address if set (e.g. WireGuard IP), since Olric may not listen on localhost. func (oi *OlricInstance) DSN() string { + if oi.BindAddr != "" { + return fmt.Sprintf("%s:%d", oi.BindAddr, oi.HTTPPort) + } return fmt.Sprintf("localhost:%d", oi.HTTPPort) }