package rqlite

import (
	"context"
	"encoding/json"
	"errors"
	"fmt"
	"io"
	"net/http"
	"os"
	"os/exec"
	"path/filepath"
	"strconv"
	"strings"
	"syscall"
	"time"

	"github.com/rqlite/gorqlite"
	"go.uber.org/zap"

	"github.com/DeBrosOfficial/network/pkg/config"
)

// RQLiteManager manages an RQLite node instance
type RQLiteManager struct {
	config           *config.DatabaseConfig
	discoverConfig   *config.DiscoveryConfig
	dataDir          string
	logger           *zap.Logger
	cmd              *exec.Cmd
	connection       *gorqlite.Connection
	discoveryService *ClusterDiscoveryService
}

// waitForSQLAvailable waits until a simple query succeeds, indicating a leader is known and queries can be served.
func (r *RQLiteManager) waitForSQLAvailable(ctx context.Context) error {
	ticker := time.NewTicker(1 * time.Second)
	defer ticker.Stop()

	attempts := 0
	for {
		select {
		case <-ctx.Done():
			return ctx.Err()
		case <-ticker.C:
			// Check for nil connection inside the loop to handle cases where
			// connection becomes nil during restart/recovery operations
			if r.connection == nil {
				attempts++
				if attempts%5 == 0 { // log every ~5s to reduce noise
					r.logger.Debug("Waiting for RQLite connection to be established")
				}
				continue
			}
			attempts++
			_, err := r.connection.QueryOne("SELECT 1")
			if err == nil {
				r.logger.Info("RQLite SQL is available")
				return nil
			}
			if attempts%5 == 0 { // log every ~5s to reduce noise
				r.logger.Debug("Waiting for RQLite SQL availability", zap.Error(err))
			}
		}
	}
}

// NewRQLiteManager creates a new RQLite manager
func NewRQLiteManager(cfg *config.DatabaseConfig, discoveryCfg *config.DiscoveryConfig, dataDir string, logger *zap.Logger) *RQLiteManager {
	return &RQLiteManager{
		config:         cfg,
		discoverConfig: discoveryCfg,
		dataDir:        dataDir,
		logger:         logger.With(zap.String("component", "rqlite-manager")),
	}
}

// SetDiscoveryService sets the cluster discovery service for this RQLite manager
func (r *RQLiteManager) SetDiscoveryService(service *ClusterDiscoveryService) {
	r.discoveryService = service
}
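// The sketch below shows one plausible way this manager is wired up by a node at
// startup; it is illustrative only. The config accessors and the
// ClusterDiscoveryService value are assumptions - they are built outside this file.
//
//	logger, _ := zap.NewProduction()
//	mgr := NewRQLiteManager(&cfg.Database, &cfg.Discovery, cfg.DataDir, logger)
//	mgr.SetDiscoveryService(discovery) // *ClusterDiscoveryService constructed elsewhere
//	if err := mgr.Start(ctx); err != nil {
//		logger.Fatal("rqlite failed to start", zap.Error(err))
//	}
//	defer mgr.Stop()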
// UpdateAdvertisedAddresses overrides the discovery advertised addresses when cluster discovery
// infers a better host than what was provided via configuration (e.g. replacing localhost).
func (r *RQLiteManager) UpdateAdvertisedAddresses(raftAddr, httpAddr string) {
	if r == nil || r.discoverConfig == nil {
		return
	}
	if raftAddr != "" && r.discoverConfig.RaftAdvAddress != raftAddr {
		r.logger.Info("Updating Raft advertised address", zap.String("addr", raftAddr))
		r.discoverConfig.RaftAdvAddress = raftAddr
	}
	if httpAddr != "" && r.discoverConfig.HttpAdvAddress != httpAddr {
		r.logger.Info("Updating HTTP advertised address", zap.String("addr", httpAddr))
		r.discoverConfig.HttpAdvAddress = httpAddr
	}
}

// Start starts the RQLite node
func (r *RQLiteManager) Start(ctx context.Context) error {
	rqliteDataDir, err := r.prepareDataDir()
	if err != nil {
		return err
	}

	if r.discoverConfig.HttpAdvAddress == "" {
		return fmt.Errorf("discovery config HttpAdvAddress is empty")
	}

	// CRITICAL FIX: Ensure peers.json exists with minimum cluster size BEFORE starting RQLite.
	// This prevents split-brain where each node starts as a single-node cluster.
	// We NEVER start as a single-node cluster - we wait indefinitely until minimum cluster size is met.
	// This applies to ALL nodes (bootstrap AND regular nodes with join addresses).
	if r.discoveryService != nil {
		r.logger.Info("Ensuring peers.json exists with minimum cluster size before RQLite startup",
			zap.String("policy", "will wait indefinitely - never start as single-node cluster"),
			zap.Bool("has_join_address", r.config.RQLiteJoinAddress != ""))

		// Wait for peer discovery to find minimum cluster size - NO TIMEOUT.
		// This ensures we never start as a single-node cluster, regardless of join address.
		if err := r.waitForMinClusterSizeBeforeStart(ctx, rqliteDataDir); err != nil {
			r.logger.Error("Failed to ensure minimum cluster size before start",
				zap.Error(err),
				zap.String("action", "startup aborted - will not start as single-node cluster"))
			return fmt.Errorf("cannot start RQLite: minimum cluster size not met: %w", err)
		}
	}

	// CRITICAL: Check if we need to do pre-start cluster discovery to build peers.json.
	// This handles the case where nodes have old cluster state and need coordinated recovery.
	if needsClusterRecovery, err := r.checkNeedsClusterRecovery(rqliteDataDir); err != nil {
		return fmt.Errorf("failed to check cluster recovery status: %w", err)
	} else if needsClusterRecovery {
		r.logger.Info("Detected old cluster state requiring coordinated recovery")
		if err := r.performPreStartClusterDiscovery(ctx, rqliteDataDir); err != nil {
			return fmt.Errorf("pre-start cluster discovery failed: %w", err)
		}
	}

	// Launch RQLite process
	if err := r.launchProcess(ctx, rqliteDataDir); err != nil {
		return err
	}

	// Wait for RQLite to be ready and establish connection
	if err := r.waitForReadyAndConnect(ctx); err != nil {
		return err
	}

	// Start periodic health monitoring for automatic recovery
	if r.discoveryService != nil {
		go r.startHealthMonitoring(ctx)
	}

	// Establish leadership/SQL availability
	if err := r.establishLeadershipOrJoin(ctx, rqliteDataDir); err != nil {
		return err
	}

	// Apply migrations - resolve path for production vs development
	migrationsDir, err := r.resolveMigrationsDir()
	if err != nil {
		r.logger.Error("Failed to resolve migrations directory", zap.Error(err))
		return fmt.Errorf("resolve migrations directory: %w", err)
	}
	if err := r.ApplyMigrations(ctx, migrationsDir); err != nil {
		r.logger.Error("Migrations failed", zap.Error(err), zap.String("dir", migrationsDir))
		return fmt.Errorf("apply migrations: %w", err)
	}

	r.logger.Info("RQLite node started successfully")
	return nil
}
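// For reference, Start and launchProcess only rely on the configuration fields used
// in this file. A hypothetical example (field names as referenced here; the full
// struct definitions live in pkg/config, and all values below are illustrative):
//
//	cfg := &config.DatabaseConfig{
//		RQLitePort:        5001,
//		RQLiteRaftPort:    7001,
//		RQLiteJoinAddress: "", // empty => bootstrap path, non-empty => join path
//		MinClusterSize:    3,
//	}
//	disc := &config.DiscoveryConfig{
//		RaftAdvAddress: "10.0.0.5:7001",
//		HttpAdvAddress: "10.0.0.5:5001",
//	}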
// rqliteDataDirPath returns the resolved path to the RQLite data directory.
// This centralizes the path resolution logic used throughout the codebase.
func (r *RQLiteManager) rqliteDataDirPath() (string, error) {
	// Expand ~ in data directory path
	dataDir := os.ExpandEnv(r.dataDir)
	if strings.HasPrefix(dataDir, "~") {
		home, err := os.UserHomeDir()
		if err != nil {
			return "", fmt.Errorf("failed to determine home directory: %w", err)
		}
		dataDir = filepath.Join(home, dataDir[1:])
	}
	return filepath.Join(dataDir, "rqlite"), nil
}

// resolveMigrationsDir resolves the migrations directory path for production vs development.
// In production, migrations are at /home/debros/src/migrations.
// In development, migrations are relative to the project root (migrations/).
func (r *RQLiteManager) resolveMigrationsDir() (string, error) {
	// Check for production path first: /home/debros/src/migrations
	productionPath := "/home/debros/src/migrations"
	if _, err := os.Stat(productionPath); err == nil {
		r.logger.Info("Using production migrations directory", zap.String("path", productionPath))
		return productionPath, nil
	}

	// Fall back to relative path for development
	devPath := "migrations"
	r.logger.Info("Using development migrations directory", zap.String("path", devPath))
	return devPath, nil
}

// prepareDataDir expands and creates the RQLite data directory
func (r *RQLiteManager) prepareDataDir() (string, error) {
	rqliteDataDir, err := r.rqliteDataDirPath()
	if err != nil {
		return "", err
	}

	// Create data directory
	if err := os.MkdirAll(rqliteDataDir, 0755); err != nil {
		return "", fmt.Errorf("failed to create RQLite data directory: %w", err)
	}
	return rqliteDataDir, nil
}
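// The on-disk layout assumed throughout this file (these are the paths referenced by
// the functions below; exact contents are managed by rqlited and the discovery
// service, so treat this as an illustrative sketch):
//
//	<dataDir>/rqlite/
//		db.sqlite        - SQLite database (preserved by clearRaftState)
//		raft.db          - Raft log
//		raft/peers.json  - cluster membership written by the discovery service
//		rsnapshots/      - Raft snapshots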
// launchProcess starts the RQLite process with appropriate arguments
func (r *RQLiteManager) launchProcess(ctx context.Context, rqliteDataDir string) error {
	// Build RQLite command
	args := []string{
		"-http-addr", fmt.Sprintf("0.0.0.0:%d", r.config.RQLitePort),
		"-http-adv-addr", r.discoverConfig.HttpAdvAddress,
		"-raft-adv-addr", r.discoverConfig.RaftAdvAddress,
		"-raft-addr", fmt.Sprintf("0.0.0.0:%d", r.config.RQLiteRaftPort),
	}

	// Add join address if specified (for non-bootstrap or secondary bootstrap nodes)
	if r.config.RQLiteJoinAddress != "" {
		r.logger.Info("Joining RQLite cluster", zap.String("join_address", r.config.RQLiteJoinAddress))

		// Normalize join address to host:port for rqlited -join
		joinArg := r.config.RQLiteJoinAddress
		if strings.HasPrefix(joinArg, "http://") {
			joinArg = strings.TrimPrefix(joinArg, "http://")
		} else if strings.HasPrefix(joinArg, "https://") {
			joinArg = strings.TrimPrefix(joinArg, "https://")
		}

		// Wait for join target to become reachable to avoid forming a separate cluster.
		// Use a 5 minute timeout to prevent infinite waits on bad configurations.
		joinTimeout := 5 * time.Minute
		if err := r.waitForJoinTarget(ctx, r.config.RQLiteJoinAddress, joinTimeout); err != nil {
			r.logger.Warn("Join target did not become reachable within timeout; will still attempt to join",
				zap.String("join_address", r.config.RQLiteJoinAddress),
				zap.Duration("timeout", joinTimeout),
				zap.Error(err))
		}

		// Always add the join parameter in host:port form - let rqlited handle the rest.
		// Add retry parameters to handle slow cluster startup (e.g., during recovery).
		args = append(args, "-join", joinArg, "-join-attempts", "30", "-join-interval", "10s")
	} else {
		r.logger.Info("No join address specified - starting as new cluster")

		// For bootstrap nodes, use bootstrap-expect if we know about other peers
		if r.discoveryService != nil {
			allPeers := r.discoveryService.GetAllPeers()
			remotePeerCount := 0
			for _, peer := range allPeers {
				if peer.NodeID != r.discoverConfig.RaftAdvAddress {
					remotePeerCount++
				}
			}

			// Use bootstrap-expect if we have discovered enough peers.
			// This tells RQLite to wait for the expected number of nodes before forming a cluster.
			if remotePeerCount >= (r.config.MinClusterSize - 1) {
				expectedPeers := r.config.MinClusterSize
				args = append(args, "-bootstrap-expect", strconv.Itoa(expectedPeers))
				r.logger.Info("Using bootstrap-expect to wait for cluster formation",
					zap.Int("expected_peers", expectedPeers),
					zap.Int("remote_peers_discovered", remotePeerCount))
			}
		}
	}

	// Add data directory as positional argument
	args = append(args, rqliteDataDir)

	r.logger.Info("Starting RQLite node",
		zap.String("data_dir", rqliteDataDir),
		zap.Int("http_port", r.config.RQLitePort),
		zap.Int("raft_port", r.config.RQLiteRaftPort),
		zap.String("join_address", r.config.RQLiteJoinAddress))

	// Start RQLite process (not bound to ctx for graceful Stop handling)
	r.cmd = exec.Command("rqlited", args...)

	// Enable debug logging of RQLite process to help diagnose issues
	r.cmd.Stdout = os.Stdout
	r.cmd.Stderr = os.Stderr

	if err := r.cmd.Start(); err != nil {
		return fmt.Errorf("failed to start RQLite: %w", err)
	}

	return nil
}
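// For illustration, the arguments assembled above typically expand to an invocation
// along these lines for a joining node (addresses, ports, and the data directory are
// hypothetical):
//
//	rqlited \
//		-http-addr 0.0.0.0:5001 -http-adv-addr 10.0.0.5:5001 \
//		-raft-addr 0.0.0.0:7001 -raft-adv-addr 10.0.0.5:7001 \
//		-join 10.0.0.4:7001 -join-attempts 30 -join-interval 10s \
//		/path/to/data/rqlite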
// waitForReadyAndConnect waits for RQLite to be ready and establishes the connection.
// For joining nodes, it retries if gorqlite.Open fails with a "store is not open" error.
func (r *RQLiteManager) waitForReadyAndConnect(ctx context.Context) error {
	// Wait for RQLite to be ready
	if err := r.waitForReady(ctx); err != nil {
		if r.cmd != nil && r.cmd.Process != nil {
			_ = r.cmd.Process.Kill()
		}
		return fmt.Errorf("RQLite failed to become ready: %w", err)
	}

	// For joining nodes, retry gorqlite.Open if the store is not yet open.
	// This handles recovery scenarios where the store opens after HTTP is responsive.
	var conn *gorqlite.Connection
	var err error
	maxConnectAttempts := 10
	connectBackoff := 500 * time.Millisecond

	for attempt := 0; attempt < maxConnectAttempts; attempt++ {
		// Create connection
		conn, err = gorqlite.Open(fmt.Sprintf("http://localhost:%d", r.config.RQLitePort))
		if err == nil {
			// Success
			r.connection = conn
			r.logger.Debug("Successfully connected to RQLite", zap.Int("attempt", attempt+1))
			break
		}

		// Check if error is "store is not open" (recovery scenario)
		if strings.Contains(err.Error(), "store is not open") {
			if attempt < maxConnectAttempts-1 {
				// Only retry for joining nodes; bootstrap nodes should fail fast
				if r.config.RQLiteJoinAddress != "" {
					if attempt%3 == 0 {
						r.logger.Debug("RQLite store not yet accessible for connection, retrying...",
							zap.Int("attempt", attempt+1), zap.Error(err))
					}
					time.Sleep(connectBackoff)
					connectBackoff = time.Duration(float64(connectBackoff) * 1.5)
					if connectBackoff > 5*time.Second {
						connectBackoff = 5 * time.Second
					}
					continue
				}
			}
		}

		// For any other error or final attempt, fail
		if r.cmd != nil && r.cmd.Process != nil {
			_ = r.cmd.Process.Kill()
		}
		return fmt.Errorf("failed to connect to RQLite: %w", err)
	}

	if conn == nil {
		if r.cmd != nil && r.cmd.Process != nil {
			_ = r.cmd.Process.Kill()
		}
		return fmt.Errorf("failed to establish RQLite connection after %d attempts", maxConnectAttempts)
	}

	// Sanity check: verify rqlite's node ID matches our configured raft address
	if err := r.validateNodeID(); err != nil {
		r.logger.Debug("Node ID validation skipped", zap.Error(err))
		// Don't fail startup, but log at debug level
	}

	return nil
}

// establishLeadershipOrJoin establishes leadership (bootstrap) or waits for SQL availability (joining)
func (r *RQLiteManager) establishLeadershipOrJoin(ctx context.Context, rqliteDataDir string) error {
	if r.config.RQLiteJoinAddress == "" {
		// Bootstrap node logic with data safety checks
		r.logger.Info("Bootstrap node: checking if safe to lead")

		// SAFETY: Check if we can safely become leader
		canLead, err := r.canSafelyBecomeLeader()
		if !canLead && err != nil {
			r.logger.Warn("Not safe to become leader, attempting to join existing cluster", zap.Error(err))

			// Find node with highest log index and join it
			if r.discoveryService != nil {
				targetNode := r.discoveryService.GetNodeWithHighestLogIndex()
				if targetNode != nil {
					r.logger.Info("Joining node with higher data",
						zap.String("target_node", targetNode.NodeID),
						zap.String("raft_address", targetNode.RaftAddress),
						zap.Uint64("their_index", targetNode.RaftLogIndex))
					return r.joinExistingCluster(ctx, targetNode.RaftAddress)
				}
			}
		}

		// Safe to lead - attempt leadership
		leadershipErr := r.waitForLeadership(ctx)
		if leadershipErr == nil {
			r.logger.Info("Bootstrap node successfully established leadership")
			return nil
		}
		r.logger.Warn("Initial leadership attempt failed, may need cluster recovery", zap.Error(leadershipErr))

		// Try recovery if we have peers.json from discovery
		if r.discoveryService != nil {
			peersPath := filepath.Join(rqliteDataDir, "raft", "peers.json")
			if _, err := os.Stat(peersPath); err == nil {
				r.logger.Info("Attempting cluster recovery using peers.json", zap.String("peers_file", peersPath))
				if recoveryErr := r.recoverCluster(ctx, peersPath); recoveryErr == nil {
					r.logger.Info("Cluster recovery successful, retrying leadership")
					leadershipErr = r.waitForLeadership(ctx)
					if leadershipErr == nil {
						r.logger.Info("Bootstrap node established leadership after recovery")
						return nil
					}
				} else {
					r.logger.Warn("Cluster recovery failed", zap.Error(recoveryErr))
				}
			}

			// Check if we're stuck in configuration mismatch after recovery failed
			if leadershipErr != nil && r.isStuckInConfigurationMismatch() {
				r.logger.Warn("Detected persistent configuration mismatch, attempting automatic recovery")

				// Verify it's safe to clear state (peers have higher log indexes)
				if r.isSafeToClearState(rqliteDataDir) {
					r.logger.Info("Clearing stale Raft state to resolve configuration mismatch")
					if err := r.clearRaftState(rqliteDataDir); err != nil {
						r.logger.Error("Failed to clear Raft state", zap.Error(err))
					} else {
						// Force write peers.json after clearing state
						if r.discoveryService != nil {
							r.logger.Info("Force writing peers.json after clearing state for configuration mismatch recovery")
							if err := r.discoveryService.ForceWritePeersJSON(); err != nil {
								r.logger.Error("Failed to force write peers.json", zap.Error(err))
							}
							// Update peersPath after force write
							peersPath = filepath.Join(rqliteDataDir, "raft", "peers.json")
						}

						// Restart RQLite with clean state
						r.logger.Info("Raft state cleared, restarting RQLite for clean rejoin")
						if recoveryErr := r.recoverCluster(ctx, peersPath); recoveryErr == nil {
							// Retry leadership after state clear
							leadershipErr = r.waitForLeadership(ctx)
							if leadershipErr == nil {
								r.logger.Info("Bootstrap node established leadership after state clear")
								return nil
							}
						}
					}
				} else {
					r.logger.Warn("Configuration mismatch detected but clearing state is unsafe",
						zap.String("reason", "peers may not have more recent data"),
						zap.String("action", "manual intervention may be required"))
				}
			}
		}
		// Final fallback: SQL availability
		r.logger.Warn("Leadership failed, trying SQL availability")
		sqlCtx := ctx
		if _, hasDeadline := ctx.Deadline(); !hasDeadline {
			var cancel context.CancelFunc
			sqlCtx, cancel = context.WithTimeout(context.Background(), 2*time.Minute)
			defer cancel()
		}
		if err := r.waitForSQLAvailable(sqlCtx); err != nil {
			if r.cmd != nil && r.cmd.Process != nil {
				_ = r.cmd.Process.Kill()
			}
			return fmt.Errorf("RQLite SQL not available: %w", err)
		}
		return nil
	} else {
		// Joining node logic
		r.logger.Info("Waiting for RQLite SQL availability (leader discovery)")
		sqlCtx := ctx
		if _, hasDeadline := ctx.Deadline(); !hasDeadline {
			var cancel context.CancelFunc
			sqlCtx, cancel = context.WithTimeout(context.Background(), 5*time.Minute)
			defer cancel()
		}
		if err := r.waitForSQLAvailable(sqlCtx); err != nil {
			if r.cmd != nil && r.cmd.Process != nil {
				_ = r.cmd.Process.Kill()
			}
			return fmt.Errorf("RQLite SQL not available: %w", err)
		}
		return nil
	}
}

// hasExistingState returns true if the rqlite data directory already contains files or subdirectories.
func (r *RQLiteManager) hasExistingState(rqliteDataDir string) bool {
	entries, err := os.ReadDir(rqliteDataDir)
	if err != nil {
		return false
	}
	for _, e := range entries {
		// Any existing file or directory indicates prior state
		if e.Name() == "." || e.Name() == ".." {
			continue
		}
		return true
	}
	return false
}

// waitForReady waits for RQLite to be ready to accept connections.
// It checks for HTTP 200 + valid raft state (leader/follower).
// The store may not be fully open initially during recovery, but connection retries will handle it.
// For joining nodes in recovery, this may take longer (up to 3 minutes).
func (r *RQLiteManager) waitForReady(ctx context.Context) error {
	url := fmt.Sprintf("http://localhost:%d/status", r.config.RQLitePort)
	client := &http.Client{Timeout: 2 * time.Second}

	// Determine timeout based on whether this is a joining node.
	// Joining nodes in recovery may take longer to open the store.
	var maxAttempts int
	if r.config.RQLiteJoinAddress != "" {
		// Joining node: allow up to 180 seconds (3 minutes) for recovery
		maxAttempts = 180
	} else {
		// Bootstrap node: allow 30 seconds
		maxAttempts = 30
	}

	for i := 0; i < maxAttempts; i++ {
		select {
		case <-ctx.Done():
			return ctx.Err()
		default:
		}

		resp, err := client.Get(url)
		if err == nil && resp.StatusCode == http.StatusOK {
			// Parse the response to check for valid raft state
			body, err := io.ReadAll(resp.Body)
			resp.Body.Close()
			if err == nil {
				var statusResp map[string]interface{}
				if err := json.Unmarshal(body, &statusResp); err == nil {
					// Check for valid raft state (leader or follower).
					// If raft is established, we consider the node ready even if store.open is false.
					// The store will eventually open during recovery, and connection retries will handle it.
					if raft, ok := statusResp["raft"].(map[string]interface{}); ok {
						state, ok := raft["state"].(string)
						if ok && (state == "leader" || state == "follower") {
							r.logger.Debug("RQLite raft ready", zap.String("state", state), zap.Int("attempt", i+1))
							return nil
						}
						// Raft not yet ready (likely in candidate state)
						if i%10 == 0 {
							r.logger.Debug("RQLite raft not yet ready", zap.String("state", state), zap.Int("attempt", i+1))
						}
					} else {
						// If no raft field, fall back to treating HTTP 200 as ready
						// (for backwards compatibility with older RQLite versions)
						r.logger.Debug("RQLite HTTP responsive (no raft field)", zap.Int("attempt", i+1))
						return nil
					}
				} else {
					resp.Body.Close()
				}
			}
		} else if err != nil && i%20 == 0 {
			// Log connection errors only periodically (every ~20s)
			r.logger.Debug("RQLite not yet reachable", zap.Int("attempt", i+1), zap.Error(err))
		} else if resp != nil {
			resp.Body.Close()
		}

		time.Sleep(1 * time.Second)
	}

	return fmt.Errorf("RQLite did not become ready within timeout")
}
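// waitForReady above only inspects the "raft" object of the /status response, falling
// back to plain HTTP 200 when it is absent. A trimmed sketch of the shape this check
// expects (field values are hypothetical and the real payload contains much more):
//
//	{
//	  "raft": {
//	    "state": "follower"
//	  }
//	}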
// waitForLeadership waits for RQLite to establish leadership (for bootstrap nodes)
func (r *RQLiteManager) waitForLeadership(ctx context.Context) error {
	r.logger.Info("Waiting for RQLite to establish leadership...")

	maxAttempts := 30
	attempt := 0
	backoffDelay := 500 * time.Millisecond
	maxBackoff := 5 * time.Second

	for attempt < maxAttempts {
		select {
		case <-ctx.Done():
			return ctx.Err()
		default:
		}

		// Try a simple query to check if leadership is established
		if r.connection != nil {
			_, err := r.connection.QueryOne("SELECT 1")
			if err == nil {
				r.logger.Info("RQLite leadership established")
				return nil
			}
			// Log every 5th attempt or on first attempt to reduce noise
			if attempt%5 == 0 || attempt == 0 {
				r.logger.Debug("Waiting for leadership", zap.Int("attempt", attempt+1), zap.Error(err))
			}
		}

		// Exponential backoff, capped at maxBackoff
		time.Sleep(backoffDelay)
		backoffDelay = time.Duration(float64(backoffDelay) * 1.5)
		if backoffDelay > maxBackoff {
			backoffDelay = maxBackoff
		}
		attempt++
	}

	return fmt.Errorf("RQLite failed to establish leadership within timeout")
}

// GetConnection returns the RQLite connection
func (r *RQLiteManager) GetConnection() *gorqlite.Connection {
	return r.connection
}

// Stop stops the RQLite node
func (r *RQLiteManager) Stop() error {
	if r.connection != nil {
		r.connection.Close()
		r.connection = nil
	}
	if r.cmd == nil || r.cmd.Process == nil {
		return nil
	}

	r.logger.Info("Stopping RQLite node (graceful)")

	// Try SIGTERM first
	if err := r.cmd.Process.Signal(syscall.SIGTERM); err != nil {
		// Fallback to Kill if signaling fails
		_ = r.cmd.Process.Kill()
		return nil
	}

	// Wait up to 5 seconds for graceful shutdown
	done := make(chan error, 1)
	go func() { done <- r.cmd.Wait() }()
	select {
	case err := <-done:
		if err != nil && !errors.Is(err, os.ErrClosed) {
			r.logger.Warn("RQLite process exited with error", zap.Error(err))
		}
	case <-time.After(5 * time.Second):
		r.logger.Warn("RQLite did not exit in time; killing")
		_ = r.cmd.Process.Kill()
	}
	return nil
}

// waitForJoinTarget waits until the join target's HTTP status becomes reachable, or until timeout
func (r *RQLiteManager) waitForJoinTarget(ctx context.Context, joinAddress string, timeout time.Duration) error {
	var deadline time.Time
	if timeout > 0 {
		deadline = time.Now().Add(timeout)
	}

	var lastErr error
	for {
		if err := r.testJoinAddress(joinAddress); err == nil {
			r.logger.Info("Join target is reachable, proceeding with cluster join")
			return nil
		} else {
			lastErr = err
			r.logger.Debug("Join target not yet reachable; waiting...",
				zap.String("join_address", joinAddress),
				zap.Error(err))
		}

		// Check context
		select {
		case <-ctx.Done():
			return ctx.Err()
		case <-time.After(2 * time.Second):
		}

		if !deadline.IsZero() && time.Now().After(deadline) {
			break
		}
	}
	return lastErr
}

// waitForMinClusterSizeBeforeStart waits for the minimum cluster size to be discovered
// and ensures peers.json exists before RQLite starts.
// CRITICAL: This function waits INDEFINITELY - it will NEVER time out.
// We never start as a single-node cluster, regardless of how long we wait.
func (r *RQLiteManager) waitForMinClusterSizeBeforeStart(ctx context.Context, rqliteDataDir string) error {
	if r.discoveryService == nil {
		return fmt.Errorf("discovery service not available")
	}

	requiredRemotePeers := r.config.MinClusterSize - 1
	r.logger.Info("Waiting for minimum cluster size before RQLite startup",
		zap.Int("min_cluster_size", r.config.MinClusterSize),
		zap.Int("required_remote_peers", requiredRemotePeers),
		zap.String("policy", "waiting indefinitely - will never start as single-node cluster"))
	// Trigger peer exchange to collect metadata
	if err := r.discoveryService.TriggerPeerExchange(ctx); err != nil {
		r.logger.Warn("Peer exchange failed", zap.Error(err))
	}

	// NO TIMEOUT - wait indefinitely until minimum cluster size is met.
	// Only exit on context cancellation or when minimum cluster size is achieved.
	checkInterval := 2 * time.Second
	lastLogTime := time.Now()

	for {
		// Check context cancellation first
		select {
		case <-ctx.Done():
			return fmt.Errorf("context cancelled while waiting for minimum cluster size: %w", ctx.Err())
		default:
		}

		// Trigger sync to update knownPeers
		r.discoveryService.TriggerSync()
		time.Sleep(checkInterval)

		// Check if we have enough remote peers
		allPeers := r.discoveryService.GetAllPeers()
		remotePeerCount := 0
		for _, peer := range allPeers {
			if peer.NodeID != r.discoverConfig.RaftAdvAddress {
				remotePeerCount++
			}
		}

		if remotePeerCount >= requiredRemotePeers {
			// Found enough peers - verify peers.json exists and contains them
			peersPath := filepath.Join(rqliteDataDir, "raft", "peers.json")

			// Trigger one more sync to ensure peers.json is written
			r.discoveryService.TriggerSync()
			time.Sleep(2 * time.Second)

			// Verify peers.json exists and contains enough peers
			if info, err := os.Stat(peersPath); err == nil && info.Size() > 10 {
				// Read and verify it contains enough peers
				data, err := os.ReadFile(peersPath)
				if err == nil {
					var peers []map[string]interface{}
					if err := json.Unmarshal(data, &peers); err == nil && len(peers) >= requiredRemotePeers {
						r.logger.Info("peers.json exists with minimum cluster size, safe to start RQLite",
							zap.String("peers_file", peersPath),
							zap.Int("remote_peers_discovered", remotePeerCount),
							zap.Int("peers_in_json", len(peers)),
							zap.Int("min_cluster_size", r.config.MinClusterSize))
						return nil
					}
				}
			}
		}

		// Log progress every 10 seconds
		if time.Since(lastLogTime) >= 10*time.Second {
			r.logger.Info("Waiting for minimum cluster size (indefinitely)...",
				zap.Int("discovered_peers", len(allPeers)),
				zap.Int("remote_peers", remotePeerCount),
				zap.Int("required_remote_peers", requiredRemotePeers),
				zap.String("status", "will continue waiting until minimum cluster size is met"))
			lastLogTime = time.Now()
		}
	}
}
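// waitForMinClusterSizeBeforeStart only requires peers.json to decode as a JSON array
// with at least the required number of entries. The discovery service owns the exact
// format; a plausible shape, consistent with the id/address convention noted in
// validateNodeID below (all values hypothetical):
//
//	[
//	  {"id": "10.0.0.4:7001", "address": "10.0.0.4:7001"},
//	  {"id": "10.0.0.5:7001", "address": "10.0.0.5:7001"}
//	]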
// testJoinAddress tests if a join address is reachable
func (r *RQLiteManager) testJoinAddress(joinAddress string) error {
	// Determine the HTTP status URL to probe.
	// If joinAddress contains a scheme, use it directly. Otherwise treat joinAddress
	// as host:port (Raft) and probe the standard HTTP API port 5001 on that host.
	client := &http.Client{Timeout: 5 * time.Second}
	var statusURL string
	if strings.HasPrefix(joinAddress, "http://") || strings.HasPrefix(joinAddress, "https://") {
		statusURL = strings.TrimRight(joinAddress, "/") + "/status"
	} else {
		// Extract host from host:port
		host := joinAddress
		if idx := strings.Index(joinAddress, ":"); idx != -1 {
			host = joinAddress[:idx]
		}
		statusURL = fmt.Sprintf("http://%s:%d/status", host, 5001)
	}

	r.logger.Debug("Testing join target via HTTP", zap.String("url", statusURL))
	resp, err := client.Get(statusURL)
	if err != nil {
		return fmt.Errorf("failed to connect to leader HTTP at %s: %w", statusURL, err)
	}
	defer resp.Body.Close()
	if resp.StatusCode != http.StatusOK {
		return fmt.Errorf("leader HTTP at %s returned status %d", statusURL, resp.StatusCode)
	}

	r.logger.Info("Leader HTTP reachable", zap.String("status_url", statusURL))
	return nil
}

// canSafelyBecomeLeader checks if this node can safely become leader without causing data loss
func (r *RQLiteManager) canSafelyBecomeLeader() (bool, error) {
	// Get our current Raft log index
	ourLogIndex := r.getRaftLogIndex()

	// If no discovery service, assume it's safe (backward compatibility)
	if r.discoveryService == nil {
		r.logger.Debug("No discovery service, assuming safe to lead")
		return true, nil
	}

	// Query discovery service for other nodes
	otherNodes := r.discoveryService.GetActivePeers()
	if len(otherNodes) == 0 {
		// No other nodes - safe to bootstrap
		r.logger.Debug("No other nodes discovered, safe to lead",
			zap.Uint64("our_log_index", ourLogIndex))
		return true, nil
	}

	// Check if any other node has a higher log index
	for _, peer := range otherNodes {
		if peer.RaftLogIndex > ourLogIndex {
			// Other node has more data - we should join them
			return false, fmt.Errorf(
				"node %s has higher log index (%d > %d), should join as follower",
				peer.NodeID, peer.RaftLogIndex, ourLogIndex)
		}
	}

	// We have most recent data or equal - safe to lead
	r.logger.Info("Safe to lead - we have most recent data",
		zap.Uint64("our_log_index", ourLogIndex),
		zap.Int("other_nodes_checked", len(otherNodes)))
	return true, nil
}

// joinExistingCluster attempts to join an existing cluster as a follower
func (r *RQLiteManager) joinExistingCluster(ctx context.Context, raftAddress string) error {
	r.logger.Info("Attempting to join existing cluster",
		zap.String("target_raft_address", raftAddress))

	// Wait for the target to be reachable
	if err := r.waitForJoinTarget(ctx, raftAddress, 2*time.Minute); err != nil {
		return fmt.Errorf("join target not reachable: %w", err)
	}

	// Wait for SQL availability (the target should have a leader)
	sqlCtx := ctx
	if _, hasDeadline := ctx.Deadline(); !hasDeadline {
		var cancel context.CancelFunc
		sqlCtx, cancel = context.WithTimeout(context.Background(), 5*time.Minute)
		defer cancel()
	}
	if err := r.waitForSQLAvailable(sqlCtx); err != nil {
		return fmt.Errorf("failed to join cluster - SQL not available: %w", err)
	}

	r.logger.Info("Successfully joined existing cluster")
	return nil
}

// exponentialBackoff calculates exponential backoff duration with jitter
func (r *RQLiteManager) exponentialBackoff(attempt int, baseDelay time.Duration, maxDelay time.Duration) time.Duration {
	// Calculate exponential backoff: baseDelay * 2^attempt
	delay := baseDelay * time.Duration(1<<uint(attempt))
	if delay > maxDelay {
		delay = maxDelay
	}
	// Add jitter (±20%)
	jitter := time.Duration(float64(delay) * 0.2 * (2.0*float64(time.Now().UnixNano()%100)/100.0 - 1.0))
	return delay + jitter
}
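// For example, with a 500ms base and a 5s cap, exponentialBackoff produces roughly the
// following sequence before jitter is applied:
//
//	attempt 0 -> 500ms, attempt 1 -> 1s, attempt 2 -> 2s, attempt 3 -> 4s,
//	attempt 4+ -> capped at 5s, then ±20% jitter is added to the capped value.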
// recoverCluster restarts RQLite using the recovery.db created from peers.json.
// It reuses launchProcess and waitForReadyAndConnect to ensure all join/backoff logic
// and proper readiness checks are applied during recovery.
func (r *RQLiteManager) recoverCluster(ctx context.Context, peersJSONPath string) error {
	r.logger.Info("Initiating cluster recovery by restarting RQLite",
		zap.String("peers_file", peersJSONPath))

	// Stop the current RQLite process
	r.logger.Info("Stopping RQLite for recovery")
	if err := r.Stop(); err != nil {
		r.logger.Warn("Error stopping RQLite", zap.Error(err))
	}

	// Wait for process to fully stop
	time.Sleep(2 * time.Second)

	// Get the data directory path
	rqliteDataDir, err := r.rqliteDataDirPath()
	if err != nil {
		return fmt.Errorf("failed to resolve RQLite data directory: %w", err)
	}

	// Restart RQLite using launchProcess to ensure all join/backoff logic is applied.
	// This includes: join address handling, join retries, bootstrap-expect, etc.
	r.logger.Info("Restarting RQLite (will auto-recover using peers.json)")
	if err := r.launchProcess(ctx, rqliteDataDir); err != nil {
		return fmt.Errorf("failed to restart RQLite process: %w", err)
	}

	// Wait for RQLite to be ready and establish connection using proper readiness checks.
	// This includes retries for "store is not open" errors during recovery.
	if err := r.waitForReadyAndConnect(ctx); err != nil {
		// Clean up the process if connection failed
		if r.cmd != nil && r.cmd.Process != nil {
			_ = r.cmd.Process.Kill()
		}
		return fmt.Errorf("failed to wait for RQLite readiness after recovery: %w", err)
	}

	r.logger.Info("Cluster recovery completed, RQLite restarted with new configuration")
	return nil
}

// checkNeedsClusterRecovery checks if the node has old cluster state that requires coordinated recovery.
// Returns true if there are snapshots but the raft log is empty (typical after a crash/restart).
func (r *RQLiteManager) checkNeedsClusterRecovery(rqliteDataDir string) (bool, error) {
	// Check for snapshots directory
	snapshotsDir := filepath.Join(rqliteDataDir, "rsnapshots")
	if _, err := os.Stat(snapshotsDir); os.IsNotExist(err) {
		// No snapshots = fresh start, no recovery needed
		return false, nil
	}

	// Check if snapshots directory has any snapshots
	entries, err := os.ReadDir(snapshotsDir)
	if err != nil {
		return false, fmt.Errorf("failed to read snapshots directory: %w", err)
	}

	hasSnapshots := false
	for _, entry := range entries {
		if entry.IsDir() || strings.HasSuffix(entry.Name(), ".db") {
			hasSnapshots = true
			break
		}
	}

	if !hasSnapshots {
		// No snapshots = fresh start
		return false, nil
	}

	// Check raft log size - if it's the default empty size, we need recovery
	raftLogPath := filepath.Join(rqliteDataDir, "raft.db")
	if info, err := os.Stat(raftLogPath); err == nil {
		// Empty or default-sized log with snapshots means we need coordinated recovery
		if info.Size() <= 8*1024*1024 { // <= 8MB (default empty log size)
			r.logger.Info("Detected cluster recovery situation: snapshots exist but raft log is empty/default size",
				zap.String("snapshots_dir", snapshotsDir),
				zap.Int64("raft_log_size", info.Size()))
			return true, nil
		}
	}

	return false, nil
}
// hasExistingRaftState checks if this node has any existing Raft state files.
// Returns true if raft.db exists and has content, or if peers.json exists.
func (r *RQLiteManager) hasExistingRaftState(rqliteDataDir string) bool {
	// Check for raft.db
	raftLogPath := filepath.Join(rqliteDataDir, "raft.db")
	if info, err := os.Stat(raftLogPath); err == nil {
		// If raft.db exists and has meaningful content (> 1KB), we have state
		if info.Size() > 1024 {
			return true
		}
	}

	// Check for peers.json
	peersPath := filepath.Join(rqliteDataDir, "raft", "peers.json")
	if _, err := os.Stat(peersPath); err == nil {
		return true
	}

	return false
}

// clearRaftState safely removes Raft state files to allow a clean join.
// This removes raft.db and peers.json but preserves db.sqlite.
func (r *RQLiteManager) clearRaftState(rqliteDataDir string) error {
	r.logger.Warn("Clearing Raft state to allow clean cluster join",
		zap.String("data_dir", rqliteDataDir))

	// Remove raft.db if it exists
	raftLogPath := filepath.Join(rqliteDataDir, "raft.db")
	if err := os.Remove(raftLogPath); err != nil && !os.IsNotExist(err) {
		r.logger.Warn("Failed to remove raft.db", zap.Error(err))
	} else if err == nil {
		r.logger.Info("Removed raft.db")
	}

	// Remove peers.json if it exists
	peersPath := filepath.Join(rqliteDataDir, "raft", "peers.json")
	if err := os.Remove(peersPath); err != nil && !os.IsNotExist(err) {
		r.logger.Warn("Failed to remove peers.json", zap.Error(err))
	} else if err == nil {
		r.logger.Info("Removed peers.json")
	}

	// Remove raft directory if it's empty
	raftDir := filepath.Join(rqliteDataDir, "raft")
	if entries, err := os.ReadDir(raftDir); err == nil && len(entries) == 0 {
		if err := os.Remove(raftDir); err != nil {
			r.logger.Debug("Failed to remove empty raft directory", zap.Error(err))
		}
	}

	r.logger.Info("Raft state cleared successfully - node will join as fresh follower")
	return nil
}

// isStuckInConfigurationMismatch checks if we're stuck due to configuration mismatch.
// This detects both configuration mismatch AND split-brain scenarios.
func (r *RQLiteManager) isStuckInConfigurationMismatch() bool {
	// First check for split-brain (all followers, term 0, no peers)
	if r.isInSplitBrainState() {
		return true
	}

	// Then check for traditional configuration mismatch
	status, err := r.getRQLiteStatus()
	if err != nil {
		r.logger.Debug("Cannot check Raft status for configuration mismatch", zap.Error(err))
		return false // Can't determine, don't clear
	}

	raftState := strings.ToLower(status.Store.Raft.State)
	hasLeader := status.Store.Raft.LeaderAddr != ""

	// Stuck if: no leader AND state is not "leader" or "follower"
	if !hasLeader && raftState != "leader" && raftState != "follower" {
		if r.allPeersAreStuck() {
			return true
		}
	}

	return false
}

// allPeersAreStuck checks if all discovered peers also report no leader.
// This helps confirm we're in a cluster-wide configuration mismatch, not just a local issue.
func (r *RQLiteManager) allPeersAreStuck() bool {
	if r.discoveryService == nil {
		r.logger.Debug("No discovery service available to check peer status")
		return false
	}

	peers := r.discoveryService.GetActivePeers()
	if len(peers) == 0 {
		r.logger.Debug("No peers discovered, might be network issue")
		return false // No peers discovered, might be network issue
	}

	// Check if we can query peers and they all report no leader
	stuckCount := 0
	reachableCount := 0
	for _, peer := range peers {
		if r.peerHasLeader(peer.HTTPAddress) {
			// Peer has a leader, so we're not in cluster-wide mismatch
			return false
		}
		// Check if peer is at least reachable
		if r.isPeerReachable(peer.HTTPAddress) {
			reachableCount++
			stuckCount++
		}
	}

	// If we have reachable peers and they're all stuck, we're likely in cluster-wide config mismatch
	if reachableCount > 0 && stuckCount == reachableCount {
		r.logger.Debug("All reachable peers are also stuck",
			zap.Int("reachable_peers", reachableCount),
			zap.Int("total_peers", len(peers)))
		return true
	}

	return false
}
// peerHasLeader checks if a peer has a leader by querying its status endpoint
func (r *RQLiteManager) peerHasLeader(httpAddr string) bool {
	url := fmt.Sprintf("http://%s/status", httpAddr)
	client := &http.Client{Timeout: 3 * time.Second}

	resp, err := client.Get(url)
	if err != nil {
		return false // Can't reach peer
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		return false
	}

	var status RQLiteStatus
	if err := json.NewDecoder(resp.Body).Decode(&status); err != nil {
		return false
	}

	// Peer has a leader if the leader address is set
	return status.Store.Raft.LeaderAddr != ""
}

// isPeerReachable checks if a peer is at least responding to HTTP requests
func (r *RQLiteManager) isPeerReachable(httpAddr string) bool {
	url := fmt.Sprintf("http://%s/status", httpAddr)
	client := &http.Client{Timeout: 3 * time.Second}

	resp, err := client.Get(url)
	if err != nil {
		return false
	}
	defer resp.Body.Close()

	return resp.StatusCode == http.StatusOK
}

// isInSplitBrainState detects if we're in a split-brain scenario where all nodes
// are followers with no peers (each node thinks it's alone)
func (r *RQLiteManager) isInSplitBrainState() bool {
	status, err := r.getRQLiteStatus()
	if err != nil {
		return false
	}

	raft := status.Store.Raft

	// Split-brain indicators:
	// - State is Follower (not Leader)
	// - Term is 0 (no leader election has occurred)
	// - num_peers is 0 (node thinks it's alone)
	// - voter is false (node not configured as voter)
	isSplitBrain := raft.State == "Follower" &&
		raft.Term == 0 &&
		raft.NumPeers == 0 &&
		!raft.Voter &&
		raft.LeaderAddr == ""

	if !isSplitBrain {
		return false
	}

	// Verify all discovered peers are also in split-brain state
	if r.discoveryService == nil {
		r.logger.Debug("No discovery service to verify split-brain across peers")
		return false
	}

	peers := r.discoveryService.GetActivePeers()
	if len(peers) == 0 {
		// No peers discovered yet - might be network issue, not split-brain
		return false
	}

	// Check if all reachable peers are also in split-brain
	splitBrainCount := 0
	reachableCount := 0
	for _, peer := range peers {
		if !r.isPeerReachable(peer.HTTPAddress) {
			continue
		}
		reachableCount++

		peerStatus, err := r.getPeerRQLiteStatus(peer.HTTPAddress)
		if err != nil {
			continue
		}

		peerRaft := peerStatus.Store.Raft
		if peerRaft.State == "Follower" &&
			peerRaft.Term == 0 &&
			peerRaft.NumPeers == 0 &&
			!peerRaft.Voter {
			splitBrainCount++
		}
	}

	// If all reachable peers are in split-brain, we have cluster-wide split-brain
	if reachableCount > 0 && splitBrainCount == reachableCount {
		r.logger.Warn("Detected cluster-wide split-brain state",
			zap.Int("reachable_peers", reachableCount),
			zap.Int("split_brain_peers", splitBrainCount))
		return true
	}

	return false
}

// getPeerRQLiteStatus queries a peer's status endpoint
func (r *RQLiteManager) getPeerRQLiteStatus(httpAddr string) (*RQLiteStatus, error) {
	url := fmt.Sprintf("http://%s/status", httpAddr)
	client := &http.Client{Timeout: 3 * time.Second}

	resp, err := client.Get(url)
	if err != nil {
		return nil, err
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		return nil, fmt.Errorf("peer returned status %d", resp.StatusCode)
	}

	var status RQLiteStatus
	if err := json.NewDecoder(resp.Body).Decode(&status); err != nil {
		return nil, err
	}

	return &status, nil
}
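// The RQLiteStatus struct (defined elsewhere in this package) is only read for the
// handful of fields used above. As an illustration, a node in the split-brain state
// detected by isInSplitBrainState would report values equivalent to:
//
//	status.Store.Raft.State      == "Follower"
//	status.Store.Raft.Term       == 0
//	status.Store.Raft.NumPeers   == 0
//	status.Store.Raft.Voter      == false
//	status.Store.Raft.LeaderAddr == ""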
// startHealthMonitoring runs periodic health checks and automatically recovers from split-brain
func (r *RQLiteManager) startHealthMonitoring(ctx context.Context) {
	// Wait a bit after startup before starting health checks
	time.Sleep(30 * time.Second)

	ticker := time.NewTicker(60 * time.Second) // Check every minute
	defer ticker.Stop()

	for {
		select {
		case <-ctx.Done():
			return
		case <-ticker.C:
			// Check for split-brain state
			if r.isInSplitBrainState() {
				r.logger.Warn("Split-brain detected during health check, initiating automatic recovery")

				// Attempt automatic recovery
				if err := r.recoverFromSplitBrain(ctx); err != nil {
					r.logger.Error("Automatic split-brain recovery failed",
						zap.Error(err),
						zap.String("action", "will retry on next health check"))
				} else {
					r.logger.Info("Successfully recovered from split-brain")
				}
			}
		}
	}
}

// recoverFromSplitBrain automatically recovers from split-brain state
func (r *RQLiteManager) recoverFromSplitBrain(ctx context.Context) error {
	if r.discoveryService == nil {
		return fmt.Errorf("discovery service not available for recovery")
	}

	r.logger.Info("Starting automatic split-brain recovery")

	// Step 1: Ensure we have the latest peer information
	r.discoveryService.TriggerPeerExchange(ctx)
	time.Sleep(2 * time.Second)
	r.discoveryService.TriggerSync()
	time.Sleep(2 * time.Second)

	// Step 2: Get data directory
	rqliteDataDir, err := r.rqliteDataDirPath()
	if err != nil {
		return fmt.Errorf("failed to get data directory: %w", err)
	}

	// Step 3: Check if peers have more recent data
	allPeers := r.discoveryService.GetAllPeers()
	maxPeerIndex := uint64(0)
	for _, peer := range allPeers {
		if peer.NodeID == r.discoverConfig.RaftAdvAddress {
			continue // Skip self
		}
		if peer.RaftLogIndex > maxPeerIndex {
			maxPeerIndex = peer.RaftLogIndex
		}
	}

	// Step 4: Clear our Raft state if peers have more recent data
	ourIndex := r.getRaftLogIndex()
	if maxPeerIndex > ourIndex || (maxPeerIndex == 0 && ourIndex == 0) {
		r.logger.Info("Clearing Raft state to allow clean cluster join",
			zap.Uint64("our_index", ourIndex),
			zap.Uint64("peer_max_index", maxPeerIndex))

		if err := r.clearRaftState(rqliteDataDir); err != nil {
			return fmt.Errorf("failed to clear Raft state: %w", err)
		}

		// Step 5: Refresh peer metadata and force write peers.json.
		// We trigger peer exchange again to ensure we have the absolute latest metadata
		// after clearing state, then force write peers.json regardless of changes.
		r.logger.Info("Refreshing peer metadata after clearing raft state")
		r.discoveryService.TriggerPeerExchange(ctx)
		time.Sleep(1 * time.Second) // Brief wait for peer exchange to complete

		r.logger.Info("Force writing peers.json with all discovered peers")
		// We use ForceWritePeersJSON instead of TriggerSync because TriggerSync
		// only writes if membership changed, but after clearing state we need
		// to write regardless of changes
		if err := r.discoveryService.ForceWritePeersJSON(); err != nil {
			return fmt.Errorf("failed to force write peers.json: %w", err)
		}

		// Verify peers.json was created
		peersPath := filepath.Join(rqliteDataDir, "raft", "peers.json")
		if _, err := os.Stat(peersPath); err != nil {
			return fmt.Errorf("peers.json not created after force write: %w", err)
		}
		r.logger.Info("peers.json verified after force write", zap.String("peers_path", peersPath))

		// Step 6: Restart RQLite to pick up new peers.json
		r.logger.Info("Restarting RQLite to apply new cluster configuration")
		if err := r.recoverCluster(ctx, peersPath); err != nil {
			return fmt.Errorf("failed to restart RQLite: %w", err)
		}

		// Step 7: Wait for cluster to form (waitForReadyAndConnect already handled readiness)
		r.logger.Info("Waiting for cluster to stabilize after recovery...")
		time.Sleep(5 * time.Second)

		// Verify recovery succeeded
		if r.isInSplitBrainState() {
			return fmt.Errorf("still in split-brain after recovery attempt")
		}

		r.logger.Info("Split-brain recovery completed successfully")
		return nil
	}

	return fmt.Errorf("cannot recover: we have more recent data than peers")
}
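// Illustrative outcomes of the index comparison in recoverFromSplitBrain (indexes are
// hypothetical):
//
//	ourIndex = 0,  peers report {42, 40} -> maxPeerIndex = 42 -> clear state and rejoin
//	ourIndex = 0,  no peer has any log   -> maxPeerIndex = 0  -> clear state and rejoin
//	ourIndex = 50, peers report {42, 40} -> maxPeerIndex = 42 -> keep state, return error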
// isSafeToClearState verifies we can safely clear Raft state.
// Returns true only if peers have higher log indexes (they have more recent data)
// or if we have no meaningful state (index == 0).
func (r *RQLiteManager) isSafeToClearState(rqliteDataDir string) bool {
	if r.discoveryService == nil {
		r.logger.Debug("No discovery service available, cannot verify safety")
		return false // No discovery service, can't verify
	}

	ourIndex := r.getRaftLogIndex()
	peers := r.discoveryService.GetActivePeers()

	if len(peers) == 0 {
		r.logger.Debug("No peers discovered, might be network issue")
		return false // No peers, might be network issue
	}

	// Find max peer log index
	maxPeerIndex := uint64(0)
	for _, peer := range peers {
		if peer.RaftLogIndex > maxPeerIndex {
			maxPeerIndex = peer.RaftLogIndex
		}
	}

	// Safe to clear if peers have higher log indexes (they have more recent data)
	// OR if we have no meaningful state (index == 0)
	safe := maxPeerIndex > ourIndex || ourIndex == 0

	r.logger.Debug("Checking if safe to clear Raft state",
		zap.Uint64("our_log_index", ourIndex),
		zap.Uint64("peer_max_log_index", maxPeerIndex),
		zap.Bool("safe_to_clear", safe))

	return safe
}

// performPreStartClusterDiscovery waits for peer discovery and builds a complete peers.json
// before starting RQLite. This ensures all nodes use the same cluster membership for recovery.
func (r *RQLiteManager) performPreStartClusterDiscovery(ctx context.Context, rqliteDataDir string) error {
	if r.discoveryService == nil {
		r.logger.Warn("No discovery service available, cannot perform pre-start cluster discovery")
		return fmt.Errorf("discovery service not available")
	}

	r.logger.Info("Waiting for peer discovery to find other cluster members...")

	// CRITICAL: First, actively trigger peer exchange to populate the peerstore with RQLite metadata.
	// The peerstore needs RQLite metadata from other nodes BEFORE we can collect it.
	r.logger.Info("Triggering peer exchange to collect RQLite metadata from connected peers")
	if err := r.discoveryService.TriggerPeerExchange(ctx); err != nil {
		r.logger.Warn("Peer exchange failed, continuing anyway", zap.Error(err))
	}

	// Give peer exchange a moment to complete
	time.Sleep(1 * time.Second)

	// Now trigger cluster membership sync to populate the knownPeers map from the peerstore
	r.logger.Info("Triggering initial cluster membership sync to populate peer list")
	r.discoveryService.TriggerSync()

	// Give the sync a moment to complete
	time.Sleep(2 * time.Second)

	// Wait for peer discovery - give it time to find peers (30 seconds should be enough)
	discoveryDeadline := time.Now().Add(30 * time.Second)
	var discoveredPeers int

	for time.Now().Before(discoveryDeadline) {
		// Check how many peers with RQLite metadata we've discovered
		allPeers := r.discoveryService.GetAllPeers()
		discoveredPeers = len(allPeers)

		r.logger.Info("Peer discovery progress",
			zap.Int("discovered_peers", discoveredPeers),
			zap.Duration("time_remaining", time.Until(discoveryDeadline)))

		// If we have at least our minimum cluster size, proceed
		if discoveredPeers >= r.config.MinClusterSize {
			r.logger.Info("Found minimum cluster size peers, proceeding with recovery",
				zap.Int("discovered_peers", discoveredPeers),
				zap.Int("min_cluster_size", r.config.MinClusterSize))
			break
		}

		// Wait a bit before checking again
		time.Sleep(2 * time.Second)
	}

	// CRITICAL FIX: Skip recovery if no peers were discovered (other than ourselves).
	// Only ourselves in the cluster means this is a fresh bootstrap, not a recovery scenario.
	if discoveredPeers <= 1 {
		r.logger.Info("No peers discovered during pre-start discovery window - skipping recovery (fresh bootstrap)",
			zap.Int("discovered_peers", discoveredPeers))
		return nil
	}

	// AUTOMATIC RECOVERY: Check if we have stale Raft state that conflicts with the cluster.
	// If we have existing state but peers have higher log indexes, clear our state to allow a clean join.
	allPeers := r.discoveryService.GetAllPeers()
	hasExistingState := r.hasExistingRaftState(rqliteDataDir)

	if hasExistingState {
		// Find the highest log index among other peers (excluding ourselves)
		maxPeerIndex := uint64(0)
		for _, peer := range allPeers {
			// Skip ourselves (compare by raft address)
			if peer.NodeID == r.discoverConfig.RaftAdvAddress {
				continue
			}
			if peer.RaftLogIndex > maxPeerIndex {
				maxPeerIndex = peer.RaftLogIndex
			}
		}

		// If peers have meaningful log history (> 0) and we have stale state, clear it.
		// This handles the case where we're starting with old state but the cluster has moved on.
		if maxPeerIndex > 0 {
			r.logger.Warn("Detected stale Raft state - clearing to allow clean cluster join",
				zap.Uint64("peer_max_log_index", maxPeerIndex),
				zap.String("data_dir", rqliteDataDir))

			if err := r.clearRaftState(rqliteDataDir); err != nil {
				r.logger.Error("Failed to clear Raft state", zap.Error(err))
				// Continue anyway - rqlite might still be able to recover
			} else {
				// Force write peers.json after clearing stale state
				if r.discoveryService != nil {
					r.logger.Info("Force writing peers.json after clearing stale Raft state")
					if err := r.discoveryService.ForceWritePeersJSON(); err != nil {
						r.logger.Error("Failed to force write peers.json after clearing stale state", zap.Error(err))
					}
				}
			}
		}
	}

	// Trigger final sync to ensure peers.json is up to date with the latest discovered peers
	r.logger.Info("Triggering final cluster membership sync to build complete peers.json")
	r.discoveryService.TriggerSync()

	// Wait a moment for the sync to complete
	time.Sleep(2 * time.Second)

	// Verify peers.json was created
	peersPath := filepath.Join(rqliteDataDir, "raft", "peers.json")
	if _, err := os.Stat(peersPath); err != nil {
		return fmt.Errorf("peers.json was not created after discovery: %w", err)
	}

	r.logger.Info("Pre-start cluster discovery completed successfully",
		zap.String("peers_file", peersPath),
		zap.Int("peer_count", discoveredPeers))

	return nil
}

// validateNodeID checks that rqlite's reported node ID matches our configured raft address
func (r *RQLiteManager) validateNodeID() error {
	// Query the /nodes endpoint to get our node ID.
	// Retry a few times as the endpoint might not be ready immediately.
	for i := 0; i < 5; i++ {
		nodes, err := r.getRQLiteNodes()
		if err != nil {
			// If the endpoint is not ready yet, wait and retry
			if i < 4 {
				time.Sleep(500 * time.Millisecond)
				continue
			}
			// Log at debug level if validation fails - not critical
			r.logger.Debug("Node ID validation skipped (endpoint unavailable)", zap.Error(err))
			return nil
		}

		expectedID := r.discoverConfig.RaftAdvAddress
		if expectedID == "" {
			return fmt.Errorf("raft_adv_address not configured")
		}

		// If the cluster is still forming, the nodes list might be empty - that's okay
		if len(nodes) == 0 {
			r.logger.Debug("Node ID validation skipped (cluster not yet formed)")
			return nil
		}

		// Find our node in the cluster (match by address)
		for _, node := range nodes {
			if node.Address == expectedID {
				if node.ID != expectedID {
(raft address)")) return fmt.Errorf("node ID mismatch: configured %s but rqlite reports %s", expectedID, node.ID) } r.logger.Debug("Node ID validation passed", zap.String("node_id", node.ID), zap.String("address", node.Address)) return nil } } // If we can't find ourselves but other nodes exist, cluster might still be forming // This is fine - don't log a warning r.logger.Debug("Node ID validation skipped (node not yet in cluster membership)", zap.String("expected_address", expectedID), zap.Int("nodes_in_cluster", len(nodes))) return nil } return nil }