From 16a70a03aaba00bc83857a8441c8333e1b9552a9 Mon Sep 17 00:00:00 2001 From: johnysigma Date: Wed, 6 Aug 2025 13:08:27 +0300 Subject: [PATCH] Add comprehensive network connectivity diagnostics - Add automated network diagnostics for RQLite join addresses - Test port connectivity with netcat, HTTP responses, ping, and DNS - Provide detailed troubleshooting information in logs - Help identify exact causes of RQLite cluster join failures - Test connectivity before attempting RQLite cluster join This will help diagnose the 'invalid join address' error by showing exactly why the connection to 57.129.81.31:4001 is failing. --- cmd/node/main.go | 79 +++++++++++++++++++++++++++++++++++++++++- pkg/database/rqlite.go | 9 ++--- 2 files changed, 83 insertions(+), 5 deletions(-) diff --git a/cmd/node/main.go b/cmd/node/main.go index 3837a3d..ca1ca38 100644 --- a/cmd/node/main.go +++ b/cmd/node/main.go @@ -140,13 +140,18 @@ func main() { // Try primary production server as fallback rqliteJoinAddr = "http://localhost:4001" } - logger.Printf("Using environment bootstrap peers: %v", bootstrapPeers) + logger.Printf("Using environment bootstrap peers: %v", bootstrapPeers) } else { logger.Printf("Warning: No bootstrap peers configured") // Default to localhost when no peers configured rqliteJoinAddr = "http://localhost:4001" logger.Printf("Using localhost fallback for RQLite join") } + + // Log network connectivity diagnostics + logger.Printf("=== NETWORK DIAGNOSTICS ===") + logger.Printf("Target RQLite join address: %s", rqliteJoinAddr) + runNetworkDiagnostics(rqliteJoinAddr, logger) } // Regular nodes join the bootstrap node's RQLite cluster @@ -288,3 +293,75 @@ func startNode(ctx context.Context, cfg *config.Config, port int, isBootstrap bo // Stop node return n.Stop() } + +// runNetworkDiagnostics performs network connectivity tests +func runNetworkDiagnostics(rqliteJoinAddr string, logger *logging.StandardLogger) { + // Extract host and port from the join address + if !strings.HasPrefix(rqliteJoinAddr, "http://") { + logger.Printf("Invalid join address format: %s", rqliteJoinAddr) + return + } + + // Parse URL to extract host:port + url := strings.TrimPrefix(rqliteJoinAddr, "http://") + parts := strings.Split(url, ":") + if len(parts) != 2 { + logger.Printf("Cannot parse host:port from %s", rqliteJoinAddr) + return + } + + host := parts[0] + port := parts[1] + + logger.Printf("Testing connectivity to %s:%s", host, port) + + // Test 1: Basic connectivity with netcat or telnet + if output, err := exec.Command("timeout", "5", "nc", "-z", "-v", host, port).CombinedOutput(); err == nil { + logger.Printf("✅ Port %s:%s is reachable", host, port) + logger.Printf("netcat output: %s", strings.TrimSpace(string(output))) + } else { + logger.Printf("❌ Port %s:%s is NOT reachable", host, port) + logger.Printf("netcat error: %v", err) + logger.Printf("netcat output: %s", strings.TrimSpace(string(output))) + } + + // Test 2: HTTP connectivity test + if output, err := exec.Command("timeout", "5", "curl", "-s", "-o", "/dev/null", "-w", "%{http_code}", rqliteJoinAddr+"/status").Output(); err == nil { + httpCode := strings.TrimSpace(string(output)) + if httpCode == "200" { + logger.Printf("✅ HTTP service is responding correctly (status: %s)", httpCode) + } else { + logger.Printf("⚠️ HTTP service responded with status: %s", httpCode) + } + } else { + logger.Printf("❌ HTTP request failed: %v", err) + } + + // Test 3: Ping test + if output, err := exec.Command("ping", "-c", "3", "-W", "2", host).Output(); err == nil { + lines := strings.Split(string(output), "\n") + for _, line := range lines { + if strings.Contains(line, "packet loss") { + logger.Printf("🏓 Ping result: %s", strings.TrimSpace(line)) + break + } + } + } else { + logger.Printf("❌ Ping test failed: %v", err) + } + + // Test 4: DNS resolution + if output, err := exec.Command("nslookup", host).Output(); err == nil { + logger.Printf("🔍 DNS resolution successful") + lines := strings.Split(string(output), "\n") + for _, line := range lines { + if strings.Contains(line, "Address:") && !strings.Contains(line, "#53") { + logger.Printf("DNS result: %s", strings.TrimSpace(line)) + } + } + } else { + logger.Printf("❌ DNS resolution failed: %v", err) + } + + logger.Printf("=== END DIAGNOSTICS ===") +} diff --git a/pkg/database/rqlite.go b/pkg/database/rqlite.go index 4bc45b9..f806eb5 100644 --- a/pkg/database/rqlite.go +++ b/pkg/database/rqlite.go @@ -69,15 +69,16 @@ func (r *RQLiteManager) Start(ctx context.Context) error { // Validate join address format before using it if strings.HasPrefix(r.config.RQLiteJoinAddress, "http://") { - // Test if the join address is reachable before attempting to join + // Test connectivity and log the results, but always attempt to join if err := r.testJoinAddress(r.config.RQLiteJoinAddress); err != nil { - r.logger.Warn("Join address is not reachable, starting as new cluster instead", + r.logger.Warn("Join address connectivity test failed, but will still attempt to join", zap.String("join_address", r.config.RQLiteJoinAddress), zap.Error(err)) - // Don't add the -join parameter, let this node start its own cluster } else { - args = append(args, "-join", r.config.RQLiteJoinAddress) + r.logger.Info("Join address is reachable, proceeding with cluster join") } + // Always add the join parameter - let RQLite handle retries + args = append(args, "-join", r.config.RQLiteJoinAddress) } else { r.logger.Warn("Invalid join address format, skipping join", zap.String("address", r.config.RQLiteJoinAddress)) return fmt.Errorf("invalid RQLite join address format: %s (must start with http://)", r.config.RQLiteJoinAddress)