Add comprehensive network connectivity diagnostics

- Add automated network diagnostics for RQLite join addresses
- Test port connectivity with netcat, HTTP responses, ping, and DNS
- Provide detailed troubleshooting information in logs
- Help identify exact causes of RQLite cluster join failures
- Test connectivity before attempting RQLite cluster join

This will help diagnose the 'invalid join address' error by showing exactly
why the connection to 57.129.81.31:4001 is failing.
This commit is contained in:
johnysigma 2025-08-06 13:08:27 +03:00
parent 56f0a01b79
commit 16a70a03aa
2 changed files with 83 additions and 5 deletions

View File

@ -147,6 +147,11 @@ func main() {
rqliteJoinAddr = "http://localhost:4001"
logger.Printf("Using localhost fallback for RQLite join")
}
// Log network connectivity diagnostics
logger.Printf("=== NETWORK DIAGNOSTICS ===")
logger.Printf("Target RQLite join address: %s", rqliteJoinAddr)
runNetworkDiagnostics(rqliteJoinAddr, logger)
}
// Regular nodes join the bootstrap node's RQLite cluster
@ -288,3 +293,75 @@ func startNode(ctx context.Context, cfg *config.Config, port int, isBootstrap bo
// Stop node
return n.Stop()
}
// runNetworkDiagnostics runs a series of best-effort connectivity probes
// against the host and port of rqliteJoinAddr and writes the outcome of each
// probe to logger. It never returns an error: every failure is only logged.
//
// rqliteJoinAddr must start with "http://" and carry an explicit port, e.g.
// "http://10.0.0.1:4001". A trailing path ("http://10.0.0.1:4001/") and a
// bracketed IPv6 host ("http://[::1]:4001") are accepted.
//
// The probes shell out to the external tools timeout, nc, curl, ping and
// nslookup; a missing tool shows up as a failed probe with the exec error in
// the log line.
func runNetworkDiagnostics(rqliteJoinAddr string, logger *logging.StandardLogger) {
	const scheme = "http://"
	if !strings.HasPrefix(rqliteJoinAddr, scheme) {
		logger.Printf("Invalid join address format: %s", rqliteJoinAddr)
		return
	}

	// Keep only the authority ("host:port"); a path, query or fragment must
	// not leak into the port handed to the probes below.
	authority := strings.TrimPrefix(rqliteJoinAddr, scheme)
	if i := strings.IndexAny(authority, "/?#"); i >= 0 {
		authority = authority[:i]
	}
	host, port, ok := splitDiagHostPort(authority)
	if !ok {
		logger.Printf("Cannot parse host:port from %s", rqliteJoinAddr)
		return
	}
	logger.Printf("Testing connectivity to %s:%s", host, port)

	// Test 1: raw TCP reachability of the port via netcat.
	ncOut, ncErr := exec.Command("timeout", "5", "nc", "-z", "-v", host, port).CombinedOutput()
	if ncErr == nil {
		logger.Printf("✅ Port %s:%s is reachable", host, port)
	} else {
		logger.Printf("❌ Port %s:%s is NOT reachable", host, port)
		logger.Printf("netcat error: %v", ncErr)
	}
	logger.Printf("netcat output: %s", strings.TrimSpace(string(ncOut)))

	// Test 2: HTTP status code of the /status endpoint. The URL is rebuilt
	// from the authority so a trailing slash in the join address cannot
	// produce "//status".
	statusURL := scheme + authority + "/status"
	curlOut, curlErr := exec.Command("timeout", "5", "curl", "-s", "-o", "/dev/null", "-w", "%{http_code}", statusURL).Output()
	switch httpCode := strings.TrimSpace(string(curlOut)); {
	case curlErr != nil:
		logger.Printf("❌ HTTP request failed: %v", curlErr)
	case httpCode == "200":
		logger.Printf("✅ HTTP service is responding correctly (status: %s)", httpCode)
	default:
		logger.Printf("⚠️ HTTP service responded with status: %s", httpCode)
	}

	// Test 3: ICMP reachability; only the summary line containing
	// "packet loss" is logged.
	if pingOut, err := exec.Command("ping", "-c", "3", "-W", "2", host).Output(); err != nil {
		logger.Printf("❌ Ping test failed: %v", err)
	} else {
		for _, line := range strings.Split(string(pingOut), "\n") {
			if strings.Contains(line, "packet loss") {
				logger.Printf("🏓 Ping result: %s", strings.TrimSpace(line))
				break
			}
		}
	}

	// Test 4: DNS resolution. Lines mentioning "#53" are skipped; they
	// presumably describe the resolver itself rather than the answer.
	// NOTE(review): for an IP-literal host nslookup presumably performs a
	// reverse lookup, which can fail even though no resolution is needed
	// to connect — confirm whether this probe should be skipped then.
	if dnsOut, err := exec.Command("nslookup", host).Output(); err != nil {
		logger.Printf("❌ DNS resolution failed: %v", err)
	} else {
		logger.Printf("🔍 DNS resolution successful")
		for _, line := range strings.Split(string(dnsOut), "\n") {
			if strings.Contains(line, "Address:") && !strings.Contains(line, "#53") {
				logger.Printf("DNS result: %s", strings.TrimSpace(line))
			}
		}
	}

	logger.Printf("=== END DIAGNOSTICS ===")
}

// splitDiagHostPort splits a URL authority of the form "host:port" or
// "[ipv6]:port" into its host and port. Brackets around an IPv6 host are
// removed. ok is false when the port separator is missing, when either part
// is empty, or when an unbracketed host itself contains a colon.
func splitDiagHostPort(authority string) (host, port string, ok bool) {
	// The port follows the last colon; earlier colons can only belong to a
	// bracketed IPv6 literal.
	sep := strings.LastIndex(authority, ":")
	if sep < 0 {
		return "", "", false
	}
	host, port = authority[:sep], authority[sep+1:]

	if strings.HasPrefix(host, "[") && strings.HasSuffix(host, "]") {
		host = host[1 : len(host)-1]
	} else if strings.Contains(host, ":") {
		return "", "", false
	}

	if host == "" || port == "" {
		return "", "", false
	}
	return host, port, true
}

View File

@ -69,15 +69,16 @@ func (r *RQLiteManager) Start(ctx context.Context) error {
// Validate join address format before using it
if strings.HasPrefix(r.config.RQLiteJoinAddress, "http://") {
// Test if the join address is reachable before attempting to join
// Test connectivity and log the results, but always attempt to join
if err := r.testJoinAddress(r.config.RQLiteJoinAddress); err != nil {
r.logger.Warn("Join address is not reachable, starting as new cluster instead",
r.logger.Warn("Join address connectivity test failed, but will still attempt to join",
zap.String("join_address", r.config.RQLiteJoinAddress),
zap.Error(err))
// Don't add the -join parameter, let this node start its own cluster
} else {
args = append(args, "-join", r.config.RQLiteJoinAddress)
r.logger.Info("Join address is reachable, proceeding with cluster join")
}
// Always add the join parameter - let RQLite handle retries
args = append(args, "-join", r.config.RQLiteJoinAddress)
} else {
r.logger.Warn("Invalid join address format, skipping join", zap.String("address", r.config.RQLiteJoinAddress))
return fmt.Errorf("invalid RQLite join address format: %s (must start with http://)", r.config.RQLiteJoinAddress)