diff --git a/CHANGELOG.md b/CHANGELOG.md index 1a75911..0daf671 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,6 +13,27 @@ The format is based on [Keep a Changelog][keepachangelog] and adheres to [Semant ### Deprecated ### Fixed +## [0.69.6] - 2025-11-12 + +### Added +- Improved production service health checks and port availability validation during install, upgrade, start, and restart commands. +- Added service aliases (node, ipfs, cluster, gateway, olric) to `dbn prod logs` command for easier log viewing. + +### Changed +- Updated node configuration logic to correctly advertise public IP addresses in multiaddrs (for P2P discovery) and RQLite addresses, improving connectivity for nodes behind NAT/firewalls. +- Enhanced `dbn prod install` and `dbn prod upgrade` to automatically detect and preserve existing VPS IP, domain, and cluster join information. +- Improved RQLite cluster discovery to automatically replace localhost/loopback addresses with the actual public IP when exchanging metadata between peers. +- Updated `dbn prod install` to require `--vps-ip` for all node types (bootstrap and regular) for proper network configuration. +- Improved error handling and robustness in the installation script when fetching the latest release from GitHub. + +### Deprecated + +### Removed + +### Fixed +- Fixed an issue where the RQLite process would wait indefinitely for a join target; now uses a 5-minute timeout. +- Corrected the location of the gateway configuration file reference in the README. + ## [0.69.5] - 2025-11-11 ### Added diff --git a/Makefile b/Makefile index 93d3c9a..f642e05 100644 --- a/Makefile +++ b/Makefile @@ -19,7 +19,7 @@ test-e2e: .PHONY: build clean test run-node run-node2 run-node3 run-example deps tidy fmt vet lint clear-ports install-hooks kill -VERSION := 0.69.5 +VERSION := 0.69.6 COMMIT ?= $(shell git rev-parse --short HEAD 2>/dev/null || echo unknown) DATE ?= $(shell date -u +%Y-%m-%dT%H:%M:%SZ) LDFLAGS := -X 'main.version=$(VERSION)' -X 'main.commit=$(COMMIT)' -X 'main.date=$(DATE)' diff --git a/README.md b/README.md index 4a6d795..ddc4c79 100644 --- a/README.md +++ b/README.md @@ -217,7 +217,7 @@ Currently, the `upgrade` command doesn't support `--domain` flag. To enable HTTP 1. **Edit the gateway configuration:** ```bash -sudo nano /home/debros/.debros/configs/gateway.yaml +sudo nano /home/debros/.debros/data/gateway.yaml ``` 2. **Update the configuration:** @@ -358,6 +358,9 @@ sudo systemctl enable debros-* # Install bootstrap node sudo dbn prod install --bootstrap [--domain DOMAIN] [--branch BRANCH] + +sudo dbn prod install --nightly --domain node-gh38V1.debros.network --vps-ip 57.128.223.92 --ignore-resource-checks --bootstrap-join + # Install secondary node sudo dbn prod install --vps-ip IP --peers ADDRS [--domain DOMAIN] [--branch BRANCH] diff --git a/cmd/node/main.go b/cmd/node/main.go index 2e1bd5a..136af61 100644 --- a/cmd/node/main.go +++ b/cmd/node/main.go @@ -4,6 +4,7 @@ import ( "context" "flag" "fmt" + "net" "os" "os/signal" "path/filepath" @@ -137,7 +138,26 @@ func startNode(ctx context.Context, cfg *config.Config, port int) error { // Save the peer ID to a file for CLI access (especially useful for bootstrap) peerID := n.GetPeerID() peerInfoFile := filepath.Join(dataDir, "peer.info") - peerMultiaddr := fmt.Sprintf("/ip4/0.0.0.0/tcp/%d/p2p/%s", port, peerID) + + // Extract advertise IP from config (prefer http_adv_address, fallback to raft_adv_address) + advertiseIP := "0.0.0.0" // Default fallback + if cfg.Discovery.HttpAdvAddress != "" { + if host, _, err := net.SplitHostPort(cfg.Discovery.HttpAdvAddress); err == nil && host != "" && host != "localhost" { + advertiseIP = host + } + } else if cfg.Discovery.RaftAdvAddress != "" { + if host, _, err := net.SplitHostPort(cfg.Discovery.RaftAdvAddress); err == nil && host != "" && host != "localhost" { + advertiseIP = host + } + } + + // Determine IP protocol (IPv4 or IPv6) for multiaddr + ipProtocol := "ip4" + if ip := net.ParseIP(advertiseIP); ip != nil && ip.To4() == nil { + ipProtocol = "ip6" + } + + peerMultiaddr := fmt.Sprintf("/%s/%s/tcp/%d/p2p/%s", ipProtocol, advertiseIP, port, peerID) if err := os.WriteFile(peerInfoFile, []byte(peerMultiaddr), 0644); err != nil { logger.Error("Failed to save peer info: %v", zap.Error(err)) diff --git a/pkg/cli/basic_commands.go b/pkg/cli/basic_commands.go index d3db10d..ade1ecf 100644 --- a/pkg/cli/basic_commands.go +++ b/pkg/cli/basic_commands.go @@ -245,12 +245,24 @@ func HandlePubSubCommand(args []string, format string, timeout time.Duration) { func createClient() (client.NetworkClient, error) { config := client.DefaultClientConfig("dbn") + // Use active environment's gateway URL + gatewayURL := getGatewayURL() + config.GatewayURL = gatewayURL + + // Try to get bootstrap peers from active environment + // For now, we'll use the default bootstrap peers from config + // In the future, environments could specify their own bootstrap peers + env, err := GetActiveEnvironment() + if err == nil && env != nil { + // Environment loaded successfully - gateway URL already set above + // Bootstrap peers could be added to Environment struct in the future + _ = env // Use env if we add bootstrap peers to it + } + // Check for existing credentials using enhanced authentication creds, err := auth.GetValidEnhancedCredentials() if err != nil { // No valid credentials found, use the enhanced authentication flow - gatewayURL := getGatewayURL() - newCreds, authErr := auth.GetOrPromptForCredentials(gatewayURL) if authErr != nil { return nil, fmt.Errorf("authentication failed: %w", authErr) diff --git a/pkg/cli/prod_commands.go b/pkg/cli/prod_commands.go index 5b2f113..921ca98 100644 --- a/pkg/cli/prod_commands.go +++ b/pkg/cli/prod_commands.go @@ -2,16 +2,54 @@ package cli import ( "bufio" + "errors" + "flag" "fmt" + "net" + "net/http" "os" "os/exec" "path/filepath" "strings" + "syscall" "time" "github.com/DeBrosOfficial/network/pkg/environments/production" + "github.com/multiformats/go-multiaddr" ) +// normalizeBootstrapPeers normalizes and validates bootstrap peer multiaddrs +func normalizeBootstrapPeers(peersStr string) ([]string, error) { + if peersStr == "" { + return nil, nil + } + + // Split by comma and trim whitespace + rawPeers := strings.Split(peersStr, ",") + peers := make([]string, 0, len(rawPeers)) + seen := make(map[string]bool) + + for _, peer := range rawPeers { + peer = strings.TrimSpace(peer) + if peer == "" { + continue + } + + // Validate multiaddr format + if _, err := multiaddr.NewMultiaddr(peer); err != nil { + return nil, fmt.Errorf("invalid multiaddr %q: %w", peer, err) + } + + // Deduplicate + if !seen[peer] { + peers = append(peers, peer) + seen[peer] = true + } + } + + return peers, nil +} + // HandleProdCommand handles production environment commands func HandleProdCommand(args []string) { if len(args) == 0 { @@ -57,7 +95,7 @@ func showProdHelp() { fmt.Printf(" --force - Reconfigure all settings\n") fmt.Printf(" --bootstrap - Install as bootstrap node\n") fmt.Printf(" --vps-ip IP - VPS public IP address (required for non-bootstrap)\n") - fmt.Printf(" --peers ADDRS - Comma-separated bootstrap peers (for non-bootstrap)\n") + fmt.Printf(" --peers ADDRS - Comma-separated bootstrap peer multiaddrs (required for non-bootstrap)\n") fmt.Printf(" --bootstrap-join ADDR - Bootstrap raft join address (for secondary bootstrap)\n") fmt.Printf(" --domain DOMAIN - Domain for HTTPS (optional)\n") fmt.Printf(" --branch BRANCH - Git branch to use (main or nightly, default: main)\n") @@ -72,6 +110,7 @@ func showProdHelp() { fmt.Printf(" stop - Stop all production services (requires root/sudo)\n") fmt.Printf(" restart - Restart all production services (requires root/sudo)\n") fmt.Printf(" logs - View production service logs\n") + fmt.Printf(" Service aliases: node, ipfs, cluster, gateway, olric\n") fmt.Printf(" Options:\n") fmt.Printf(" --follow - Follow logs in real-time\n") fmt.Printf(" uninstall - Remove production services (requires root/sudo)\n\n") @@ -83,7 +122,7 @@ func showProdHelp() { fmt.Printf(" # Join existing cluster\n") fmt.Printf(" sudo dbn prod install --vps-ip 10.0.0.2 --peers /ip4/10.0.0.1/tcp/4001/p2p/Qm...\n\n") fmt.Printf(" # Secondary bootstrap joining existing cluster\n") - fmt.Printf(" sudo dbn prod install --bootstrap --vps-ip 10.0.0.2 --bootstrap-join 10.0.0.1:7001\n\n") + fmt.Printf(" sudo dbn prod install --bootstrap --vps-ip 10.0.0.2 --bootstrap-join 10.0.0.1:7001 --peers /ip4/10.0.0.1/tcp/4001/p2p/Qm...\n\n") fmt.Printf(" # Upgrade using saved branch preference\n") fmt.Printf(" sudo dbn prod upgrade --restart\n\n") fmt.Printf(" # Upgrade and switch to nightly branch\n") @@ -96,61 +135,43 @@ func showProdHelp() { fmt.Printf(" sudo dbn prod restart\n\n") fmt.Printf(" dbn prod status\n") fmt.Printf(" dbn prod logs node --follow\n") + fmt.Printf(" dbn prod logs gateway --follow\n") } func handleProdInstall(args []string) { - // Parse arguments - force := false - isBootstrap := false - skipResourceChecks := false - var vpsIP, domain, peersStr, bootstrapJoin, branch string + // Parse arguments using flag.FlagSet + fs := flag.NewFlagSet("install", flag.ContinueOnError) + fs.SetOutput(os.Stderr) - for i, arg := range args { - switch arg { - case "--force": - force = true - case "--bootstrap": - isBootstrap = true - case "--peers": - if i+1 < len(args) { - peersStr = args[i+1] - } - case "--vps-ip": - if i+1 < len(args) { - vpsIP = args[i+1] - } - case "--domain": - if i+1 < len(args) { - domain = args[i+1] - } - case "--bootstrap-join": - if i+1 < len(args) { - bootstrapJoin = args[i+1] - } - case "--branch": - if i+1 < len(args) { - branch = args[i+1] - } - case "--ignore-resource-checks": - skipResourceChecks = true + force := fs.Bool("force", false, "Reconfigure all settings") + isBootstrap := fs.Bool("bootstrap", false, "Install as bootstrap node") + skipResourceChecks := fs.Bool("ignore-resource-checks", false, "Skip disk/RAM/CPU prerequisite validation") + vpsIP := fs.String("vps-ip", "", "VPS public IP address (required for non-bootstrap)") + domain := fs.String("domain", "", "Domain for HTTPS (optional)") + peersStr := fs.String("peers", "", "Comma-separated bootstrap peer multiaddrs (required for non-bootstrap)") + bootstrapJoin := fs.String("bootstrap-join", "", "Bootstrap raft join address (for secondary bootstrap)") + branch := fs.String("branch", "main", "Git branch to use (main or nightly)") + + if err := fs.Parse(args); err != nil { + if err == flag.ErrHelp { + return } - } - - // Validate branch if provided - if branch != "" && branch != "main" && branch != "nightly" { - fmt.Fprintf(os.Stderr, "❌ Invalid branch: %s (must be 'main' or 'nightly')\n", branch) + fmt.Fprintf(os.Stderr, "❌ Failed to parse flags: %v\n", err) os.Exit(1) } - // Default to main if not specified - if branch == "" { - branch = "main" + // Validate branch + if *branch != "main" && *branch != "nightly" { + fmt.Fprintf(os.Stderr, "❌ Invalid branch: %s (must be 'main' or 'nightly')\n", *branch) + os.Exit(1) } - // Parse bootstrap peers if provided - var bootstrapPeers []string - if peersStr != "" { - bootstrapPeers = strings.Split(peersStr, ",") + // Normalize and validate bootstrap peers + bootstrapPeers, err := normalizeBootstrapPeers(*peersStr) + if err != nil { + fmt.Fprintf(os.Stderr, "❌ Invalid bootstrap peers: %v\n", err) + fmt.Fprintf(os.Stderr, " Example: --peers /ip4/10.0.0.1/tcp/4001/p2p/Qm...,/ip4/10.0.0.2/tcp/4001/p2p/Qm...\n") + os.Exit(1) } // Validate setup requirements @@ -159,19 +180,50 @@ func handleProdInstall(args []string) { os.Exit(1) } - // Enforce --vps-ip for non-bootstrap nodes - if !isBootstrap && vpsIP == "" { - fmt.Fprintf(os.Stderr, "❌ --vps-ip is required for non-bootstrap nodes\n") - fmt.Fprintf(os.Stderr, " Usage: sudo dbn prod install --vps-ip --peers \n") - os.Exit(1) + // Validate bootstrap node requirements + if *isBootstrap { + if *vpsIP == "" { + fmt.Fprintf(os.Stderr, "❌ --vps-ip is required for bootstrap nodes\n") + fmt.Fprintf(os.Stderr, " Bootstrap nodes must advertise a public IP address for other nodes to connect\n") + fmt.Fprintf(os.Stderr, " Usage: sudo dbn prod install --bootstrap --vps-ip \n") + fmt.Fprintf(os.Stderr, " Example: sudo dbn prod install --bootstrap --vps-ip 203.0.113.1\n") + os.Exit(1) + } + // Validate secondary bootstrap requirements + if *bootstrapJoin == "" { + fmt.Fprintf(os.Stderr, "⚠️ Warning: Primary bootstrap node detected (--bootstrap without --bootstrap-join)\n") + fmt.Fprintf(os.Stderr, " This node will form a new cluster. To join existing cluster as secondary bootstrap:\n") + fmt.Fprintf(os.Stderr, " sudo dbn prod install --bootstrap --vps-ip %s --bootstrap-join :7001 --peers \n", *vpsIP) + } + } + + // Validate non-bootstrap node requirements + if !*isBootstrap { + if *vpsIP == "" { + fmt.Fprintf(os.Stderr, "❌ --vps-ip is required for non-bootstrap nodes\n") + fmt.Fprintf(os.Stderr, " Usage: sudo dbn prod install --vps-ip --peers \n") + os.Exit(1) + } + if len(bootstrapPeers) == 0 { + fmt.Fprintf(os.Stderr, "❌ --peers is required for non-bootstrap nodes\n") + fmt.Fprintf(os.Stderr, " Usage: sudo dbn prod install --vps-ip --peers \n") + fmt.Fprintf(os.Stderr, " Example: --peers /ip4/10.0.0.1/tcp/4001/p2p/Qm...\n") + os.Exit(1) + } } debrosHome := "/home/debros" debrosDir := debrosHome + "/.debros" - setup := production.NewProductionSetup(debrosHome, os.Stdout, force, branch, false, skipResourceChecks) + setup := production.NewProductionSetup(debrosHome, os.Stdout, *force, *branch, false, *skipResourceChecks) + + // Check port availability before proceeding + if err := ensurePortsAvailable("prod install", defaultPorts()); err != nil { + fmt.Fprintf(os.Stderr, "❌ %v\n", err) + os.Exit(1) + } // Save branch preference for future upgrades - if err := production.SaveBranchPreference(debrosDir, branch); err != nil { + if err := production.SaveBranchPreference(debrosDir, *branch); err != nil { fmt.Fprintf(os.Stderr, "⚠️ Warning: Failed to save branch preference: %v\n", err) } @@ -198,14 +250,14 @@ func handleProdInstall(args []string) { // Determine node type early nodeType := "node" - if isBootstrap { + if *isBootstrap { nodeType = "bootstrap" } // Phase 3: Generate secrets FIRST (before service initialization) // This ensures cluster secret and swarm key exist before repos are seeded fmt.Printf("\n🔐 Phase 3: Generating secrets...\n") - if err := setup.Phase3GenerateSecrets(isBootstrap); err != nil { + if err := setup.Phase3GenerateSecrets(*isBootstrap); err != nil { fmt.Fprintf(os.Stderr, "❌ Secret generation failed: %v\n", err) os.Exit(1) } @@ -219,56 +271,70 @@ func handleProdInstall(args []string) { // Phase 4: Generate configs fmt.Printf("\n⚙️ Phase 4: Generating configurations...\n") - enableHTTPS := domain != "" - if err := setup.Phase4GenerateConfigs(isBootstrap, bootstrapPeers, vpsIP, enableHTTPS, domain, bootstrapJoin); err != nil { + enableHTTPS := *domain != "" + if err := setup.Phase4GenerateConfigs(*isBootstrap, bootstrapPeers, *vpsIP, enableHTTPS, *domain, *bootstrapJoin); err != nil { fmt.Fprintf(os.Stderr, "❌ Configuration generation failed: %v\n", err) os.Exit(1) } // Phase 5: Create systemd services fmt.Printf("\n🔧 Phase 5: Creating systemd services...\n") - if err := setup.Phase5CreateSystemdServices(nodeType, vpsIP); err != nil { + if err := setup.Phase5CreateSystemdServices(nodeType, *vpsIP); err != nil { fmt.Fprintf(os.Stderr, "❌ Service creation failed: %v\n", err) os.Exit(1) } + // Give services a moment to fully initialize before verification + fmt.Printf("\n⏳ Waiting for services to initialize...\n") + time.Sleep(5 * time.Second) + + // Verify all services are running correctly + if err := verifyProductionRuntime("prod install"); err != nil { + fmt.Fprintf(os.Stderr, "❌ %v\n", err) + fmt.Fprintf(os.Stderr, " Installation completed but services are not healthy. Check logs with: dbn prod logs \n") + os.Exit(1) + } + // Log completion with actual peer ID setup.LogSetupComplete(setup.NodePeerID) - fmt.Printf("✅ Production installation complete!\n\n") + fmt.Printf("✅ Production installation complete and healthy!\n\n") } func handleProdUpgrade(args []string) { - // Parse arguments - force := false - restartServices := false - noPull := false - branch := "" - for i, arg := range args { - if arg == "--force" { - force = true - } - if arg == "--restart" { - restartServices = true - } - if arg == "--no-pull" { - noPull = true - } - if arg == "--nightly" { - branch = "nightly" - } - if arg == "--main" { - branch = "main" - } - if arg == "--branch" { - if i+1 < len(args) { - branch = args[i+1] - } + // Parse arguments using flag.FlagSet + fs := flag.NewFlagSet("upgrade", flag.ContinueOnError) + fs.SetOutput(os.Stderr) + + force := fs.Bool("force", false, "Reconfigure all settings") + restartServices := fs.Bool("restart", false, "Automatically restart services after upgrade") + noPull := fs.Bool("no-pull", false, "Skip git clone/pull, use existing /home/debros/src") + branch := fs.String("branch", "", "Git branch to use (main or nightly, uses saved preference if not specified)") + + // Support legacy flags for backwards compatibility + fs.Bool("nightly", false, "Use nightly branch (deprecated, use --branch nightly)") + fs.Bool("main", false, "Use main branch (deprecated, use --branch main)") + + if err := fs.Parse(args); err != nil { + if err == flag.ErrHelp { + return } + fmt.Fprintf(os.Stderr, "❌ Failed to parse flags: %v\n", err) + os.Exit(1) + } + + // Handle legacy flags + nightlyFlag := fs.Lookup("nightly") + mainFlag := fs.Lookup("main") + if nightlyFlag != nil && nightlyFlag.Value.String() == "true" { + *branch = "nightly" + } + if mainFlag != nil && mainFlag.Value.String() == "true" { + *branch = "main" } // Validate branch if provided - if branch != "" && branch != "main" && branch != "nightly" { - fmt.Fprintf(os.Stderr, "❌ Invalid branch: %s (must be 'main' or 'nightly')\n", branch) + if *branch != "" && *branch != "main" && *branch != "nightly" { + fmt.Fprintf(os.Stderr, "❌ Invalid branch: %s (must be 'main' or 'nightly')\n", *branch) os.Exit(1) } @@ -283,20 +349,20 @@ func handleProdUpgrade(args []string) { fmt.Printf(" This will preserve existing configurations and data\n") fmt.Printf(" Configurations will be updated to latest format\n\n") - setup := production.NewProductionSetup(debrosHome, os.Stdout, force, branch, noPull, false) + setup := production.NewProductionSetup(debrosHome, os.Stdout, *force, *branch, *noPull, false) // Log if --no-pull is enabled - if noPull { + if *noPull { fmt.Printf(" ⚠️ --no-pull flag enabled: Skipping git clone/pull\n") fmt.Printf(" Using existing repository at %s/src\n", debrosHome) } // If branch was explicitly provided, save it for future upgrades - if branch != "" { - if err := production.SaveBranchPreference(debrosDir, branch); err != nil { + if *branch != "" { + if err := production.SaveBranchPreference(debrosDir, *branch); err != nil { fmt.Fprintf(os.Stderr, "⚠️ Warning: Failed to save branch preference: %v\n", err) } else { - fmt.Printf(" Using branch: %s (saved for future upgrades)\n", branch) + fmt.Printf(" Using branch: %s (saved for future upgrades)\n", *branch) } } else { // Show which branch is being used (read from saved preference) @@ -347,6 +413,12 @@ func handleProdUpgrade(args []string) { time.Sleep(2 * time.Second) } + // Check port availability after stopping services + if err := ensurePortsAvailable("prod upgrade", defaultPorts()); err != nil { + fmt.Fprintf(os.Stderr, "❌ %v\n", err) + os.Exit(1) + } + // Phase 2b: Install/update binaries fmt.Printf("\nPhase 2b: Installing/updating binaries...\n") if err := setup.Phase2bInstallBinaries(); err != nil { @@ -438,22 +510,37 @@ func handleProdUpgrade(args []string) { // Extract bootstrap peers from existing node config bootstrapPeers := extractBootstrapPeers(nodeConfigPath) - // Extract bootstrap join address if it's a bootstrap node - if nodeType == "bootstrap" { - if data, err := os.ReadFile(nodeConfigPath); err == nil { - configStr := string(data) - for _, line := range strings.Split(configStr, "\n") { - trimmed := strings.TrimSpace(line) - if strings.HasPrefix(trimmed, "rqlite_join_address:") { - parts := strings.SplitN(trimmed, ":", 2) - if len(parts) > 1 { - bootstrapJoin = strings.TrimSpace(parts[1]) - bootstrapJoin = strings.Trim(bootstrapJoin, "\"'") - if bootstrapJoin == "null" || bootstrapJoin == "" { - bootstrapJoin = "" + // Extract VPS IP from advertise addresses and bootstrap join address + vpsIP := "" + if data, err := os.ReadFile(nodeConfigPath); err == nil { + configStr := string(data) + for _, line := range strings.Split(configStr, "\n") { + trimmed := strings.TrimSpace(line) + // Try to extract VPS IP from http_adv_address or raft_adv_address + // Only set if not already found (first valid IP wins) + if vpsIP == "" && (strings.HasPrefix(trimmed, "http_adv_address:") || strings.HasPrefix(trimmed, "raft_adv_address:")) { + parts := strings.SplitN(trimmed, ":", 2) + if len(parts) > 1 { + addr := strings.TrimSpace(parts[1]) + addr = strings.Trim(addr, "\"'") + if addr != "" && addr != "null" && addr != "localhost:5001" && addr != "localhost:7001" { + // Extract IP from address (format: "IP:PORT" or "[IPv6]:PORT") + if host, _, err := net.SplitHostPort(addr); err == nil && host != "" && host != "localhost" { + vpsIP = host + // Continue loop to also check for bootstrap join address } } - break + } + } + // Extract bootstrap join address if it's a bootstrap node + if nodeType == "bootstrap" && strings.HasPrefix(trimmed, "rqlite_join_address:") { + parts := strings.SplitN(trimmed, ":", 2) + if len(parts) > 1 { + bootstrapJoin = strings.TrimSpace(parts[1]) + bootstrapJoin = strings.Trim(bootstrapJoin, "\"'") + if bootstrapJoin == "null" || bootstrapJoin == "" { + bootstrapJoin = "" + } } } } @@ -487,6 +574,9 @@ func handleProdUpgrade(args []string) { if len(bootstrapPeers) > 0 { fmt.Printf(" - Bootstrap peers: %d peer(s) preserved\n", len(bootstrapPeers)) } + if vpsIP != "" { + fmt.Printf(" - VPS IP: %s\n", vpsIP) + } if domain != "" { fmt.Printf(" - Domain: %s\n", domain) } @@ -494,7 +584,7 @@ func handleProdUpgrade(args []string) { fmt.Printf(" - Bootstrap join address: %s\n", bootstrapJoin) } - if err := setup.Phase4GenerateConfigs(nodeType == "bootstrap", bootstrapPeers, "", enableHTTPS, domain, bootstrapJoin); err != nil { + if err := setup.Phase4GenerateConfigs(nodeType == "bootstrap", bootstrapPeers, vpsIP, enableHTTPS, domain, bootstrapJoin); err != nil { fmt.Fprintf(os.Stderr, "⚠️ Config generation warning: %v\n", err) fmt.Fprintf(os.Stderr, " Existing configs preserved\n") } @@ -506,23 +596,36 @@ func handleProdUpgrade(args []string) { } fmt.Printf("\n✅ Upgrade complete!\n") - if restartServices { + if *restartServices { fmt.Printf(" Restarting services...\n") // Reload systemd daemon - exec.Command("systemctl", "daemon-reload").Run() - // Restart services to apply changes - services := []string{ - "debros-ipfs-bootstrap", - "debros-ipfs-cluster-bootstrap", - // Note: RQLite is managed by node process, not as separate service - "debros-olric", - "debros-node-bootstrap", - "debros-gateway", + if err := exec.Command("systemctl", "daemon-reload").Run(); err != nil { + fmt.Fprintf(os.Stderr, " ⚠️ Warning: Failed to reload systemd daemon: %v\n", err) } - for _, svc := range services { - exec.Command("systemctl", "restart", svc).Run() + // Restart services to apply changes - use getProductionServices to only restart existing services + services := getProductionServices() + if len(services) == 0 { + fmt.Printf(" ⚠️ No services found to restart\n") + } else { + for _, svc := range services { + if err := exec.Command("systemctl", "restart", svc).Run(); err != nil { + fmt.Printf(" ⚠️ Failed to restart %s: %v\n", svc, err) + } else { + fmt.Printf(" ✓ Restarted %s\n", svc) + } + } + fmt.Printf(" ✓ All services restarted\n") + // Give services a moment to fully initialize before verification + fmt.Printf(" ⏳ Waiting for services to initialize...\n") + time.Sleep(5 * time.Second) + // Verify services are healthy after restart + if err := verifyProductionRuntime("prod upgrade --restart"); err != nil { + fmt.Fprintf(os.Stderr, "❌ %v\n", err) + fmt.Fprintf(os.Stderr, " Upgrade completed but services are not healthy. Check logs with: dbn prod logs \n") + os.Exit(1) + } + fmt.Printf(" ✅ All services verified healthy\n") } - fmt.Printf(" ✓ Services restarted\n") } else { fmt.Printf(" To apply changes, restart services:\n") fmt.Printf(" sudo systemctl daemon-reload\n") @@ -587,18 +690,113 @@ func handleProdStatus() { fmt.Printf("\nView logs with: dbn prod logs \n") } +// resolveServiceName resolves service aliases to actual systemd service names +func resolveServiceName(alias string) ([]string, error) { + // Service alias mapping + aliases := map[string][]string{ + "node": {"debros-node-bootstrap", "debros-node-node"}, + "ipfs": {"debros-ipfs-bootstrap", "debros-ipfs-node"}, + "cluster": {"debros-ipfs-cluster-bootstrap", "debros-ipfs-cluster-node"}, + "ipfs-cluster": {"debros-ipfs-cluster-bootstrap", "debros-ipfs-cluster-node"}, + "gateway": {"debros-gateway"}, + "olric": {"debros-olric"}, + "rqlite": {"debros-node-bootstrap", "debros-node-node"}, // RQLite logs are in node logs + } + + // Check if it's an alias + if serviceNames, ok := aliases[strings.ToLower(alias)]; ok { + // Filter to only existing services + var existing []string + for _, svc := range serviceNames { + unitPath := filepath.Join("/etc/systemd/system", svc+".service") + if _, err := os.Stat(unitPath); err == nil { + existing = append(existing, svc) + } + } + if len(existing) == 0 { + return nil, fmt.Errorf("no services found for alias %q", alias) + } + return existing, nil + } + + // Check if it's already a full service name + unitPath := filepath.Join("/etc/systemd/system", alias+".service") + if _, err := os.Stat(unitPath); err == nil { + return []string{alias}, nil + } + + // Try without .service suffix + if !strings.HasSuffix(alias, ".service") { + unitPath = filepath.Join("/etc/systemd/system", alias+".service") + if _, err := os.Stat(unitPath); err == nil { + return []string{alias}, nil + } + } + + return nil, fmt.Errorf("service %q not found. Use: node, ipfs, cluster, gateway, olric, or full service name", alias) +} + func handleProdLogs(args []string) { if len(args) == 0 { fmt.Fprintf(os.Stderr, "Usage: dbn prod logs [--follow]\n") + fmt.Fprintf(os.Stderr, "\nService aliases:\n") + fmt.Fprintf(os.Stderr, " node, ipfs, cluster, gateway, olric\n") + fmt.Fprintf(os.Stderr, "\nOr use full service name:\n") + fmt.Fprintf(os.Stderr, " debros-node-bootstrap, debros-gateway, etc.\n") os.Exit(1) } - service := args[0] + serviceAlias := args[0] follow := false if len(args) > 1 && (args[1] == "--follow" || args[1] == "-f") { follow = true } + // Resolve service alias to actual service names + serviceNames, err := resolveServiceName(serviceAlias) + if err != nil { + fmt.Fprintf(os.Stderr, "❌ %v\n", err) + fmt.Fprintf(os.Stderr, "\nAvailable service aliases: node, ipfs, cluster, gateway, olric\n") + fmt.Fprintf(os.Stderr, "Or use full service name like: debros-node-bootstrap\n") + os.Exit(1) + } + + // If multiple services match, show all of them + if len(serviceNames) > 1 { + if follow { + fmt.Fprintf(os.Stderr, "⚠️ Multiple services match alias %q:\n", serviceAlias) + for _, svc := range serviceNames { + fmt.Fprintf(os.Stderr, " - %s\n", svc) + } + fmt.Fprintf(os.Stderr, "\nShowing logs for all matching services...\n\n") + // Use journalctl with multiple units (build args correctly) + args := []string{} + for _, svc := range serviceNames { + args = append(args, "-u", svc) + } + args = append(args, "-f") + cmd := exec.Command("journalctl", args...) + cmd.Stdout = os.Stdout + cmd.Stderr = os.Stderr + cmd.Stdin = os.Stdin + cmd.Run() + } else { + for i, svc := range serviceNames { + if i > 0 { + fmt.Printf("\n" + strings.Repeat("=", 70) + "\n\n") + } + fmt.Printf("📋 Logs for %s:\n\n", svc) + cmd := exec.Command("journalctl", "-u", svc, "-n", "50") + cmd.Stdout = os.Stdout + cmd.Stderr = os.Stderr + cmd.Run() + } + } + return + } + + // Single service + service := serviceNames[0] if follow { fmt.Printf("Following logs for %s (press Ctrl+C to stop)...\n\n", service) cmd := exec.Command("journalctl", "-u", service, "-f") @@ -614,6 +812,165 @@ func handleProdLogs(args []string) { } } +// errServiceNotFound marks units that systemd does not know about. +var errServiceNotFound = errors.New("service not found") + +type portSpec struct { + Name string + Port int +} + +var servicePorts = map[string][]portSpec{ + "debros-gateway": {{"Gateway API", 6001}}, + "debros-olric": {{"Olric HTTP", 3320}, {"Olric Memberlist", 3322}}, + "debros-node-bootstrap": {{"RQLite HTTP", 5001}, {"RQLite Raft", 7001}, {"IPFS Cluster API", 9094}}, + "debros-node-node": {{"RQLite HTTP", 5001}, {"RQLite Raft", 7001}, {"IPFS Cluster API", 9094}}, + "debros-ipfs-bootstrap": {{"IPFS API", 4501}, {"IPFS Gateway", 8080}, {"IPFS Swarm", 4001}}, + "debros-ipfs-node": {{"IPFS API", 4501}, {"IPFS Gateway", 8080}, {"IPFS Swarm", 4001}}, + "debros-ipfs-cluster-bootstrap": {{"IPFS Cluster API", 9094}}, + "debros-ipfs-cluster-node": {{"IPFS Cluster API", 9094}}, +} + +// defaultPorts is used for fresh installs/upgrades before unit files exist. +func defaultPorts() []portSpec { + return []portSpec{ + {"IPFS Swarm", 4001}, + {"IPFS API", 4501}, + {"IPFS Gateway", 8080}, + {"Gateway API", 6001}, + {"RQLite HTTP", 5001}, + {"RQLite Raft", 7001}, + {"IPFS Cluster API", 9094}, + {"Olric HTTP", 3320}, + {"Olric Memberlist", 3322}, + } +} + +func isServiceActive(service string) (bool, error) { + cmd := exec.Command("systemctl", "is-active", "--quiet", service) + if err := cmd.Run(); err != nil { + if exitErr, ok := err.(*exec.ExitError); ok { + switch exitErr.ExitCode() { + case 3: + return false, nil + case 4: + return false, errServiceNotFound + } + } + return false, err + } + return true, nil +} + +func collectPortsForServices(services []string, skipActive bool) ([]portSpec, error) { + seen := make(map[int]portSpec) + for _, svc := range services { + if skipActive { + active, err := isServiceActive(svc) + if err != nil { + return nil, fmt.Errorf("unable to check %s: %w", svc, err) + } + if active { + continue + } + } + for _, spec := range servicePorts[svc] { + if _, ok := seen[spec.Port]; !ok { + seen[spec.Port] = spec + } + } + } + ports := make([]portSpec, 0, len(seen)) + for _, spec := range seen { + ports = append(ports, spec) + } + return ports, nil +} + +func ensurePortsAvailable(action string, ports []portSpec) error { + for _, spec := range ports { + ln, err := net.Listen("tcp", fmt.Sprintf("0.0.0.0:%d", spec.Port)) + if err != nil { + if errors.Is(err, syscall.EADDRINUSE) || strings.Contains(err.Error(), "address already in use") { + return fmt.Errorf("%s cannot continue: %s (port %d) is already in use", action, spec.Name, spec.Port) + } + return fmt.Errorf("%s cannot continue: failed to inspect %s (port %d): %w", action, spec.Name, spec.Port, err) + } + _ = ln.Close() + } + return nil +} + +func checkHTTP(client *http.Client, method, url, label string) error { + req, err := http.NewRequest(method, url, nil) + if err != nil { + return fmt.Errorf("%s check failed: %w", label, err) + } + resp, err := client.Do(req) + if err != nil { + return fmt.Errorf("%s check failed: %w", label, err) + } + defer resp.Body.Close() + if resp.StatusCode < 200 || resp.StatusCode >= 300 { + return fmt.Errorf("%s returned HTTP %d", label, resp.StatusCode) + } + return nil +} + +func serviceExists(name string) bool { + unitPath := filepath.Join("/etc/systemd/system", name+".service") + _, err := os.Stat(unitPath) + return err == nil +} + +func verifyProductionRuntime(action string) error { + services := getProductionServices() + issues := make([]string, 0) + + for _, svc := range services { + active, err := isServiceActive(svc) + if err != nil { + issues = append(issues, fmt.Sprintf("%s status unknown (%v)", svc, err)) + continue + } + if !active { + issues = append(issues, fmt.Sprintf("%s is inactive", svc)) + } + } + + client := &http.Client{Timeout: 3 * time.Second} + + if err := checkHTTP(client, "GET", "http://127.0.0.1:5001/status", "RQLite status"); err == nil { + } else if serviceExists("debros-node-bootstrap") || serviceExists("debros-node-node") { + issues = append(issues, err.Error()) + } + + if err := checkHTTP(client, "POST", "http://127.0.0.1:4501/api/v0/version", "IPFS API"); err == nil { + } else if serviceExists("debros-ipfs-bootstrap") || serviceExists("debros-ipfs-node") { + issues = append(issues, err.Error()) + } + + if err := checkHTTP(client, "GET", "http://127.0.0.1:9094/health", "IPFS Cluster"); err == nil { + } else if serviceExists("debros-ipfs-cluster-bootstrap") || serviceExists("debros-ipfs-cluster-node") { + issues = append(issues, err.Error()) + } + + if err := checkHTTP(client, "GET", "http://127.0.0.1:6001/health", "Gateway health"); err == nil { + } else if serviceExists("debros-gateway") { + issues = append(issues, err.Error()) + } + + if err := checkHTTP(client, "GET", "http://127.0.0.1:3320/ping", "Olric ping"); err == nil { + } else if serviceExists("debros-olric") { + issues = append(issues, err.Error()) + } + + if len(issues) > 0 { + return fmt.Errorf("%s verification failed:\n - %s", action, strings.Join(issues, "\n - ")) + } + return nil +} + // getProductionServices returns a list of all DeBros production service names that exist func getProductionServices() []string { // All possible service names (both bootstrap and node variants) @@ -655,16 +1012,57 @@ func handleProdStart() { return } + // Check which services are inactive and need to be started + inactive := make([]string, 0, len(services)) for _, svc := range services { - cmd := exec.Command("systemctl", "start", svc) - if err := cmd.Run(); err != nil { + active, err := isServiceActive(svc) + if err != nil { + fmt.Printf(" ⚠️ Unable to check %s: %v\n", svc, err) + continue + } + if active { + fmt.Printf(" ℹ️ %s already running\n", svc) + continue + } + inactive = append(inactive, svc) + } + + if len(inactive) == 0 { + fmt.Printf("\n✅ All services already running\n") + return + } + + // Check port availability for services we're about to start + ports, err := collectPortsForServices(inactive, false) + if err != nil { + fmt.Fprintf(os.Stderr, "❌ %v\n", err) + os.Exit(1) + } + if err := ensurePortsAvailable("prod start", ports); err != nil { + fmt.Fprintf(os.Stderr, "❌ %v\n", err) + os.Exit(1) + } + + // Start inactive services + for _, svc := range inactive { + if err := exec.Command("systemctl", "start", svc).Run(); err != nil { fmt.Printf(" ⚠️ Failed to start %s: %v\n", svc, err) } else { fmt.Printf(" ✓ Started %s\n", svc) } } - fmt.Printf("\n✅ All services started\n") + // Give services a moment to fully initialize before verification + fmt.Printf(" ⏳ Waiting for services to initialize...\n") + time.Sleep(3 * time.Second) + + // Verify all services are healthy + if err := verifyProductionRuntime("prod start"); err != nil { + fmt.Fprintf(os.Stderr, "❌ %v\n", err) + os.Exit(1) + } + + fmt.Printf("\n✅ All services started and healthy\n") } func handleProdStop() { @@ -681,16 +1079,41 @@ func handleProdStop() { return } + hadError := false for _, svc := range services { - cmd := exec.Command("systemctl", "stop", svc) - if err := cmd.Run(); err != nil { + active, err := isServiceActive(svc) + if err != nil { + fmt.Printf(" ⚠️ Unable to check %s: %v\n", svc, err) + hadError = true + continue + } + if !active { + fmt.Printf(" ℹ️ %s already stopped\n", svc) + continue + } + if err := exec.Command("systemctl", "stop", svc).Run(); err != nil { fmt.Printf(" ⚠️ Failed to stop %s: %v\n", svc, err) + hadError = true + continue + } + // Verify the service actually stopped and didn't restart itself + if stillActive, err := isServiceActive(svc); err != nil { + fmt.Printf(" ⚠️ Unable to verify %s stop: %v\n", svc, err) + hadError = true + } else if stillActive { + fmt.Printf(" ❌ %s restarted itself immediately\n", svc) + hadError = true } else { fmt.Printf(" ✓ Stopped %s\n", svc) } } - fmt.Printf("\n✅ All services stopped\n") + if hadError { + fmt.Fprintf(os.Stderr, "\n❌ One or more services failed to stop cleanly\n") + os.Exit(1) + } + + fmt.Printf("\n✅ All services stopped and remain inactive\n") } func handleProdRestart() { @@ -707,16 +1130,57 @@ func handleProdRestart() { return } + // Stop all active services first + fmt.Printf(" Stopping services...\n") for _, svc := range services { - cmd := exec.Command("systemctl", "restart", svc) - if err := cmd.Run(); err != nil { - fmt.Printf(" ⚠️ Failed to restart %s: %v\n", svc, err) + active, err := isServiceActive(svc) + if err != nil { + fmt.Printf(" ⚠️ Unable to check %s: %v\n", svc, err) + continue + } + if !active { + fmt.Printf(" ℹ️ %s was already stopped\n", svc) + continue + } + if err := exec.Command("systemctl", "stop", svc).Run(); err != nil { + fmt.Printf(" ⚠️ Failed to stop %s: %v\n", svc, err) } else { - fmt.Printf(" ✓ Restarted %s\n", svc) + fmt.Printf(" ✓ Stopped %s\n", svc) } } - fmt.Printf("\n✅ All services restarted\n") + // Check port availability before restarting + ports, err := collectPortsForServices(services, false) + if err != nil { + fmt.Fprintf(os.Stderr, "❌ %v\n", err) + os.Exit(1) + } + if err := ensurePortsAvailable("prod restart", ports); err != nil { + fmt.Fprintf(os.Stderr, "❌ %v\n", err) + os.Exit(1) + } + + // Start all services + fmt.Printf(" Starting services...\n") + for _, svc := range services { + if err := exec.Command("systemctl", "start", svc).Run(); err != nil { + fmt.Printf(" ⚠️ Failed to start %s: %v\n", svc, err) + } else { + fmt.Printf(" ✓ Started %s\n", svc) + } + } + + // Give services a moment to fully initialize before verification + fmt.Printf(" ⏳ Waiting for services to initialize...\n") + time.Sleep(3 * time.Second) + + // Verify all services are healthy + if err := verifyProductionRuntime("prod restart"); err != nil { + fmt.Fprintf(os.Stderr, "❌ %v\n", err) + os.Exit(1) + } + + fmt.Printf("\n✅ All services restarted and healthy\n") } func handleProdUninstall() { diff --git a/pkg/environments/production/config.go b/pkg/environments/production/config.go index 58ee253..e040f1d 100644 --- a/pkg/environments/production/config.go +++ b/pkg/environments/production/config.go @@ -4,6 +4,7 @@ import ( "crypto/rand" "encoding/hex" "fmt" + "net" "os" "os/exec" "path/filepath" @@ -12,6 +13,7 @@ import ( "github.com/DeBrosOfficial/network/pkg/environments/templates" "github.com/libp2p/go-libp2p/core/crypto" "github.com/libp2p/go-libp2p/core/peer" + "github.com/multiformats/go-multiaddr" ) // ConfigGenerator manages generation of node, gateway, and service configs @@ -26,6 +28,69 @@ func NewConfigGenerator(debrosDir string) *ConfigGenerator { } } +// extractIPFromMultiaddr extracts the IP address from a bootstrap peer multiaddr +// Supports IP4, IP6, DNS4, DNS6, and DNSADDR protocols +// Returns the IP address as a string, or empty string if extraction/resolution fails +func extractIPFromMultiaddr(multiaddrStr string) string { + ma, err := multiaddr.NewMultiaddr(multiaddrStr) + if err != nil { + return "" + } + + // First, try to extract direct IP address + var ip net.IP + var dnsName string + multiaddr.ForEach(ma, func(c multiaddr.Component) bool { + switch c.Protocol().Code { + case multiaddr.P_IP4, multiaddr.P_IP6: + ip = net.ParseIP(c.Value()) + return false // Stop iteration - found IP + case multiaddr.P_DNS4, multiaddr.P_DNS6, multiaddr.P_DNSADDR: + dnsName = c.Value() + // Continue to check for IP, but remember DNS name as fallback + } + return true + }) + + // If we found a direct IP, return it + if ip != nil { + return ip.String() + } + + // If we found a DNS name, try to resolve it + if dnsName != "" { + if resolvedIPs, err := net.LookupIP(dnsName); err == nil && len(resolvedIPs) > 0 { + // Prefer IPv4 addresses, but accept IPv6 if that's all we have + for _, resolvedIP := range resolvedIPs { + if resolvedIP.To4() != nil { + return resolvedIP.String() + } + } + // Return first IPv6 address if no IPv4 found + return resolvedIPs[0].String() + } + } + + return "" +} + +// inferBootstrapIP extracts the IP address from bootstrap peer multiaddrs +// Iterates through all bootstrap peers to find a valid IP (supports DNS resolution) +// Falls back to vpsIP if provided, otherwise returns empty string +func inferBootstrapIP(bootstrapPeers []string, vpsIP string) string { + // Try to extract IP from each bootstrap peer (in order) + for _, peer := range bootstrapPeers { + if ip := extractIPFromMultiaddr(peer); ip != "" { + return ip + } + } + // Fall back to vpsIP if provided + if vpsIP != "" { + return vpsIP + } + return "" +} + // GenerateNodeConfig generates node.yaml configuration func (cg *ConfigGenerator) GenerateNodeConfig(isBootstrap bool, bootstrapPeers []string, vpsIP string, bootstrapJoin string) (string, error) { var nodeID string @@ -35,6 +100,38 @@ func (cg *ConfigGenerator) GenerateNodeConfig(isBootstrap bool, bootstrapPeers [ nodeID = "node" } + // Determine advertise addresses + // For bootstrap: use vpsIP if provided, otherwise localhost + // For regular nodes: infer from bootstrap peers or use vpsIP + var httpAdvAddr, raftAdvAddr string + if isBootstrap { + if vpsIP != "" { + httpAdvAddr = net.JoinHostPort(vpsIP, "5001") + raftAdvAddr = net.JoinHostPort(vpsIP, "7001") + } else { + httpAdvAddr = "localhost:5001" + raftAdvAddr = "localhost:7001" + } + } else { + // Regular node: infer from bootstrap peers or use vpsIP + bootstrapIP := inferBootstrapIP(bootstrapPeers, vpsIP) + if bootstrapIP != "" { + // Use the bootstrap IP for advertise addresses (this node should be reachable at same network) + // If vpsIP is provided, use it; otherwise use bootstrap IP + if vpsIP != "" { + httpAdvAddr = net.JoinHostPort(vpsIP, "5001") + raftAdvAddr = net.JoinHostPort(vpsIP, "7001") + } else { + httpAdvAddr = net.JoinHostPort(bootstrapIP, "5001") + raftAdvAddr = net.JoinHostPort(bootstrapIP, "7001") + } + } else { + // Fallback to localhost if nothing can be inferred + httpAdvAddr = "localhost:5001" + raftAdvAddr = "localhost:7001" + } + } + if isBootstrap { // Bootstrap node - populate peer list and optional join address data := templates.BootstrapConfigData{ @@ -47,14 +144,35 @@ func (cg *ConfigGenerator) GenerateNodeConfig(isBootstrap bool, bootstrapPeers [ IPFSAPIPort: 4501, BootstrapPeers: bootstrapPeers, RQLiteJoinAddress: bootstrapJoin, + HTTPAdvAddress: httpAdvAddr, + RaftAdvAddress: raftAdvAddr, } return templates.RenderBootstrapConfig(data) } - // Regular node - must have join address - rqliteJoinAddr := "localhost:7001" - if vpsIP != "" { - rqliteJoinAddr = vpsIP + ":7001" + // Regular node - infer join address from bootstrap peers + // MUST extract from bootstrap_peers - no fallback to vpsIP (would cause self-join) + var rqliteJoinAddr string + bootstrapIP := inferBootstrapIP(bootstrapPeers, "") + if bootstrapIP == "" { + // Try to extract from first bootstrap peer directly as fallback + if len(bootstrapPeers) > 0 { + if extractedIP := extractIPFromMultiaddr(bootstrapPeers[0]); extractedIP != "" { + bootstrapIP = extractedIP + } + } + + // If still no IP, fail - we cannot join without a valid bootstrap address + if bootstrapIP == "" { + return "", fmt.Errorf("cannot determine RQLite join address: failed to extract IP from bootstrap peers %v (required for non-bootstrap nodes)", bootstrapPeers) + } + } + + rqliteJoinAddr = net.JoinHostPort(bootstrapIP, "7001") + + // Validate that join address doesn't match this node's own raft address (would cause self-join) + if rqliteJoinAddr == raftAdvAddr { + return "", fmt.Errorf("invalid configuration: rqlite_join_address (%s) cannot match raft_adv_address (%s) - node cannot join itself", rqliteJoinAddr, raftAdvAddr) } data := templates.NodeConfigData{ @@ -67,6 +185,8 @@ func (cg *ConfigGenerator) GenerateNodeConfig(isBootstrap bool, bootstrapPeers [ BootstrapPeers: bootstrapPeers, ClusterAPIPort: 9094, IPFSAPIPort: 4501, + HTTPAdvAddress: httpAdvAddr, + RaftAdvAddress: raftAdvAddr, } return templates.RenderNodeConfig(data) } diff --git a/pkg/environments/production/orchestrator.go b/pkg/environments/production/orchestrator.go index 94bc975..1577a79 100644 --- a/pkg/environments/production/orchestrator.go +++ b/pkg/environments/production/orchestrator.go @@ -3,6 +3,7 @@ package production import ( "fmt" "io" + "net" "os" "os/exec" "path/filepath" @@ -368,8 +369,20 @@ func (ps *ProductionSetup) Phase4GenerateConfigs(isBootstrap bool, bootstrapPeer } ps.logf(" ✓ Node config generated: %s", configFile) - // Gateway config - olricServers := []string{"127.0.0.1:3320"} + // Gateway config - infer Olric servers from bootstrap peers or use localhost + olricServers := []string{"127.0.0.1:3320"} // Default to localhost for single-node + if len(bootstrapPeers) > 0 { + // Try to infer Olric servers from bootstrap peers + bootstrapIP := inferBootstrapIP(bootstrapPeers, vpsIP) + if bootstrapIP != "" { + // Add bootstrap Olric server (use net.JoinHostPort for IPv6 support) + olricServers = []string{net.JoinHostPort(bootstrapIP, "3320")} + // If this is not bootstrap and vpsIP is provided, add local Olric server too + if !isBootstrap && vpsIP != "" { + olricServers = append(olricServers, net.JoinHostPort(vpsIP, "3320")) + } + } + } gatewayConfig, err := ps.configGenerator.GenerateGatewayConfig(bootstrapPeers, enableHTTPS, domain, olricServers) if err != nil { return fmt.Errorf("failed to generate gateway config: %w", err) @@ -380,8 +393,8 @@ func (ps *ProductionSetup) Phase4GenerateConfigs(isBootstrap bool, bootstrapPeer } ps.logf(" ✓ Gateway config generated") - // Olric config - olricConfig, err := ps.configGenerator.GenerateOlricConfig("localhost", 3320, 3322) + // Olric config - use 0.0.0.0 to bind on all interfaces + olricConfig, err := ps.configGenerator.GenerateOlricConfig("0.0.0.0", 3320, 3322) if err != nil { return fmt.Errorf("failed to generate olric config: %w", err) } @@ -533,7 +546,7 @@ func (ps *ProductionSetup) LogSetupComplete(peerID string) { ps.logf(" %s/logs/node-bootstrap.log", ps.debrosDir) ps.logf(" %s/logs/gateway.log", ps.debrosDir) ps.logf("\nStart All Services:") - ps.logf(" systemctl start debros-ipfs-bootstrap debros-ipfs-cluster-bootstrap debros-rqlite-bootstrap debros-olric debros-node-bootstrap debros-gateway") + ps.logf(" systemctl start debros-ipfs-bootstrap debros-ipfs-cluster-bootstrap debros-olric debros-node-bootstrap debros-gateway") ps.logf("\nVerify Installation:") ps.logf(" curl http://localhost:6001/health") ps.logf(" curl http://localhost:5001/status\n") diff --git a/pkg/environments/production/services.go b/pkg/environments/production/services.go index fd6b615..1463040 100644 --- a/pkg/environments/production/services.go +++ b/pkg/environments/production/services.go @@ -245,7 +245,7 @@ User=debros Group=debros WorkingDirectory=%s Environment=HOME=%s - ExecStart=%s/bin/gateway --config %s/data/gateway.yaml +ExecStart=%s/bin/gateway --config %s/data/gateway.yaml Restart=always RestartSec=5 StandardOutput=file:%s diff --git a/pkg/environments/templates/bootstrap.yaml b/pkg/environments/templates/bootstrap.yaml index ba7d98d..3fb3033 100644 --- a/pkg/environments/templates/bootstrap.yaml +++ b/pkg/environments/templates/bootstrap.yaml @@ -31,8 +31,8 @@ discovery: {{end}} discovery_interval: "15s" bootstrap_port: {{.P2PPort}} - http_adv_address: "localhost:{{.RQLiteHTTPPort}}" - raft_adv_address: "localhost:{{.RQLiteRaftPort}}" + http_adv_address: "{{.HTTPAdvAddress}}" + raft_adv_address: "{{.RaftAdvAddress}}" node_namespace: "default" security: diff --git a/pkg/environments/templates/node.yaml b/pkg/environments/templates/node.yaml index bc72840..9650b0a 100644 --- a/pkg/environments/templates/node.yaml +++ b/pkg/environments/templates/node.yaml @@ -31,8 +31,8 @@ discovery: {{end}} discovery_interval: "15s" bootstrap_port: {{.P2PPort}} - http_adv_address: "localhost:{{.RQLiteHTTPPort}}" - raft_adv_address: "localhost:{{.RQLiteRaftPort}}" + http_adv_address: "{{.HTTPAdvAddress}}" + raft_adv_address: "{{.RaftAdvAddress}}" node_namespace: "default" security: diff --git a/pkg/environments/templates/render.go b/pkg/environments/templates/render.go index 96b8eb8..0fc5bf6 100644 --- a/pkg/environments/templates/render.go +++ b/pkg/environments/templates/render.go @@ -22,6 +22,8 @@ type BootstrapConfigData struct { IPFSAPIPort int // Default: 4501 BootstrapPeers []string // List of bootstrap peer multiaddrs RQLiteJoinAddress string // Optional: join address for secondary bootstraps + HTTPAdvAddress string // Advertised HTTP address (IP:port) + RaftAdvAddress string // Advertised Raft address (IP:port) } // NodeConfigData holds parameters for node.yaml rendering @@ -34,7 +36,9 @@ type NodeConfigData struct { RQLiteJoinAddress string BootstrapPeers []string ClusterAPIPort int - IPFSAPIPort int // Default: 4501+ + IPFSAPIPort int // Default: 4501+ + HTTPAdvAddress string // Advertised HTTP address (IP:port) + RaftAdvAddress string // Advertised Raft address (IP:port) } // GatewayConfigData holds parameters for gateway.yaml rendering diff --git a/pkg/node/node.go b/pkg/node/node.go index 5a4ec9b..bdded3c 100644 --- a/pkg/node/node.go +++ b/pkg/node/node.go @@ -4,6 +4,7 @@ import ( "context" "fmt" mathrand "math/rand" + "net" "os" "path/filepath" "strings" @@ -131,6 +132,51 @@ func (n *Node) startRQLite(ctx context.Context) error { return nil } +// extractIPFromMultiaddr extracts the IP address from a bootstrap peer multiaddr +// Supports IP4, IP6, DNS4, DNS6, and DNSADDR protocols +func extractIPFromMultiaddr(multiaddrStr string) string { + ma, err := multiaddr.NewMultiaddr(multiaddrStr) + if err != nil { + return "" + } + + // First, try to extract direct IP address + var ip string + var dnsName string + multiaddr.ForEach(ma, func(c multiaddr.Component) bool { + switch c.Protocol().Code { + case multiaddr.P_IP4, multiaddr.P_IP6: + ip = c.Value() + return false // Stop iteration - found IP + case multiaddr.P_DNS4, multiaddr.P_DNS6, multiaddr.P_DNSADDR: + dnsName = c.Value() + // Continue to check for IP, but remember DNS name as fallback + } + return true + }) + + // If we found a direct IP, return it + if ip != "" { + return ip + } + + // If we found a DNS name, try to resolve it + if dnsName != "" { + if resolvedIPs, err := net.LookupIP(dnsName); err == nil && len(resolvedIPs) > 0 { + // Prefer IPv4 addresses, but accept IPv6 if that's all we have + for _, resolvedIP := range resolvedIPs { + if resolvedIP.To4() != nil { + return resolvedIP.String() + } + } + // Return first IPv6 address if no IPv4 found + return resolvedIPs[0].String() + } + } + + return "" +} + // bootstrapPeerSource returns a PeerSource that yields peers from BootstrapPeers. func bootstrapPeerSource(bootstrapAddrs []string, logger *zap.Logger) func(context.Context, int) <-chan peer.AddrInfo { return func(ctx context.Context, num int) <-chan peer.AddrInfo { @@ -688,10 +734,14 @@ func (n *Node) startIPFSClusterConfig() error { // If this is not the bootstrap node, try to update bootstrap peer info if n.config.Node.Type != "bootstrap" && len(n.config.Discovery.BootstrapPeers) > 0 { - // Try to find bootstrap cluster API URL from config - // For now, we'll discover it from the first bootstrap peer - // In a real scenario, you might want to configure this explicitly - bootstrapClusterAPI := "http://localhost:9094" // Default bootstrap cluster API + // Infer bootstrap cluster API URL from first bootstrap peer multiaddr + bootstrapClusterAPI := "http://localhost:9094" // Default fallback + if len(n.config.Discovery.BootstrapPeers) > 0 { + // Extract IP from first bootstrap peer multiaddr + if ip := extractIPFromMultiaddr(n.config.Discovery.BootstrapPeers[0]); ip != "" { + bootstrapClusterAPI = fmt.Sprintf("http://%s:9094", ip) + } + } if err := cm.UpdateBootstrapPeers(bootstrapClusterAPI); err != nil { n.logger.ComponentWarn(logging.ComponentNode, "Failed to update bootstrap peers, will retry later", zap.Error(err)) // Don't fail - peers can connect later via mDNS or manual config diff --git a/pkg/rqlite/cluster_discovery.go b/pkg/rqlite/cluster_discovery.go index 3e1b46b..c6b4e51 100644 --- a/pkg/rqlite/cluster_discovery.go +++ b/pkg/rqlite/cluster_discovery.go @@ -4,6 +4,8 @@ import ( "context" "encoding/json" "fmt" + "net" + "net/netip" "os" "path/filepath" "strings" @@ -13,6 +15,7 @@ import ( "github.com/DeBrosOfficial/network/pkg/discovery" "github.com/libp2p/go-libp2p/core/host" "github.com/libp2p/go-libp2p/core/peer" + "github.com/multiformats/go-multiaddr" "go.uber.org/zap" ) @@ -159,18 +162,32 @@ func (c *ClusterDiscoveryService) collectPeerMetadata() []*discovery.RQLiteNodeM c.logger.Debug("Collecting peer metadata from LibP2P", zap.Int("connected_libp2p_peers", len(connectedPeers))) + c.mu.RLock() + currentRaftAddr := c.raftAddress + currentHTTPAddr := c.httpAddress + c.mu.RUnlock() + // Add ourselves ourMetadata := &discovery.RQLiteNodeMetadata{ - NodeID: c.raftAddress, // RQLite uses raft address as node ID - RaftAddress: c.raftAddress, - HTTPAddress: c.httpAddress, + NodeID: currentRaftAddr, // RQLite uses raft address as node ID + RaftAddress: currentRaftAddr, + HTTPAddress: currentHTTPAddr, NodeType: c.nodeType, RaftLogIndex: c.rqliteManager.getRaftLogIndex(), LastSeen: time.Now(), ClusterVersion: "1.0", } + + if c.adjustSelfAdvertisedAddresses(ourMetadata) { + c.logger.Debug("Adjusted self-advertised RQLite addresses", + zap.String("raft_address", ourMetadata.RaftAddress), + zap.String("http_address", ourMetadata.HTTPAddress)) + } + metadata = append(metadata, ourMetadata) + staleNodeIDs := make([]string, 0) + // Query connected peers for their RQLite metadata // For now, we'll use a simple approach - store metadata in peer metadata store // In a full implementation, this would use a custom protocol to exchange RQLite metadata @@ -181,6 +198,9 @@ func (c *ClusterDiscoveryService) collectPeerMetadata() []*discovery.RQLiteNodeM if jsonData, ok := val.([]byte); ok { var peerMeta discovery.RQLiteNodeMetadata if err := json.Unmarshal(jsonData, &peerMeta); err == nil { + if updated, stale := c.adjustPeerAdvertisedAddresses(peerID, &peerMeta); updated && stale != "" { + staleNodeIDs = append(staleNodeIDs, stale) + } peerMeta.LastSeen = time.Now() metadata = append(metadata, &peerMeta) } @@ -188,6 +208,16 @@ func (c *ClusterDiscoveryService) collectPeerMetadata() []*discovery.RQLiteNodeM } } + // Clean up stale entries if NodeID changed + if len(staleNodeIDs) > 0 { + c.mu.Lock() + for _, id := range staleNodeIDs { + delete(c.knownPeers, id) + delete(c.peerHealth, id) + } + c.mu.Unlock() + } + return metadata } @@ -366,6 +396,10 @@ func (c *ClusterDiscoveryService) getPeersJSONUnlocked() []map[string]interface{ peers := make([]map[string]interface{}, 0, len(c.knownPeers)) for _, peer := range c.knownPeers { + // Skip self - RQLite knows about itself, shouldn't be in peers.json + if peer.NodeID == c.raftAddress { + continue + } peerEntry := map[string]interface{}{ "id": peer.RaftAddress, // RQLite uses raft address as node ID "address": peer.RaftAddress, @@ -594,16 +628,28 @@ func (c *ClusterDiscoveryService) TriggerPeerExchange(ctx context.Context) error // UpdateOwnMetadata updates our own RQLite metadata in the peerstore func (c *ClusterDiscoveryService) UpdateOwnMetadata() { + c.mu.RLock() + currentRaftAddr := c.raftAddress + currentHTTPAddr := c.httpAddress + c.mu.RUnlock() + metadata := &discovery.RQLiteNodeMetadata{ - NodeID: c.raftAddress, // RQLite uses raft address as node ID - RaftAddress: c.raftAddress, - HTTPAddress: c.httpAddress, + NodeID: currentRaftAddr, // RQLite uses raft address as node ID + RaftAddress: currentRaftAddr, + HTTPAddress: currentHTTPAddr, NodeType: c.nodeType, RaftLogIndex: c.rqliteManager.getRaftLogIndex(), LastSeen: time.Now(), ClusterVersion: "1.0", } + // Adjust addresses if needed + if c.adjustSelfAdvertisedAddresses(metadata) { + c.logger.Debug("Adjusted self-advertised RQLite addresses in UpdateOwnMetadata", + zap.String("raft_address", metadata.RaftAddress), + zap.String("http_address", metadata.HTTPAddress)) + } + // Store in our own peerstore for peer exchange data, err := json.Marshal(metadata) if err != nil { @@ -623,6 +669,21 @@ func (c *ClusterDiscoveryService) UpdateOwnMetadata() { // StoreRemotePeerMetadata stores metadata received from a remote peer func (c *ClusterDiscoveryService) StoreRemotePeerMetadata(peerID peer.ID, metadata *discovery.RQLiteNodeMetadata) error { + if metadata == nil { + return fmt.Errorf("metadata is nil") + } + + // Adjust addresses if needed (replace localhost with actual IP) + if updated, stale := c.adjustPeerAdvertisedAddresses(peerID, metadata); updated && stale != "" { + // Clean up stale entry if NodeID changed + c.mu.Lock() + delete(c.knownPeers, stale) + delete(c.peerHealth, stale) + c.mu.Unlock() + } + + metadata.LastSeen = time.Now() + data, err := json.Marshal(metadata) if err != nil { return fmt.Errorf("failed to marshal metadata: %w", err) @@ -633,8 +694,239 @@ func (c *ClusterDiscoveryService) StoreRemotePeerMetadata(peerID peer.ID, metada } c.logger.Debug("Stored remote peer metadata", - zap.String("peer_id", peerID.String()[:8]+"..."), + zap.String("peer_id", shortPeerID(peerID)), zap.String("node_id", metadata.NodeID)) return nil } + +// adjustPeerAdvertisedAddresses adjusts peer metadata addresses by replacing localhost/loopback +// with the actual IP address from LibP2P connection. Returns (updated, staleNodeID). +// staleNodeID is non-empty if NodeID changed (indicating old entry should be cleaned up). +func (c *ClusterDiscoveryService) adjustPeerAdvertisedAddresses(peerID peer.ID, meta *discovery.RQLiteNodeMetadata) (bool, string) { + ip := c.selectPeerIP(peerID) + if ip == "" { + return false, "" + } + + changed, stale := rewriteAdvertisedAddresses(meta, ip, true) + if changed { + c.logger.Debug("Normalized peer advertised RQLite addresses", + zap.String("peer_id", shortPeerID(peerID)), + zap.String("raft_address", meta.RaftAddress), + zap.String("http_address", meta.HTTPAddress)) + } + return changed, stale +} + +// adjustSelfAdvertisedAddresses adjusts our own metadata addresses by replacing localhost/loopback +// with the actual IP address from LibP2P host. Updates internal state if changed. +func (c *ClusterDiscoveryService) adjustSelfAdvertisedAddresses(meta *discovery.RQLiteNodeMetadata) bool { + ip := c.selectSelfIP() + if ip == "" { + return false + } + + changed, _ := rewriteAdvertisedAddresses(meta, ip, true) + if !changed { + return false + } + + // Update internal state with corrected addresses + c.mu.Lock() + c.raftAddress = meta.RaftAddress + c.httpAddress = meta.HTTPAddress + c.mu.Unlock() + return true +} + +// selectPeerIP selects the best IP address for a peer from LibP2P connections. +// Prefers public IPs, falls back to private IPs if no public IP is available. +func (c *ClusterDiscoveryService) selectPeerIP(peerID peer.ID) string { + var fallback string + + // First, try to get IP from active connections + for _, conn := range c.host.Network().ConnsToPeer(peerID) { + if ip, public := ipFromMultiaddr(conn.RemoteMultiaddr()); ip != "" { + if shouldReplaceHost(ip) { + continue + } + if public { + return ip + } + if fallback == "" { + fallback = ip + } + } + } + + // Fallback to peerstore addresses + for _, addr := range c.host.Peerstore().Addrs(peerID) { + if ip, public := ipFromMultiaddr(addr); ip != "" { + if shouldReplaceHost(ip) { + continue + } + if public { + return ip + } + if fallback == "" { + fallback = ip + } + } + } + + return fallback +} + +// selectSelfIP selects the best IP address for ourselves from LibP2P host addresses. +// Prefers public IPs, falls back to private IPs if no public IP is available. +func (c *ClusterDiscoveryService) selectSelfIP() string { + var fallback string + + for _, addr := range c.host.Addrs() { + if ip, public := ipFromMultiaddr(addr); ip != "" { + if shouldReplaceHost(ip) { + continue + } + if public { + return ip + } + if fallback == "" { + fallback = ip + } + } + } + + return fallback +} + +// rewriteAdvertisedAddresses rewrites RaftAddress and HTTPAddress in metadata, +// replacing localhost/loopback addresses with the provided IP. +// Returns (changed, staleNodeID). staleNodeID is non-empty if NodeID changed. +func rewriteAdvertisedAddresses(meta *discovery.RQLiteNodeMetadata, newHost string, allowNodeIDRewrite bool) (bool, string) { + if meta == nil || newHost == "" { + return false, "" + } + + originalNodeID := meta.NodeID + changed := false + nodeIDChanged := false + + // Replace host in RaftAddress if it's localhost/loopback + if newAddr, replaced := replaceAddressHost(meta.RaftAddress, newHost); replaced { + if meta.RaftAddress != newAddr { + meta.RaftAddress = newAddr + changed = true + } + } + + // Replace host in HTTPAddress if it's localhost/loopback + if newAddr, replaced := replaceAddressHost(meta.HTTPAddress, newHost); replaced { + if meta.HTTPAddress != newAddr { + meta.HTTPAddress = newAddr + changed = true + } + } + + // Update NodeID to match RaftAddress if it changed + if allowNodeIDRewrite { + if meta.RaftAddress != "" && (meta.NodeID == "" || meta.NodeID == originalNodeID || shouldReplaceHost(hostFromAddress(meta.NodeID))) { + if meta.NodeID != meta.RaftAddress { + meta.NodeID = meta.RaftAddress + nodeIDChanged = meta.NodeID != originalNodeID + if nodeIDChanged { + changed = true + } + } + } + } + + if nodeIDChanged { + return changed, originalNodeID + } + return changed, "" +} + +// replaceAddressHost replaces the host part of an address if it's localhost/loopback. +// Returns (newAddress, replaced). replaced is true if host was replaced. +func replaceAddressHost(address, newHost string) (string, bool) { + if address == "" || newHost == "" { + return address, false + } + + host, port, err := net.SplitHostPort(address) + if err != nil { + return address, false + } + + if !shouldReplaceHost(host) { + return address, false + } + + return net.JoinHostPort(newHost, port), true +} + +// shouldReplaceHost returns true if the host should be replaced (localhost, loopback, etc.) +func shouldReplaceHost(host string) bool { + if host == "" { + return true + } + if strings.EqualFold(host, "localhost") { + return true + } + + // Check if it's a loopback or unspecified address + if addr, err := netip.ParseAddr(host); err == nil { + if addr.IsLoopback() || addr.IsUnspecified() { + return true + } + } + + return false +} + +// hostFromAddress extracts the host part from a host:port address +func hostFromAddress(address string) string { + host, _, err := net.SplitHostPort(address) + if err != nil { + return "" + } + return host +} + +// ipFromMultiaddr extracts an IP address from a multiaddr and returns (ip, isPublic) +func ipFromMultiaddr(addr multiaddr.Multiaddr) (string, bool) { + if addr == nil { + return "", false + } + + if v4, err := addr.ValueForProtocol(multiaddr.P_IP4); err == nil { + return v4, isPublicIP(v4) + } + if v6, err := addr.ValueForProtocol(multiaddr.P_IP6); err == nil { + return v6, isPublicIP(v6) + } + return "", false +} + +// isPublicIP returns true if the IP is a public (non-private, non-loopback) address +func isPublicIP(ip string) bool { + addr, err := netip.ParseAddr(ip) + if err != nil { + return false + } + // Exclude loopback, unspecified, link-local, multicast, and private addresses + if addr.IsLoopback() || addr.IsUnspecified() || addr.IsLinkLocalUnicast() || addr.IsLinkLocalMulticast() || addr.IsPrivate() { + return false + } + return true +} + +// shortPeerID returns a shortened version of a peer ID for logging +func shortPeerID(id peer.ID) string { + s := id.String() + if len(s) <= 8 { + return s + } + return s[:8] + "..." +} diff --git a/pkg/rqlite/rqlite.go b/pkg/rqlite/rqlite.go index 75e5349..5d3ca10 100644 --- a/pkg/rqlite/rqlite.go +++ b/pkg/rqlite/rqlite.go @@ -112,8 +112,12 @@ func (r *RQLiteManager) Start(ctx context.Context) error { return err } - // Apply migrations - migrationsDir := "migrations" + // Apply migrations - resolve path for production vs development + migrationsDir, err := r.resolveMigrationsDir() + if err != nil { + r.logger.Error("Failed to resolve migrations directory", zap.Error(err)) + return fmt.Errorf("resolve migrations directory: %w", err) + } if err := r.ApplyMigrations(ctx, migrationsDir); err != nil { r.logger.Error("Migrations failed", zap.Error(err), zap.String("dir", migrationsDir)) return fmt.Errorf("apply migrations: %w", err) @@ -139,6 +143,23 @@ func (r *RQLiteManager) rqliteDataDirPath() (string, error) { return filepath.Join(dataDir, "rqlite"), nil } +// resolveMigrationsDir resolves the migrations directory path for production vs development +// In production, migrations are at /home/debros/src/migrations +// In development, migrations are relative to the project root (migrations/) +func (r *RQLiteManager) resolveMigrationsDir() (string, error) { + // Check for production path first: /home/debros/src/migrations + productionPath := "/home/debros/src/migrations" + if _, err := os.Stat(productionPath); err == nil { + r.logger.Info("Using production migrations directory", zap.String("path", productionPath)) + return productionPath, nil + } + + // Fall back to relative path for development + devPath := "migrations" + r.logger.Info("Using development migrations directory", zap.String("path", devPath)) + return devPath, nil +} + // prepareDataDir expands and creates the RQLite data directory func (r *RQLiteManager) prepareDataDir() (string, error) { rqliteDataDir, err := r.rqliteDataDirPath() @@ -176,10 +197,13 @@ func (r *RQLiteManager) launchProcess(ctx context.Context, rqliteDataDir string) joinArg = strings.TrimPrefix(joinArg, "https://") } - // Wait for join target to become reachable to avoid forming a separate cluster (wait indefinitely) - if err := r.waitForJoinTarget(ctx, r.config.RQLiteJoinAddress, 0); err != nil { + // Wait for join target to become reachable to avoid forming a separate cluster + // Use 5 minute timeout to prevent infinite waits on bad configurations + joinTimeout := 5 * time.Minute + if err := r.waitForJoinTarget(ctx, r.config.RQLiteJoinAddress, joinTimeout); err != nil { r.logger.Warn("Join target did not become reachable within timeout; will still attempt to join", zap.String("join_address", r.config.RQLiteJoinAddress), + zap.Duration("timeout", joinTimeout), zap.Error(err)) } diff --git a/scripts/install-debros-network.sh b/scripts/install-debros-network.sh index 5a32818..4526fe3 100755 --- a/scripts/install-debros-network.sh +++ b/scripts/install-debros-network.sh @@ -15,6 +15,7 @@ # bash scripts/install-debros-network.sh --domain example.com set -e +set -o pipefail trap 'error "An error occurred. Installation aborted."; exit 1' ERR # Color codes @@ -100,19 +101,30 @@ check_root() { get_latest_release() { log "Fetching latest release..." + # Try to get latest release with better error handling + RELEASE_DATA="" if command -v jq &>/dev/null; then # Get the latest release (including pre-releases/nightly) - LATEST_RELEASE=$(curl -fsSL -H "Accept: application/vnd.github+json" "$GITHUB_API/releases" | \ - jq -r '.[0] | .tag_name') + RELEASE_DATA=$(curl -fsSL -H "Accept: application/vnd.github+json" "$GITHUB_API/releases" 2>&1) + if [ $? -ne 0 ]; then + error "Failed to fetch release data from GitHub API" + error "Response: $RELEASE_DATA" + exit 1 + fi + LATEST_RELEASE=$(echo "$RELEASE_DATA" | jq -r '.[0] | .tag_name' 2>/dev/null) else - LATEST_RELEASE=$(curl -fsSL "$GITHUB_API/releases" | \ - grep '"tag_name"' | \ - head -1 | \ - cut -d'"' -f4) + RELEASE_DATA=$(curl -fsSL "$GITHUB_API/releases" 2>&1) + if [ $? -ne 0 ]; then + error "Failed to fetch release data from GitHub API" + error "Response: $RELEASE_DATA" + exit 1 + fi + LATEST_RELEASE=$(echo "$RELEASE_DATA" | grep '"tag_name"' | head -1 | cut -d'"' -f4) fi - if [ -z "$LATEST_RELEASE" ]; then + if [ -z "$LATEST_RELEASE" ] || [ "$LATEST_RELEASE" = "null" ]; then error "Could not determine latest release version" + error "GitHub API response may be empty or rate-limited" exit 1 fi