feat: enhance production command handling and configuration management

- Updated the production command to support dynamic IP address extraction from bootstrap peer multiaddrs, improving node connectivity.
- Refactored the configuration generation to include advertised HTTP and Raft addresses based on the node type and bootstrap peers.
- Enhanced error handling and logging for service management commands, ensuring better feedback during installation and upgrades.
- Improved the README documentation with new command examples and clarified usage instructions for production deployment.
anonpenguin23 2025-11-12 09:14:26 +02:00
parent 0ca211c983
commit 32470052ba
16 changed files with 1209 additions and 174 deletions
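The thread running through most of these files is the first bullet above: derive a routable IP from the configured bootstrap peer multiaddrs instead of advertising 0.0.0.0 or localhost. A minimal, self-contained sketch of that extraction with go-multiaddr (the helper name and sample address are illustrative, not taken from the repository):

```go
package main

import (
	"fmt"
	"net"

	"github.com/multiformats/go-multiaddr"
)

// extractIP walks the components of a bootstrap peer multiaddr and returns the
// first IP literal it finds. DNS components would need a LookupIP fallback, as
// in the real config generator further down.
func extractIP(addr string) string {
	ma, err := multiaddr.NewMultiaddr(addr)
	if err != nil {
		return ""
	}
	var ip net.IP
	multiaddr.ForEach(ma, func(c multiaddr.Component) bool {
		code := c.Protocol().Code
		if code == multiaddr.P_IP4 || code == multiaddr.P_IP6 {
			ip = net.ParseIP(c.Value())
			return false // stop: found an IP component
		}
		return true
	})
	if ip == nil {
		return ""
	}
	return ip.String()
}

func main() {
	// Illustrative bootstrap address; real entries also carry a /p2p/<peerID> suffix.
	fmt.Println(extractIP("/ip4/203.0.113.1/tcp/4001")) // 203.0.113.1
}
```

The config generator in this commit follows the same pattern and additionally resolves DNS4/DNS6/DNSADDR components via net.LookupIP when no literal IP component is present.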


@ -13,6 +13,27 @@ The format is based on [Keep a Changelog][keepachangelog] and adheres to [Semant
### Deprecated
### Fixed
## [0.69.6] - 2025-11-12
### Added
- Improved production service health checks and port availability validation during install, upgrade, start, and restart commands.
- Added service aliases (node, ipfs, cluster, gateway, olric) to `dbn prod logs` command for easier log viewing.
### Changed
- Updated node configuration logic to correctly advertise public IP addresses in multiaddrs (for P2P discovery) and RQLite addresses, improving connectivity for nodes behind NAT/firewalls.
- Enhanced `dbn prod install` and `dbn prod upgrade` to automatically detect and preserve existing VPS IP, domain, and cluster join information.
- Improved RQLite cluster discovery to automatically replace localhost/loopback addresses with the actual public IP when exchanging metadata between peers.
- Updated `dbn prod install` to require `--vps-ip` for all node types (bootstrap and regular) for proper network configuration.
- Improved error handling and robustness in the installation script when fetching the latest release from GitHub.
### Deprecated
### Removed
### Fixed
- Fixed an issue where the RQLite process would wait indefinitely for a join target; now uses a 5-minute timeout.
- Corrected the location of the gateway configuration file reference in the README.
## [0.69.5] - 2025-11-11
### Added

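The "replace localhost/loopback addresses with the actual public IP" behaviour noted for 0.69.6 above boils down to a host check before RQLite metadata is advertised to peers. A rough, self-contained sketch of the idea, assuming an illustrative helper name (`rewriteAdvertiseAddr` is not a function from this repository):

```go
package main

import (
	"fmt"
	"net"
)

// rewriteAdvertiseAddr returns addr with a loopback or unspecified host swapped
// for the node's public IP, leaving already-public addresses untouched.
func rewriteAdvertiseAddr(addr, publicIP string) string {
	host, port, err := net.SplitHostPort(addr)
	if err != nil {
		return addr
	}
	ip := net.ParseIP(host)
	if host == "localhost" || (ip != nil && (ip.IsLoopback() || ip.IsUnspecified())) {
		return net.JoinHostPort(publicIP, port)
	}
	return addr
}

func main() {
	fmt.Println(rewriteAdvertiseAddr("localhost:7001", "203.0.113.1"))   // 203.0.113.1:7001
	fmt.Println(rewriteAdvertiseAddr("198.51.100.7:7001", "203.0.113.1")) // unchanged
}
```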

@ -19,7 +19,7 @@ test-e2e:
.PHONY: build clean test run-node run-node2 run-node3 run-example deps tidy fmt vet lint clear-ports install-hooks kill
-VERSION := 0.69.5
+VERSION := 0.69.6
COMMIT ?= $(shell git rev-parse --short HEAD 2>/dev/null || echo unknown)
DATE ?= $(shell date -u +%Y-%m-%dT%H:%M:%SZ)
LDFLAGS := -X 'main.version=$(VERSION)' -X 'main.commit=$(COMMIT)' -X 'main.date=$(DATE)'


@ -217,7 +217,7 @@ Currently, the `upgrade` command doesn't support `--domain` flag. To enable HTTP
1. **Edit the gateway configuration:**
```bash
-sudo nano /home/debros/.debros/configs/gateway.yaml
+sudo nano /home/debros/.debros/data/gateway.yaml
```
2. **Update the configuration:**
@ -358,6 +358,9 @@ sudo systemctl enable debros-*
# Install bootstrap node
sudo dbn prod install --bootstrap [--domain DOMAIN] [--branch BRANCH]
sudo dbn prod install --nightly --domain node-gh38V1.debros.network --vps-ip 57.128.223.92 --ignore-resource-checks --bootstrap-join
# Install secondary node
sudo dbn prod install --vps-ip IP --peers ADDRS [--domain DOMAIN] [--branch BRANCH]


@ -4,6 +4,7 @@ import (
"context" "context"
"flag" "flag"
"fmt" "fmt"
"net"
"os" "os"
"os/signal" "os/signal"
"path/filepath" "path/filepath"
@ -137,7 +138,26 @@ func startNode(ctx context.Context, cfg *config.Config, port int) error {
// Save the peer ID to a file for CLI access (especially useful for bootstrap)
peerID := n.GetPeerID()
peerInfoFile := filepath.Join(dataDir, "peer.info")
-peerMultiaddr := fmt.Sprintf("/ip4/0.0.0.0/tcp/%d/p2p/%s", port, peerID)
// Extract advertise IP from config (prefer http_adv_address, fallback to raft_adv_address)
advertiseIP := "0.0.0.0" // Default fallback
if cfg.Discovery.HttpAdvAddress != "" {
if host, _, err := net.SplitHostPort(cfg.Discovery.HttpAdvAddress); err == nil && host != "" && host != "localhost" {
advertiseIP = host
}
} else if cfg.Discovery.RaftAdvAddress != "" {
if host, _, err := net.SplitHostPort(cfg.Discovery.RaftAdvAddress); err == nil && host != "" && host != "localhost" {
advertiseIP = host
}
}
// Determine IP protocol (IPv4 or IPv6) for multiaddr
ipProtocol := "ip4"
if ip := net.ParseIP(advertiseIP); ip != nil && ip.To4() == nil {
ipProtocol = "ip6"
}
peerMultiaddr := fmt.Sprintf("/%s/%s/tcp/%d/p2p/%s", ipProtocol, advertiseIP, port, peerID)
if err := os.WriteFile(peerInfoFile, []byte(peerMultiaddr), 0644); err != nil {
logger.Error("Failed to save peer info: %v", zap.Error(err))

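The effect of the hunk above is that peer.info now records a dialable multiaddr built from the advertised IP, with the /ip4 or /ip6 prefix chosen to match. A small sketch of that selection (helper name and peer IDs are placeholders):

```go
package main

import (
	"fmt"
	"net"
)

// peerAddr mirrors the construction above: pick /ip4 or /ip6 based on the
// advertised IP, then append the TCP port and peer ID.
func peerAddr(advertiseIP string, port int, peerID string) string {
	proto := "ip4"
	if ip := net.ParseIP(advertiseIP); ip != nil && ip.To4() == nil {
		proto = "ip6"
	}
	return fmt.Sprintf("/%s/%s/tcp/%d/p2p/%s", proto, advertiseIP, port, peerID)
}

func main() {
	// Peer IDs below are illustrative placeholders, not real identities.
	fmt.Println(peerAddr("203.0.113.1", 4001, "12D3KooWExample")) // /ip4/...
	fmt.Println(peerAddr("2001:db8::1", 4001, "12D3KooWExample")) // /ip6/...
}
```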

@ -245,12 +245,24 @@ func HandlePubSubCommand(args []string, format string, timeout time.Duration) {
func createClient() (client.NetworkClient, error) {
config := client.DefaultClientConfig("dbn")
// Use active environment's gateway URL
gatewayURL := getGatewayURL()
config.GatewayURL = gatewayURL
// Try to get bootstrap peers from active environment
// For now, we'll use the default bootstrap peers from config
// In the future, environments could specify their own bootstrap peers
env, err := GetActiveEnvironment()
if err == nil && env != nil {
// Environment loaded successfully - gateway URL already set above
// Bootstrap peers could be added to Environment struct in the future
_ = env // Use env if we add bootstrap peers to it
}
// Check for existing credentials using enhanced authentication
creds, err := auth.GetValidEnhancedCredentials()
if err != nil {
// No valid credentials found, use the enhanced authentication flow
-gatewayURL := getGatewayURL()
newCreds, authErr := auth.GetOrPromptForCredentials(gatewayURL)
if authErr != nil {
return nil, fmt.Errorf("authentication failed: %w", authErr)


@ -2,16 +2,54 @@ package cli
import (
"bufio"
"errors"
"flag"
"fmt"
"net"
"net/http"
"os"
"os/exec"
"path/filepath"
"strings"
"syscall"
"time"
"github.com/DeBrosOfficial/network/pkg/environments/production"
"github.com/multiformats/go-multiaddr"
)
// normalizeBootstrapPeers normalizes and validates bootstrap peer multiaddrs
func normalizeBootstrapPeers(peersStr string) ([]string, error) {
if peersStr == "" {
return nil, nil
}
// Split by comma and trim whitespace
rawPeers := strings.Split(peersStr, ",")
peers := make([]string, 0, len(rawPeers))
seen := make(map[string]bool)
for _, peer := range rawPeers {
peer = strings.TrimSpace(peer)
if peer == "" {
continue
}
// Validate multiaddr format
if _, err := multiaddr.NewMultiaddr(peer); err != nil {
return nil, fmt.Errorf("invalid multiaddr %q: %w", peer, err)
}
// Deduplicate
if !seen[peer] {
peers = append(peers, peer)
seen[peer] = true
}
}
return peers, nil
}
// HandleProdCommand handles production environment commands
func HandleProdCommand(args []string) {
if len(args) == 0 {
@ -57,7 +95,7 @@ func showProdHelp() {
fmt.Printf(" --force - Reconfigure all settings\n") fmt.Printf(" --force - Reconfigure all settings\n")
fmt.Printf(" --bootstrap - Install as bootstrap node\n") fmt.Printf(" --bootstrap - Install as bootstrap node\n")
fmt.Printf(" --vps-ip IP - VPS public IP address (required for non-bootstrap)\n") fmt.Printf(" --vps-ip IP - VPS public IP address (required for non-bootstrap)\n")
fmt.Printf(" --peers ADDRS - Comma-separated bootstrap peers (for non-bootstrap)\n") fmt.Printf(" --peers ADDRS - Comma-separated bootstrap peer multiaddrs (required for non-bootstrap)\n")
fmt.Printf(" --bootstrap-join ADDR - Bootstrap raft join address (for secondary bootstrap)\n") fmt.Printf(" --bootstrap-join ADDR - Bootstrap raft join address (for secondary bootstrap)\n")
fmt.Printf(" --domain DOMAIN - Domain for HTTPS (optional)\n") fmt.Printf(" --domain DOMAIN - Domain for HTTPS (optional)\n")
fmt.Printf(" --branch BRANCH - Git branch to use (main or nightly, default: main)\n") fmt.Printf(" --branch BRANCH - Git branch to use (main or nightly, default: main)\n")
@ -72,6 +110,7 @@ func showProdHelp() {
fmt.Printf(" stop - Stop all production services (requires root/sudo)\n") fmt.Printf(" stop - Stop all production services (requires root/sudo)\n")
fmt.Printf(" restart - Restart all production services (requires root/sudo)\n") fmt.Printf(" restart - Restart all production services (requires root/sudo)\n")
fmt.Printf(" logs <service> - View production service logs\n") fmt.Printf(" logs <service> - View production service logs\n")
fmt.Printf(" Service aliases: node, ipfs, cluster, gateway, olric\n")
fmt.Printf(" Options:\n") fmt.Printf(" Options:\n")
fmt.Printf(" --follow - Follow logs in real-time\n") fmt.Printf(" --follow - Follow logs in real-time\n")
fmt.Printf(" uninstall - Remove production services (requires root/sudo)\n\n") fmt.Printf(" uninstall - Remove production services (requires root/sudo)\n\n")
@ -83,7 +122,7 @@ func showProdHelp() {
fmt.Printf(" # Join existing cluster\n") fmt.Printf(" # Join existing cluster\n")
fmt.Printf(" sudo dbn prod install --vps-ip 10.0.0.2 --peers /ip4/10.0.0.1/tcp/4001/p2p/Qm...\n\n") fmt.Printf(" sudo dbn prod install --vps-ip 10.0.0.2 --peers /ip4/10.0.0.1/tcp/4001/p2p/Qm...\n\n")
fmt.Printf(" # Secondary bootstrap joining existing cluster\n") fmt.Printf(" # Secondary bootstrap joining existing cluster\n")
fmt.Printf(" sudo dbn prod install --bootstrap --vps-ip 10.0.0.2 --bootstrap-join 10.0.0.1:7001\n\n") fmt.Printf(" sudo dbn prod install --bootstrap --vps-ip 10.0.0.2 --bootstrap-join 10.0.0.1:7001 --peers /ip4/10.0.0.1/tcp/4001/p2p/Qm...\n\n")
fmt.Printf(" # Upgrade using saved branch preference\n") fmt.Printf(" # Upgrade using saved branch preference\n")
fmt.Printf(" sudo dbn prod upgrade --restart\n\n") fmt.Printf(" sudo dbn prod upgrade --restart\n\n")
fmt.Printf(" # Upgrade and switch to nightly branch\n") fmt.Printf(" # Upgrade and switch to nightly branch\n")
@ -96,61 +135,43 @@ func showProdHelp() {
fmt.Printf(" sudo dbn prod restart\n\n") fmt.Printf(" sudo dbn prod restart\n\n")
fmt.Printf(" dbn prod status\n") fmt.Printf(" dbn prod status\n")
fmt.Printf(" dbn prod logs node --follow\n") fmt.Printf(" dbn prod logs node --follow\n")
fmt.Printf(" dbn prod logs gateway --follow\n")
}
func handleProdInstall(args []string) {
// Parse arguments // Parse arguments using flag.FlagSet
force := false fs := flag.NewFlagSet("install", flag.ContinueOnError)
isBootstrap := false fs.SetOutput(os.Stderr)
skipResourceChecks := false
var vpsIP, domain, peersStr, bootstrapJoin, branch string
for i, arg := range args { force := fs.Bool("force", false, "Reconfigure all settings")
switch arg { isBootstrap := fs.Bool("bootstrap", false, "Install as bootstrap node")
case "--force": skipResourceChecks := fs.Bool("ignore-resource-checks", false, "Skip disk/RAM/CPU prerequisite validation")
force = true vpsIP := fs.String("vps-ip", "", "VPS public IP address (required for non-bootstrap)")
case "--bootstrap": domain := fs.String("domain", "", "Domain for HTTPS (optional)")
isBootstrap = true peersStr := fs.String("peers", "", "Comma-separated bootstrap peer multiaddrs (required for non-bootstrap)")
case "--peers": bootstrapJoin := fs.String("bootstrap-join", "", "Bootstrap raft join address (for secondary bootstrap)")
if i+1 < len(args) { branch := fs.String("branch", "main", "Git branch to use (main or nightly)")
peersStr = args[i+1]
} if err := fs.Parse(args); err != nil {
case "--vps-ip": if err == flag.ErrHelp {
if i+1 < len(args) { return
vpsIP = args[i+1]
}
case "--domain":
if i+1 < len(args) {
domain = args[i+1]
}
case "--bootstrap-join":
if i+1 < len(args) {
bootstrapJoin = args[i+1]
}
case "--branch":
if i+1 < len(args) {
branch = args[i+1]
}
case "--ignore-resource-checks":
skipResourceChecks = true
} }
} fmt.Fprintf(os.Stderr, "❌ Failed to parse flags: %v\n", err)
// Validate branch if provided
if branch != "" && branch != "main" && branch != "nightly" {
fmt.Fprintf(os.Stderr, "❌ Invalid branch: %s (must be 'main' or 'nightly')\n", branch)
os.Exit(1) os.Exit(1)
} }
// Default to main if not specified // Validate branch
if branch == "" { if *branch != "main" && *branch != "nightly" {
branch = "main" fmt.Fprintf(os.Stderr, "❌ Invalid branch: %s (must be 'main' or 'nightly')\n", *branch)
os.Exit(1)
} }
// Parse bootstrap peers if provided // Normalize and validate bootstrap peers
var bootstrapPeers []string bootstrapPeers, err := normalizeBootstrapPeers(*peersStr)
if peersStr != "" { if err != nil {
bootstrapPeers = strings.Split(peersStr, ",") fmt.Fprintf(os.Stderr, "❌ Invalid bootstrap peers: %v\n", err)
fmt.Fprintf(os.Stderr, " Example: --peers /ip4/10.0.0.1/tcp/4001/p2p/Qm...,/ip4/10.0.0.2/tcp/4001/p2p/Qm...\n")
os.Exit(1)
} }
// Validate setup requirements // Validate setup requirements
@ -159,19 +180,50 @@ func handleProdInstall(args []string) {
os.Exit(1) os.Exit(1)
} }
// Enforce --vps-ip for non-bootstrap nodes // Validate bootstrap node requirements
if !isBootstrap && vpsIP == "" { if *isBootstrap {
fmt.Fprintf(os.Stderr, "❌ --vps-ip is required for non-bootstrap nodes\n") if *vpsIP == "" {
fmt.Fprintf(os.Stderr, " Usage: sudo dbn prod install --vps-ip <public_ip> --peers <multiaddr>\n") fmt.Fprintf(os.Stderr, "❌ --vps-ip is required for bootstrap nodes\n")
os.Exit(1) fmt.Fprintf(os.Stderr, " Bootstrap nodes must advertise a public IP address for other nodes to connect\n")
fmt.Fprintf(os.Stderr, " Usage: sudo dbn prod install --bootstrap --vps-ip <public_ip>\n")
fmt.Fprintf(os.Stderr, " Example: sudo dbn prod install --bootstrap --vps-ip 203.0.113.1\n")
os.Exit(1)
}
// Validate secondary bootstrap requirements
if *bootstrapJoin == "" {
fmt.Fprintf(os.Stderr, "⚠️ Warning: Primary bootstrap node detected (--bootstrap without --bootstrap-join)\n")
fmt.Fprintf(os.Stderr, " This node will form a new cluster. To join existing cluster as secondary bootstrap:\n")
fmt.Fprintf(os.Stderr, " sudo dbn prod install --bootstrap --vps-ip %s --bootstrap-join <bootstrap_ip>:7001 --peers <multiaddr>\n", *vpsIP)
}
}
// Validate non-bootstrap node requirements
if !*isBootstrap {
if *vpsIP == "" {
fmt.Fprintf(os.Stderr, "❌ --vps-ip is required for non-bootstrap nodes\n")
fmt.Fprintf(os.Stderr, " Usage: sudo dbn prod install --vps-ip <public_ip> --peers <multiaddr>\n")
os.Exit(1)
}
if len(bootstrapPeers) == 0 {
fmt.Fprintf(os.Stderr, "❌ --peers is required for non-bootstrap nodes\n")
fmt.Fprintf(os.Stderr, " Usage: sudo dbn prod install --vps-ip <public_ip> --peers <multiaddr>\n")
fmt.Fprintf(os.Stderr, " Example: --peers /ip4/10.0.0.1/tcp/4001/p2p/Qm...\n")
os.Exit(1)
}
} }
debrosHome := "/home/debros" debrosHome := "/home/debros"
debrosDir := debrosHome + "/.debros" debrosDir := debrosHome + "/.debros"
setup := production.NewProductionSetup(debrosHome, os.Stdout, force, branch, false, skipResourceChecks) setup := production.NewProductionSetup(debrosHome, os.Stdout, *force, *branch, false, *skipResourceChecks)
// Check port availability before proceeding
if err := ensurePortsAvailable("prod install", defaultPorts()); err != nil {
fmt.Fprintf(os.Stderr, "❌ %v\n", err)
os.Exit(1)
}
// Save branch preference for future upgrades // Save branch preference for future upgrades
if err := production.SaveBranchPreference(debrosDir, branch); err != nil { if err := production.SaveBranchPreference(debrosDir, *branch); err != nil {
fmt.Fprintf(os.Stderr, "⚠️ Warning: Failed to save branch preference: %v\n", err) fmt.Fprintf(os.Stderr, "⚠️ Warning: Failed to save branch preference: %v\n", err)
} }
@ -198,14 +250,14 @@ func handleProdInstall(args []string) {
// Determine node type early // Determine node type early
nodeType := "node" nodeType := "node"
if isBootstrap { if *isBootstrap {
nodeType = "bootstrap" nodeType = "bootstrap"
} }
// Phase 3: Generate secrets FIRST (before service initialization) // Phase 3: Generate secrets FIRST (before service initialization)
// This ensures cluster secret and swarm key exist before repos are seeded // This ensures cluster secret and swarm key exist before repos are seeded
fmt.Printf("\n🔐 Phase 3: Generating secrets...\n") fmt.Printf("\n🔐 Phase 3: Generating secrets...\n")
if err := setup.Phase3GenerateSecrets(isBootstrap); err != nil { if err := setup.Phase3GenerateSecrets(*isBootstrap); err != nil {
fmt.Fprintf(os.Stderr, "❌ Secret generation failed: %v\n", err) fmt.Fprintf(os.Stderr, "❌ Secret generation failed: %v\n", err)
os.Exit(1) os.Exit(1)
} }
@ -219,56 +271,70 @@ func handleProdInstall(args []string) {
// Phase 4: Generate configs // Phase 4: Generate configs
fmt.Printf("\n⚙ Phase 4: Generating configurations...\n") fmt.Printf("\n⚙ Phase 4: Generating configurations...\n")
enableHTTPS := domain != "" enableHTTPS := *domain != ""
if err := setup.Phase4GenerateConfigs(isBootstrap, bootstrapPeers, vpsIP, enableHTTPS, domain, bootstrapJoin); err != nil { if err := setup.Phase4GenerateConfigs(*isBootstrap, bootstrapPeers, *vpsIP, enableHTTPS, *domain, *bootstrapJoin); err != nil {
fmt.Fprintf(os.Stderr, "❌ Configuration generation failed: %v\n", err) fmt.Fprintf(os.Stderr, "❌ Configuration generation failed: %v\n", err)
os.Exit(1) os.Exit(1)
} }
// Phase 5: Create systemd services // Phase 5: Create systemd services
fmt.Printf("\n🔧 Phase 5: Creating systemd services...\n") fmt.Printf("\n🔧 Phase 5: Creating systemd services...\n")
if err := setup.Phase5CreateSystemdServices(nodeType, vpsIP); err != nil { if err := setup.Phase5CreateSystemdServices(nodeType, *vpsIP); err != nil {
fmt.Fprintf(os.Stderr, "❌ Service creation failed: %v\n", err) fmt.Fprintf(os.Stderr, "❌ Service creation failed: %v\n", err)
os.Exit(1) os.Exit(1)
} }
// Give services a moment to fully initialize before verification
fmt.Printf("\n⏳ Waiting for services to initialize...\n")
time.Sleep(5 * time.Second)
// Verify all services are running correctly
if err := verifyProductionRuntime("prod install"); err != nil {
fmt.Fprintf(os.Stderr, "❌ %v\n", err)
fmt.Fprintf(os.Stderr, " Installation completed but services are not healthy. Check logs with: dbn prod logs <service>\n")
os.Exit(1)
}
// Log completion with actual peer ID // Log completion with actual peer ID
setup.LogSetupComplete(setup.NodePeerID) setup.LogSetupComplete(setup.NodePeerID)
fmt.Printf("✅ Production installation complete!\n\n") fmt.Printf("✅ Production installation complete and healthy!\n\n")
} }
func handleProdUpgrade(args []string) { func handleProdUpgrade(args []string) {
// Parse arguments // Parse arguments using flag.FlagSet
force := false fs := flag.NewFlagSet("upgrade", flag.ContinueOnError)
restartServices := false fs.SetOutput(os.Stderr)
noPull := false
branch := "" force := fs.Bool("force", false, "Reconfigure all settings")
for i, arg := range args { restartServices := fs.Bool("restart", false, "Automatically restart services after upgrade")
if arg == "--force" { noPull := fs.Bool("no-pull", false, "Skip git clone/pull, use existing /home/debros/src")
force = true branch := fs.String("branch", "", "Git branch to use (main or nightly, uses saved preference if not specified)")
}
if arg == "--restart" { // Support legacy flags for backwards compatibility
restartServices = true fs.Bool("nightly", false, "Use nightly branch (deprecated, use --branch nightly)")
} fs.Bool("main", false, "Use main branch (deprecated, use --branch main)")
if arg == "--no-pull" {
noPull = true if err := fs.Parse(args); err != nil {
} if err == flag.ErrHelp {
if arg == "--nightly" { return
branch = "nightly"
}
if arg == "--main" {
branch = "main"
}
if arg == "--branch" {
if i+1 < len(args) {
branch = args[i+1]
}
} }
fmt.Fprintf(os.Stderr, "❌ Failed to parse flags: %v\n", err)
os.Exit(1)
}
// Handle legacy flags
nightlyFlag := fs.Lookup("nightly")
mainFlag := fs.Lookup("main")
if nightlyFlag != nil && nightlyFlag.Value.String() == "true" {
*branch = "nightly"
}
if mainFlag != nil && mainFlag.Value.String() == "true" {
*branch = "main"
} }
// Validate branch if provided // Validate branch if provided
if branch != "" && branch != "main" && branch != "nightly" { if *branch != "" && *branch != "main" && *branch != "nightly" {
fmt.Fprintf(os.Stderr, "❌ Invalid branch: %s (must be 'main' or 'nightly')\n", branch) fmt.Fprintf(os.Stderr, "❌ Invalid branch: %s (must be 'main' or 'nightly')\n", *branch)
os.Exit(1) os.Exit(1)
} }
@ -283,20 +349,20 @@ func handleProdUpgrade(args []string) {
fmt.Printf(" This will preserve existing configurations and data\n") fmt.Printf(" This will preserve existing configurations and data\n")
fmt.Printf(" Configurations will be updated to latest format\n\n") fmt.Printf(" Configurations will be updated to latest format\n\n")
setup := production.NewProductionSetup(debrosHome, os.Stdout, force, branch, noPull, false) setup := production.NewProductionSetup(debrosHome, os.Stdout, *force, *branch, *noPull, false)
// Log if --no-pull is enabled // Log if --no-pull is enabled
if noPull { if *noPull {
fmt.Printf(" ⚠️ --no-pull flag enabled: Skipping git clone/pull\n") fmt.Printf(" ⚠️ --no-pull flag enabled: Skipping git clone/pull\n")
fmt.Printf(" Using existing repository at %s/src\n", debrosHome) fmt.Printf(" Using existing repository at %s/src\n", debrosHome)
} }
// If branch was explicitly provided, save it for future upgrades // If branch was explicitly provided, save it for future upgrades
if branch != "" { if *branch != "" {
if err := production.SaveBranchPreference(debrosDir, branch); err != nil { if err := production.SaveBranchPreference(debrosDir, *branch); err != nil {
fmt.Fprintf(os.Stderr, "⚠️ Warning: Failed to save branch preference: %v\n", err) fmt.Fprintf(os.Stderr, "⚠️ Warning: Failed to save branch preference: %v\n", err)
} else { } else {
fmt.Printf(" Using branch: %s (saved for future upgrades)\n", branch) fmt.Printf(" Using branch: %s (saved for future upgrades)\n", *branch)
} }
} else { } else {
// Show which branch is being used (read from saved preference) // Show which branch is being used (read from saved preference)
@ -347,6 +413,12 @@ func handleProdUpgrade(args []string) {
time.Sleep(2 * time.Second) time.Sleep(2 * time.Second)
} }
// Check port availability after stopping services
if err := ensurePortsAvailable("prod upgrade", defaultPorts()); err != nil {
fmt.Fprintf(os.Stderr, "❌ %v\n", err)
os.Exit(1)
}
// Phase 2b: Install/update binaries // Phase 2b: Install/update binaries
fmt.Printf("\nPhase 2b: Installing/updating binaries...\n") fmt.Printf("\nPhase 2b: Installing/updating binaries...\n")
if err := setup.Phase2bInstallBinaries(); err != nil { if err := setup.Phase2bInstallBinaries(); err != nil {
@ -438,22 +510,37 @@ func handleProdUpgrade(args []string) {
// Extract bootstrap peers from existing node config // Extract bootstrap peers from existing node config
bootstrapPeers := extractBootstrapPeers(nodeConfigPath) bootstrapPeers := extractBootstrapPeers(nodeConfigPath)
// Extract bootstrap join address if it's a bootstrap node // Extract VPS IP from advertise addresses and bootstrap join address
if nodeType == "bootstrap" { vpsIP := ""
if data, err := os.ReadFile(nodeConfigPath); err == nil { if data, err := os.ReadFile(nodeConfigPath); err == nil {
configStr := string(data) configStr := string(data)
for _, line := range strings.Split(configStr, "\n") { for _, line := range strings.Split(configStr, "\n") {
trimmed := strings.TrimSpace(line) trimmed := strings.TrimSpace(line)
if strings.HasPrefix(trimmed, "rqlite_join_address:") { // Try to extract VPS IP from http_adv_address or raft_adv_address
parts := strings.SplitN(trimmed, ":", 2) // Only set if not already found (first valid IP wins)
if len(parts) > 1 { if vpsIP == "" && (strings.HasPrefix(trimmed, "http_adv_address:") || strings.HasPrefix(trimmed, "raft_adv_address:")) {
bootstrapJoin = strings.TrimSpace(parts[1]) parts := strings.SplitN(trimmed, ":", 2)
bootstrapJoin = strings.Trim(bootstrapJoin, "\"'") if len(parts) > 1 {
if bootstrapJoin == "null" || bootstrapJoin == "" { addr := strings.TrimSpace(parts[1])
bootstrapJoin = "" addr = strings.Trim(addr, "\"'")
if addr != "" && addr != "null" && addr != "localhost:5001" && addr != "localhost:7001" {
// Extract IP from address (format: "IP:PORT" or "[IPv6]:PORT")
if host, _, err := net.SplitHostPort(addr); err == nil && host != "" && host != "localhost" {
vpsIP = host
// Continue loop to also check for bootstrap join address
} }
} }
break }
}
// Extract bootstrap join address if it's a bootstrap node
if nodeType == "bootstrap" && strings.HasPrefix(trimmed, "rqlite_join_address:") {
parts := strings.SplitN(trimmed, ":", 2)
if len(parts) > 1 {
bootstrapJoin = strings.TrimSpace(parts[1])
bootstrapJoin = strings.Trim(bootstrapJoin, "\"'")
if bootstrapJoin == "null" || bootstrapJoin == "" {
bootstrapJoin = ""
}
} }
} }
} }
@ -487,6 +574,9 @@ func handleProdUpgrade(args []string) {
if len(bootstrapPeers) > 0 { if len(bootstrapPeers) > 0 {
fmt.Printf(" - Bootstrap peers: %d peer(s) preserved\n", len(bootstrapPeers)) fmt.Printf(" - Bootstrap peers: %d peer(s) preserved\n", len(bootstrapPeers))
} }
if vpsIP != "" {
fmt.Printf(" - VPS IP: %s\n", vpsIP)
}
if domain != "" { if domain != "" {
fmt.Printf(" - Domain: %s\n", domain) fmt.Printf(" - Domain: %s\n", domain)
} }
@ -494,7 +584,7 @@ func handleProdUpgrade(args []string) {
fmt.Printf(" - Bootstrap join address: %s\n", bootstrapJoin) fmt.Printf(" - Bootstrap join address: %s\n", bootstrapJoin)
} }
if err := setup.Phase4GenerateConfigs(nodeType == "bootstrap", bootstrapPeers, "", enableHTTPS, domain, bootstrapJoin); err != nil { if err := setup.Phase4GenerateConfigs(nodeType == "bootstrap", bootstrapPeers, vpsIP, enableHTTPS, domain, bootstrapJoin); err != nil {
fmt.Fprintf(os.Stderr, "⚠️ Config generation warning: %v\n", err) fmt.Fprintf(os.Stderr, "⚠️ Config generation warning: %v\n", err)
fmt.Fprintf(os.Stderr, " Existing configs preserved\n") fmt.Fprintf(os.Stderr, " Existing configs preserved\n")
} }
@ -506,23 +596,36 @@ func handleProdUpgrade(args []string) {
} }
fmt.Printf("\n✅ Upgrade complete!\n") fmt.Printf("\n✅ Upgrade complete!\n")
if restartServices { if *restartServices {
fmt.Printf(" Restarting services...\n") fmt.Printf(" Restarting services...\n")
// Reload systemd daemon // Reload systemd daemon
exec.Command("systemctl", "daemon-reload").Run() if err := exec.Command("systemctl", "daemon-reload").Run(); err != nil {
// Restart services to apply changes fmt.Fprintf(os.Stderr, " ⚠️ Warning: Failed to reload systemd daemon: %v\n", err)
services := []string{
"debros-ipfs-bootstrap",
"debros-ipfs-cluster-bootstrap",
// Note: RQLite is managed by node process, not as separate service
"debros-olric",
"debros-node-bootstrap",
"debros-gateway",
} }
for _, svc := range services { // Restart services to apply changes - use getProductionServices to only restart existing services
exec.Command("systemctl", "restart", svc).Run() services := getProductionServices()
if len(services) == 0 {
fmt.Printf(" ⚠️ No services found to restart\n")
} else {
for _, svc := range services {
if err := exec.Command("systemctl", "restart", svc).Run(); err != nil {
fmt.Printf(" ⚠️ Failed to restart %s: %v\n", svc, err)
} else {
fmt.Printf(" ✓ Restarted %s\n", svc)
}
}
fmt.Printf(" ✓ All services restarted\n")
// Give services a moment to fully initialize before verification
fmt.Printf(" ⏳ Waiting for services to initialize...\n")
time.Sleep(5 * time.Second)
// Verify services are healthy after restart
if err := verifyProductionRuntime("prod upgrade --restart"); err != nil {
fmt.Fprintf(os.Stderr, "❌ %v\n", err)
fmt.Fprintf(os.Stderr, " Upgrade completed but services are not healthy. Check logs with: dbn prod logs <service>\n")
os.Exit(1)
}
fmt.Printf(" ✅ All services verified healthy\n")
} }
fmt.Printf(" ✓ Services restarted\n")
} else { } else {
fmt.Printf(" To apply changes, restart services:\n") fmt.Printf(" To apply changes, restart services:\n")
fmt.Printf(" sudo systemctl daemon-reload\n") fmt.Printf(" sudo systemctl daemon-reload\n")
@ -587,18 +690,113 @@ func handleProdStatus() {
fmt.Printf("\nView logs with: dbn prod logs <service>\n") fmt.Printf("\nView logs with: dbn prod logs <service>\n")
} }
// resolveServiceName resolves service aliases to actual systemd service names
func resolveServiceName(alias string) ([]string, error) {
// Service alias mapping
aliases := map[string][]string{
"node": {"debros-node-bootstrap", "debros-node-node"},
"ipfs": {"debros-ipfs-bootstrap", "debros-ipfs-node"},
"cluster": {"debros-ipfs-cluster-bootstrap", "debros-ipfs-cluster-node"},
"ipfs-cluster": {"debros-ipfs-cluster-bootstrap", "debros-ipfs-cluster-node"},
"gateway": {"debros-gateway"},
"olric": {"debros-olric"},
"rqlite": {"debros-node-bootstrap", "debros-node-node"}, // RQLite logs are in node logs
}
// Check if it's an alias
if serviceNames, ok := aliases[strings.ToLower(alias)]; ok {
// Filter to only existing services
var existing []string
for _, svc := range serviceNames {
unitPath := filepath.Join("/etc/systemd/system", svc+".service")
if _, err := os.Stat(unitPath); err == nil {
existing = append(existing, svc)
}
}
if len(existing) == 0 {
return nil, fmt.Errorf("no services found for alias %q", alias)
}
return existing, nil
}
// Check if it's already a full service name
unitPath := filepath.Join("/etc/systemd/system", alias+".service")
if _, err := os.Stat(unitPath); err == nil {
return []string{alias}, nil
}
// Try without .service suffix
if !strings.HasSuffix(alias, ".service") {
unitPath = filepath.Join("/etc/systemd/system", alias+".service")
if _, err := os.Stat(unitPath); err == nil {
return []string{alias}, nil
}
}
return nil, fmt.Errorf("service %q not found. Use: node, ipfs, cluster, gateway, olric, or full service name", alias)
}
func handleProdLogs(args []string) { func handleProdLogs(args []string) {
if len(args) == 0 { if len(args) == 0 {
fmt.Fprintf(os.Stderr, "Usage: dbn prod logs <service> [--follow]\n") fmt.Fprintf(os.Stderr, "Usage: dbn prod logs <service> [--follow]\n")
fmt.Fprintf(os.Stderr, "\nService aliases:\n")
fmt.Fprintf(os.Stderr, " node, ipfs, cluster, gateway, olric\n")
fmt.Fprintf(os.Stderr, "\nOr use full service name:\n")
fmt.Fprintf(os.Stderr, " debros-node-bootstrap, debros-gateway, etc.\n")
os.Exit(1) os.Exit(1)
} }
service := args[0] serviceAlias := args[0]
follow := false follow := false
if len(args) > 1 && (args[1] == "--follow" || args[1] == "-f") { if len(args) > 1 && (args[1] == "--follow" || args[1] == "-f") {
follow = true follow = true
} }
// Resolve service alias to actual service names
serviceNames, err := resolveServiceName(serviceAlias)
if err != nil {
fmt.Fprintf(os.Stderr, "❌ %v\n", err)
fmt.Fprintf(os.Stderr, "\nAvailable service aliases: node, ipfs, cluster, gateway, olric\n")
fmt.Fprintf(os.Stderr, "Or use full service name like: debros-node-bootstrap\n")
os.Exit(1)
}
// If multiple services match, show all of them
if len(serviceNames) > 1 {
if follow {
fmt.Fprintf(os.Stderr, "⚠️ Multiple services match alias %q:\n", serviceAlias)
for _, svc := range serviceNames {
fmt.Fprintf(os.Stderr, " - %s\n", svc)
}
fmt.Fprintf(os.Stderr, "\nShowing logs for all matching services...\n\n")
// Use journalctl with multiple units (build args correctly)
args := []string{}
for _, svc := range serviceNames {
args = append(args, "-u", svc)
}
args = append(args, "-f")
cmd := exec.Command("journalctl", args...)
cmd.Stdout = os.Stdout
cmd.Stderr = os.Stderr
cmd.Stdin = os.Stdin
cmd.Run()
} else {
for i, svc := range serviceNames {
if i > 0 {
fmt.Printf("\n" + strings.Repeat("=", 70) + "\n\n")
}
fmt.Printf("📋 Logs for %s:\n\n", svc)
cmd := exec.Command("journalctl", "-u", svc, "-n", "50")
cmd.Stdout = os.Stdout
cmd.Stderr = os.Stderr
cmd.Run()
}
}
return
}
// Single service
service := serviceNames[0]
if follow { if follow {
fmt.Printf("Following logs for %s (press Ctrl+C to stop)...\n\n", service) fmt.Printf("Following logs for %s (press Ctrl+C to stop)...\n\n", service)
cmd := exec.Command("journalctl", "-u", service, "-f") cmd := exec.Command("journalctl", "-u", service, "-f")
@ -614,6 +812,165 @@ func handleProdLogs(args []string) {
} }
} }
// errServiceNotFound marks units that systemd does not know about.
var errServiceNotFound = errors.New("service not found")
type portSpec struct {
Name string
Port int
}
var servicePorts = map[string][]portSpec{
"debros-gateway": {{"Gateway API", 6001}},
"debros-olric": {{"Olric HTTP", 3320}, {"Olric Memberlist", 3322}},
"debros-node-bootstrap": {{"RQLite HTTP", 5001}, {"RQLite Raft", 7001}, {"IPFS Cluster API", 9094}},
"debros-node-node": {{"RQLite HTTP", 5001}, {"RQLite Raft", 7001}, {"IPFS Cluster API", 9094}},
"debros-ipfs-bootstrap": {{"IPFS API", 4501}, {"IPFS Gateway", 8080}, {"IPFS Swarm", 4001}},
"debros-ipfs-node": {{"IPFS API", 4501}, {"IPFS Gateway", 8080}, {"IPFS Swarm", 4001}},
"debros-ipfs-cluster-bootstrap": {{"IPFS Cluster API", 9094}},
"debros-ipfs-cluster-node": {{"IPFS Cluster API", 9094}},
}
// defaultPorts is used for fresh installs/upgrades before unit files exist.
func defaultPorts() []portSpec {
return []portSpec{
{"IPFS Swarm", 4001},
{"IPFS API", 4501},
{"IPFS Gateway", 8080},
{"Gateway API", 6001},
{"RQLite HTTP", 5001},
{"RQLite Raft", 7001},
{"IPFS Cluster API", 9094},
{"Olric HTTP", 3320},
{"Olric Memberlist", 3322},
}
}
func isServiceActive(service string) (bool, error) {
cmd := exec.Command("systemctl", "is-active", "--quiet", service)
if err := cmd.Run(); err != nil {
if exitErr, ok := err.(*exec.ExitError); ok {
switch exitErr.ExitCode() {
case 3:
return false, nil
case 4:
return false, errServiceNotFound
}
}
return false, err
}
return true, nil
}
func collectPortsForServices(services []string, skipActive bool) ([]portSpec, error) {
seen := make(map[int]portSpec)
for _, svc := range services {
if skipActive {
active, err := isServiceActive(svc)
if err != nil {
return nil, fmt.Errorf("unable to check %s: %w", svc, err)
}
if active {
continue
}
}
for _, spec := range servicePorts[svc] {
if _, ok := seen[spec.Port]; !ok {
seen[spec.Port] = spec
}
}
}
ports := make([]portSpec, 0, len(seen))
for _, spec := range seen {
ports = append(ports, spec)
}
return ports, nil
}
func ensurePortsAvailable(action string, ports []portSpec) error {
for _, spec := range ports {
ln, err := net.Listen("tcp", fmt.Sprintf("0.0.0.0:%d", spec.Port))
if err != nil {
if errors.Is(err, syscall.EADDRINUSE) || strings.Contains(err.Error(), "address already in use") {
return fmt.Errorf("%s cannot continue: %s (port %d) is already in use", action, spec.Name, spec.Port)
}
return fmt.Errorf("%s cannot continue: failed to inspect %s (port %d): %w", action, spec.Name, spec.Port, err)
}
_ = ln.Close()
}
return nil
}
func checkHTTP(client *http.Client, method, url, label string) error {
req, err := http.NewRequest(method, url, nil)
if err != nil {
return fmt.Errorf("%s check failed: %w", label, err)
}
resp, err := client.Do(req)
if err != nil {
return fmt.Errorf("%s check failed: %w", label, err)
}
defer resp.Body.Close()
if resp.StatusCode < 200 || resp.StatusCode >= 300 {
return fmt.Errorf("%s returned HTTP %d", label, resp.StatusCode)
}
return nil
}
func serviceExists(name string) bool {
unitPath := filepath.Join("/etc/systemd/system", name+".service")
_, err := os.Stat(unitPath)
return err == nil
}
func verifyProductionRuntime(action string) error {
services := getProductionServices()
issues := make([]string, 0)
for _, svc := range services {
active, err := isServiceActive(svc)
if err != nil {
issues = append(issues, fmt.Sprintf("%s status unknown (%v)", svc, err))
continue
}
if !active {
issues = append(issues, fmt.Sprintf("%s is inactive", svc))
}
}
client := &http.Client{Timeout: 3 * time.Second}
if err := checkHTTP(client, "GET", "http://127.0.0.1:5001/status", "RQLite status"); err == nil {
} else if serviceExists("debros-node-bootstrap") || serviceExists("debros-node-node") {
issues = append(issues, err.Error())
}
if err := checkHTTP(client, "POST", "http://127.0.0.1:4501/api/v0/version", "IPFS API"); err == nil {
} else if serviceExists("debros-ipfs-bootstrap") || serviceExists("debros-ipfs-node") {
issues = append(issues, err.Error())
}
if err := checkHTTP(client, "GET", "http://127.0.0.1:9094/health", "IPFS Cluster"); err == nil {
} else if serviceExists("debros-ipfs-cluster-bootstrap") || serviceExists("debros-ipfs-cluster-node") {
issues = append(issues, err.Error())
}
if err := checkHTTP(client, "GET", "http://127.0.0.1:6001/health", "Gateway health"); err == nil {
} else if serviceExists("debros-gateway") {
issues = append(issues, err.Error())
}
if err := checkHTTP(client, "GET", "http://127.0.0.1:3320/ping", "Olric ping"); err == nil {
} else if serviceExists("debros-olric") {
issues = append(issues, err.Error())
}
if len(issues) > 0 {
return fmt.Errorf("%s verification failed:\n - %s", action, strings.Join(issues, "\n - "))
}
return nil
}
// getProductionServices returns a list of all DeBros production service names that exist // getProductionServices returns a list of all DeBros production service names that exist
func getProductionServices() []string { func getProductionServices() []string {
// All possible service names (both bootstrap and node variants) // All possible service names (both bootstrap and node variants)
@ -655,16 +1012,57 @@ func handleProdStart() {
return return
} }
// Check which services are inactive and need to be started
inactive := make([]string, 0, len(services))
for _, svc := range services { for _, svc := range services {
cmd := exec.Command("systemctl", "start", svc) active, err := isServiceActive(svc)
if err := cmd.Run(); err != nil { if err != nil {
fmt.Printf(" ⚠️ Unable to check %s: %v\n", svc, err)
continue
}
if active {
fmt.Printf(" %s already running\n", svc)
continue
}
inactive = append(inactive, svc)
}
if len(inactive) == 0 {
fmt.Printf("\n✅ All services already running\n")
return
}
// Check port availability for services we're about to start
ports, err := collectPortsForServices(inactive, false)
if err != nil {
fmt.Fprintf(os.Stderr, "❌ %v\n", err)
os.Exit(1)
}
if err := ensurePortsAvailable("prod start", ports); err != nil {
fmt.Fprintf(os.Stderr, "❌ %v\n", err)
os.Exit(1)
}
// Start inactive services
for _, svc := range inactive {
if err := exec.Command("systemctl", "start", svc).Run(); err != nil {
fmt.Printf(" ⚠️ Failed to start %s: %v\n", svc, err) fmt.Printf(" ⚠️ Failed to start %s: %v\n", svc, err)
} else { } else {
fmt.Printf(" ✓ Started %s\n", svc) fmt.Printf(" ✓ Started %s\n", svc)
} }
} }
fmt.Printf("\n✅ All services started\n") // Give services a moment to fully initialize before verification
fmt.Printf(" ⏳ Waiting for services to initialize...\n")
time.Sleep(3 * time.Second)
// Verify all services are healthy
if err := verifyProductionRuntime("prod start"); err != nil {
fmt.Fprintf(os.Stderr, "❌ %v\n", err)
os.Exit(1)
}
fmt.Printf("\n✅ All services started and healthy\n")
} }
func handleProdStop() { func handleProdStop() {
@ -681,16 +1079,41 @@ func handleProdStop() {
return return
} }
hadError := false
for _, svc := range services { for _, svc := range services {
cmd := exec.Command("systemctl", "stop", svc) active, err := isServiceActive(svc)
if err := cmd.Run(); err != nil { if err != nil {
fmt.Printf(" ⚠️ Unable to check %s: %v\n", svc, err)
hadError = true
continue
}
if !active {
fmt.Printf(" %s already stopped\n", svc)
continue
}
if err := exec.Command("systemctl", "stop", svc).Run(); err != nil {
fmt.Printf(" ⚠️ Failed to stop %s: %v\n", svc, err) fmt.Printf(" ⚠️ Failed to stop %s: %v\n", svc, err)
hadError = true
continue
}
// Verify the service actually stopped and didn't restart itself
if stillActive, err := isServiceActive(svc); err != nil {
fmt.Printf(" ⚠️ Unable to verify %s stop: %v\n", svc, err)
hadError = true
} else if stillActive {
fmt.Printf(" ❌ %s restarted itself immediately\n", svc)
hadError = true
} else { } else {
fmt.Printf(" ✓ Stopped %s\n", svc) fmt.Printf(" ✓ Stopped %s\n", svc)
} }
} }
fmt.Printf("\n✅ All services stopped\n") if hadError {
fmt.Fprintf(os.Stderr, "\n❌ One or more services failed to stop cleanly\n")
os.Exit(1)
}
fmt.Printf("\n✅ All services stopped and remain inactive\n")
} }
func handleProdRestart() { func handleProdRestart() {
@ -707,16 +1130,57 @@ func handleProdRestart() {
return return
} }
// Stop all active services first
fmt.Printf(" Stopping services...\n")
for _, svc := range services { for _, svc := range services {
cmd := exec.Command("systemctl", "restart", svc) active, err := isServiceActive(svc)
if err := cmd.Run(); err != nil { if err != nil {
fmt.Printf(" ⚠️ Failed to restart %s: %v\n", svc, err) fmt.Printf(" ⚠️ Unable to check %s: %v\n", svc, err)
continue
}
if !active {
fmt.Printf(" %s was already stopped\n", svc)
continue
}
if err := exec.Command("systemctl", "stop", svc).Run(); err != nil {
fmt.Printf(" ⚠️ Failed to stop %s: %v\n", svc, err)
} else { } else {
fmt.Printf(" ✓ Restarted %s\n", svc) fmt.Printf(" ✓ Stopped %s\n", svc)
} }
} }
fmt.Printf("\n✅ All services restarted\n") // Check port availability before restarting
ports, err := collectPortsForServices(services, false)
if err != nil {
fmt.Fprintf(os.Stderr, "❌ %v\n", err)
os.Exit(1)
}
if err := ensurePortsAvailable("prod restart", ports); err != nil {
fmt.Fprintf(os.Stderr, "❌ %v\n", err)
os.Exit(1)
}
// Start all services
fmt.Printf(" Starting services...\n")
for _, svc := range services {
if err := exec.Command("systemctl", "start", svc).Run(); err != nil {
fmt.Printf(" ⚠️ Failed to start %s: %v\n", svc, err)
} else {
fmt.Printf(" ✓ Started %s\n", svc)
}
}
// Give services a moment to fully initialize before verification
fmt.Printf(" ⏳ Waiting for services to initialize...\n")
time.Sleep(3 * time.Second)
// Verify all services are healthy
if err := verifyProductionRuntime("prod restart"); err != nil {
fmt.Fprintf(os.Stderr, "❌ %v\n", err)
os.Exit(1)
}
fmt.Printf("\n✅ All services restarted and healthy\n")
} }
func handleProdUninstall() { func handleProdUninstall() {

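The install, start, and restart health gates added in this file all start from the same primitive: try to bind each required TCP port before launching anything. A stripped-down, self-contained sketch of that probe (`probePorts` is an illustrative name; the command itself uses the ensurePortsAvailable and defaultPorts helpers shown above):

```go
package main

import (
	"fmt"
	"net"
)

// probePorts tries to bind each TCP port and reports the ones already in use,
// mirroring the port-availability checks added in this commit.
func probePorts(ports map[string]int) []string {
	var busy []string
	for name, port := range ports {
		ln, err := net.Listen("tcp", fmt.Sprintf("0.0.0.0:%d", port))
		if err != nil {
			busy = append(busy, fmt.Sprintf("%s (port %d)", name, port))
			continue
		}
		_ = ln.Close() // port is free; release it immediately
	}
	return busy
}

func main() {
	busy := probePorts(map[string]int{
		"Gateway API": 6001,
		"RQLite HTTP": 5001,
		"RQLite Raft": 7001,
	})
	if len(busy) > 0 {
		fmt.Println("ports already in use:", busy)
		return
	}
	fmt.Println("all required ports are available")
}
```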

@ -4,6 +4,7 @@ import (
"crypto/rand" "crypto/rand"
"encoding/hex" "encoding/hex"
"fmt" "fmt"
"net"
"os" "os"
"os/exec" "os/exec"
"path/filepath" "path/filepath"
@ -12,6 +13,7 @@ import (
"github.com/DeBrosOfficial/network/pkg/environments/templates" "github.com/DeBrosOfficial/network/pkg/environments/templates"
"github.com/libp2p/go-libp2p/core/crypto" "github.com/libp2p/go-libp2p/core/crypto"
"github.com/libp2p/go-libp2p/core/peer" "github.com/libp2p/go-libp2p/core/peer"
"github.com/multiformats/go-multiaddr"
) )
// ConfigGenerator manages generation of node, gateway, and service configs
@ -26,6 +28,69 @@ func NewConfigGenerator(debrosDir string) *ConfigGenerator {
}
}
// extractIPFromMultiaddr extracts the IP address from a bootstrap peer multiaddr
// Supports IP4, IP6, DNS4, DNS6, and DNSADDR protocols
// Returns the IP address as a string, or empty string if extraction/resolution fails
func extractIPFromMultiaddr(multiaddrStr string) string {
ma, err := multiaddr.NewMultiaddr(multiaddrStr)
if err != nil {
return ""
}
// First, try to extract direct IP address
var ip net.IP
var dnsName string
multiaddr.ForEach(ma, func(c multiaddr.Component) bool {
switch c.Protocol().Code {
case multiaddr.P_IP4, multiaddr.P_IP6:
ip = net.ParseIP(c.Value())
return false // Stop iteration - found IP
case multiaddr.P_DNS4, multiaddr.P_DNS6, multiaddr.P_DNSADDR:
dnsName = c.Value()
// Continue to check for IP, but remember DNS name as fallback
}
return true
})
// If we found a direct IP, return it
if ip != nil {
return ip.String()
}
// If we found a DNS name, try to resolve it
if dnsName != "" {
if resolvedIPs, err := net.LookupIP(dnsName); err == nil && len(resolvedIPs) > 0 {
// Prefer IPv4 addresses, but accept IPv6 if that's all we have
for _, resolvedIP := range resolvedIPs {
if resolvedIP.To4() != nil {
return resolvedIP.String()
}
}
// Return first IPv6 address if no IPv4 found
return resolvedIPs[0].String()
}
}
return ""
}
// inferBootstrapIP extracts the IP address from bootstrap peer multiaddrs
// Iterates through all bootstrap peers to find a valid IP (supports DNS resolution)
// Falls back to vpsIP if provided, otherwise returns empty string
func inferBootstrapIP(bootstrapPeers []string, vpsIP string) string {
// Try to extract IP from each bootstrap peer (in order)
for _, peer := range bootstrapPeers {
if ip := extractIPFromMultiaddr(peer); ip != "" {
return ip
}
}
// Fall back to vpsIP if provided
if vpsIP != "" {
return vpsIP
}
return ""
}
// GenerateNodeConfig generates node.yaml configuration
func (cg *ConfigGenerator) GenerateNodeConfig(isBootstrap bool, bootstrapPeers []string, vpsIP string, bootstrapJoin string) (string, error) {
var nodeID string
@ -35,6 +100,38 @@ func (cg *ConfigGenerator) GenerateNodeConfig(isBootstrap bool, bootstrapPeers [
nodeID = "node" nodeID = "node"
} }
// Determine advertise addresses
// For bootstrap: use vpsIP if provided, otherwise localhost
// For regular nodes: infer from bootstrap peers or use vpsIP
var httpAdvAddr, raftAdvAddr string
if isBootstrap {
if vpsIP != "" {
httpAdvAddr = net.JoinHostPort(vpsIP, "5001")
raftAdvAddr = net.JoinHostPort(vpsIP, "7001")
} else {
httpAdvAddr = "localhost:5001"
raftAdvAddr = "localhost:7001"
}
} else {
// Regular node: infer from bootstrap peers or use vpsIP
bootstrapIP := inferBootstrapIP(bootstrapPeers, vpsIP)
if bootstrapIP != "" {
// Use the bootstrap IP for advertise addresses (this node should be reachable at same network)
// If vpsIP is provided, use it; otherwise use bootstrap IP
if vpsIP != "" {
httpAdvAddr = net.JoinHostPort(vpsIP, "5001")
raftAdvAddr = net.JoinHostPort(vpsIP, "7001")
} else {
httpAdvAddr = net.JoinHostPort(bootstrapIP, "5001")
raftAdvAddr = net.JoinHostPort(bootstrapIP, "7001")
}
} else {
// Fallback to localhost if nothing can be inferred
httpAdvAddr = "localhost:5001"
raftAdvAddr = "localhost:7001"
}
}
if isBootstrap {
// Bootstrap node - populate peer list and optional join address
data := templates.BootstrapConfigData{
@ -47,14 +144,35 @@ func (cg *ConfigGenerator) GenerateNodeConfig(isBootstrap bool, bootstrapPeers [
IPFSAPIPort: 4501,
BootstrapPeers: bootstrapPeers,
RQLiteJoinAddress: bootstrapJoin,
HTTPAdvAddress: httpAdvAddr,
RaftAdvAddress: raftAdvAddr,
}
return templates.RenderBootstrapConfig(data)
}
-// Regular node - must have join address
-rqliteJoinAddr := "localhost:7001"
-if vpsIP != "" {
-rqliteJoinAddr = vpsIP + ":7001"
-}
+// Regular node - infer join address from bootstrap peers
+// MUST extract from bootstrap_peers - no fallback to vpsIP (would cause self-join)
+var rqliteJoinAddr string
+bootstrapIP := inferBootstrapIP(bootstrapPeers, "")
+if bootstrapIP == "" {
+// Try to extract from first bootstrap peer directly as fallback
+if len(bootstrapPeers) > 0 {
+if extractedIP := extractIPFromMultiaddr(bootstrapPeers[0]); extractedIP != "" {
+bootstrapIP = extractedIP
+}
+}
+// If still no IP, fail - we cannot join without a valid bootstrap address
+if bootstrapIP == "" {
+return "", fmt.Errorf("cannot determine RQLite join address: failed to extract IP from bootstrap peers %v (required for non-bootstrap nodes)", bootstrapPeers)
+}
+}
+rqliteJoinAddr = net.JoinHostPort(bootstrapIP, "7001")
+// Validate that join address doesn't match this node's own raft address (would cause self-join)
+if rqliteJoinAddr == raftAdvAddr {
+return "", fmt.Errorf("invalid configuration: rqlite_join_address (%s) cannot match raft_adv_address (%s) - node cannot join itself", rqliteJoinAddr, raftAdvAddr)
+}
data := templates.NodeConfigData{
@ -67,6 +185,8 @@ func (cg *ConfigGenerator) GenerateNodeConfig(isBootstrap bool, bootstrapPeers [
BootstrapPeers: bootstrapPeers,
ClusterAPIPort: 9094,
IPFSAPIPort: 4501,
HTTPAdvAddress: httpAdvAddr,
RaftAdvAddress: raftAdvAddr,
}
return templates.RenderNodeConfig(data)
}


@ -3,6 +3,7 @@ package production
import (
"fmt"
"io"
"net"
"os"
"os/exec"
"path/filepath"
@ -368,8 +369,20 @@ func (ps *ProductionSetup) Phase4GenerateConfigs(isBootstrap bool, bootstrapPeer
}
ps.logf(" ✓ Node config generated: %s", configFile)
-// Gateway config
-olricServers := []string{"127.0.0.1:3320"}
+// Gateway config - infer Olric servers from bootstrap peers or use localhost
+olricServers := []string{"127.0.0.1:3320"} // Default to localhost for single-node
if len(bootstrapPeers) > 0 {
// Try to infer Olric servers from bootstrap peers
bootstrapIP := inferBootstrapIP(bootstrapPeers, vpsIP)
if bootstrapIP != "" {
// Add bootstrap Olric server (use net.JoinHostPort for IPv6 support)
olricServers = []string{net.JoinHostPort(bootstrapIP, "3320")}
// If this is not bootstrap and vpsIP is provided, add local Olric server too
if !isBootstrap && vpsIP != "" {
olricServers = append(olricServers, net.JoinHostPort(vpsIP, "3320"))
}
}
}
gatewayConfig, err := ps.configGenerator.GenerateGatewayConfig(bootstrapPeers, enableHTTPS, domain, olricServers)
if err != nil {
return fmt.Errorf("failed to generate gateway config: %w", err)
@ -380,8 +393,8 @@ func (ps *ProductionSetup) Phase4GenerateConfigs(isBootstrap bool, bootstrapPeer
}
ps.logf(" ✓ Gateway config generated")
-// Olric config
-olricConfig, err := ps.configGenerator.GenerateOlricConfig("localhost", 3320, 3322)
+// Olric config - use 0.0.0.0 to bind on all interfaces
+olricConfig, err := ps.configGenerator.GenerateOlricConfig("0.0.0.0", 3320, 3322)
if err != nil {
return fmt.Errorf("failed to generate olric config: %w", err)
}
@ -533,7 +546,7 @@ func (ps *ProductionSetup) LogSetupComplete(peerID string) {
ps.logf(" %s/logs/node-bootstrap.log", ps.debrosDir) ps.logf(" %s/logs/node-bootstrap.log", ps.debrosDir)
ps.logf(" %s/logs/gateway.log", ps.debrosDir) ps.logf(" %s/logs/gateway.log", ps.debrosDir)
ps.logf("\nStart All Services:") ps.logf("\nStart All Services:")
ps.logf(" systemctl start debros-ipfs-bootstrap debros-ipfs-cluster-bootstrap debros-rqlite-bootstrap debros-olric debros-node-bootstrap debros-gateway") ps.logf(" systemctl start debros-ipfs-bootstrap debros-ipfs-cluster-bootstrap debros-olric debros-node-bootstrap debros-gateway")
ps.logf("\nVerify Installation:") ps.logf("\nVerify Installation:")
ps.logf(" curl http://localhost:6001/health") ps.logf(" curl http://localhost:6001/health")
ps.logf(" curl http://localhost:5001/status\n") ps.logf(" curl http://localhost:5001/status\n")


@ -245,7 +245,7 @@ User=debros
Group=debros
WorkingDirectory=%s
Environment=HOME=%s
ExecStart=%s/bin/gateway --config %s/data/gateway.yaml
Restart=always
RestartSec=5
StandardOutput=file:%s


@ -31,8 +31,8 @@ discovery:
{{end}}
discovery_interval: "15s"
bootstrap_port: {{.P2PPort}}
-http_adv_address: "localhost:{{.RQLiteHTTPPort}}"
-raft_adv_address: "localhost:{{.RQLiteRaftPort}}"
+http_adv_address: "{{.HTTPAdvAddress}}"
+raft_adv_address: "{{.RaftAdvAddress}}"
node_namespace: "default"
security:


@ -31,8 +31,8 @@ discovery:
{{end}}
discovery_interval: "15s"
bootstrap_port: {{.P2PPort}}
-http_adv_address: "localhost:{{.RQLiteHTTPPort}}"
-raft_adv_address: "localhost:{{.RQLiteRaftPort}}"
+http_adv_address: "{{.HTTPAdvAddress}}"
+raft_adv_address: "{{.RaftAdvAddress}}"
node_namespace: "default"
security:


@ -22,6 +22,8 @@ type BootstrapConfigData struct {
IPFSAPIPort int // Default: 4501
BootstrapPeers []string // List of bootstrap peer multiaddrs
RQLiteJoinAddress string // Optional: join address for secondary bootstraps
HTTPAdvAddress string // Advertised HTTP address (IP:port)
RaftAdvAddress string // Advertised Raft address (IP:port)
}
// NodeConfigData holds parameters for node.yaml rendering
@ -34,7 +36,9 @@ type NodeConfigData struct {
RQLiteJoinAddress string
BootstrapPeers []string
ClusterAPIPort int
IPFSAPIPort int // Default: 4501+
HTTPAdvAddress string // Advertised HTTP address (IP:port)
RaftAdvAddress string // Advertised Raft address (IP:port)
}
// GatewayConfigData holds parameters for gateway.yaml rendering // GatewayConfigData holds parameters for gateway.yaml rendering


@ -4,6 +4,7 @@ import (
"context" "context"
"fmt" "fmt"
mathrand "math/rand" mathrand "math/rand"
"net"
"os" "os"
"path/filepath" "path/filepath"
"strings" "strings"
@ -131,6 +132,51 @@ func (n *Node) startRQLite(ctx context.Context) error {
return nil
}
// extractIPFromMultiaddr extracts the IP address from a bootstrap peer multiaddr
// Supports IP4, IP6, DNS4, DNS6, and DNSADDR protocols
func extractIPFromMultiaddr(multiaddrStr string) string {
ma, err := multiaddr.NewMultiaddr(multiaddrStr)
if err != nil {
return ""
}
// First, try to extract direct IP address
var ip string
var dnsName string
multiaddr.ForEach(ma, func(c multiaddr.Component) bool {
switch c.Protocol().Code {
case multiaddr.P_IP4, multiaddr.P_IP6:
ip = c.Value()
return false // Stop iteration - found IP
case multiaddr.P_DNS4, multiaddr.P_DNS6, multiaddr.P_DNSADDR:
dnsName = c.Value()
// Continue to check for IP, but remember DNS name as fallback
}
return true
})
// If we found a direct IP, return it
if ip != "" {
return ip
}
// If we found a DNS name, try to resolve it
if dnsName != "" {
if resolvedIPs, err := net.LookupIP(dnsName); err == nil && len(resolvedIPs) > 0 {
// Prefer IPv4 addresses, but accept IPv6 if that's all we have
for _, resolvedIP := range resolvedIPs {
if resolvedIP.To4() != nil {
return resolvedIP.String()
}
}
// Return first IPv6 address if no IPv4 found
return resolvedIPs[0].String()
}
}
return ""
}
// bootstrapPeerSource returns a PeerSource that yields peers from BootstrapPeers.
func bootstrapPeerSource(bootstrapAddrs []string, logger *zap.Logger) func(context.Context, int) <-chan peer.AddrInfo {
return func(ctx context.Context, num int) <-chan peer.AddrInfo {
@@ -688,10 +734,14 @@ func (n *Node) startIPFSClusterConfig() error {
// If this is not the bootstrap node, try to update bootstrap peer info
if n.config.Node.Type != "bootstrap" && len(n.config.Discovery.BootstrapPeers) > 0 {
-// Try to find bootstrap cluster API URL from config
-// For now, we'll discover it from the first bootstrap peer
-// In a real scenario, you might want to configure this explicitly
-bootstrapClusterAPI := "http://localhost:9094" // Default bootstrap cluster API
+// Infer bootstrap cluster API URL from first bootstrap peer multiaddr
+bootstrapClusterAPI := "http://localhost:9094" // Default fallback
+if len(n.config.Discovery.BootstrapPeers) > 0 {
+// Extract IP from first bootstrap peer multiaddr
+if ip := extractIPFromMultiaddr(n.config.Discovery.BootstrapPeers[0]); ip != "" {
+bootstrapClusterAPI = fmt.Sprintf("http://%s:9094", ip)
+}
+}
if err := cm.UpdateBootstrapPeers(bootstrapClusterAPI); err != nil {
n.logger.ComponentWarn(logging.ComponentNode, "Failed to update bootstrap peers, will retry later", zap.Error(err))
// Don't fail - peers can connect later via mDNS or manual config
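
A standalone sketch of the multiaddr IP-extraction technique this file introduces, using the same go-multiaddr calls; the sample peer address and the derived cluster API URL are hypothetical, and DNS names would additionally be resolved via net.LookupIP as in the diff.

```go
// Sketch: pull the IP component out of a bootstrap peer multiaddr and use it
// to derive a cluster API endpoint. Inputs are illustrative.
package main

import (
	"fmt"

	"github.com/multiformats/go-multiaddr"
)

func ipFromBootstrapPeer(s string) string {
	ma, err := multiaddr.NewMultiaddr(s)
	if err != nil {
		return ""
	}
	// Prefer an explicit IPv4/IPv6 component if present.
	if v4, err := ma.ValueForProtocol(multiaddr.P_IP4); err == nil {
		return v4
	}
	if v6, err := ma.ValueForProtocol(multiaddr.P_IP6); err == nil {
		return v6
	}
	return ""
}

func main() {
	peerAddr := "/ip4/203.0.113.10/tcp/4001" // hypothetical bootstrap peer
	if ip := ipFromBootstrapPeer(peerAddr); ip != "" {
		fmt.Println(ip)                          // 203.0.113.10
		fmt.Printf("http://%s:9094\n", ip)       // derived cluster API endpoint
	}
}
```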

View File

@@ -4,6 +4,8 @@ import (
"context"
"encoding/json"
"fmt"
+"net"
+"net/netip"
"os"
"path/filepath"
"strings"
@@ -13,6 +15,7 @@ import (
"github.com/DeBrosOfficial/network/pkg/discovery"
"github.com/libp2p/go-libp2p/core/host"
"github.com/libp2p/go-libp2p/core/peer"
+"github.com/multiformats/go-multiaddr"
"go.uber.org/zap"
)
@@ -159,18 +162,32 @@ func (c *ClusterDiscoveryService) collectPeerMetadata() []*discovery.RQLiteNodeM
c.logger.Debug("Collecting peer metadata from LibP2P",
zap.Int("connected_libp2p_peers", len(connectedPeers)))
+c.mu.RLock()
+currentRaftAddr := c.raftAddress
+currentHTTPAddr := c.httpAddress
+c.mu.RUnlock()
// Add ourselves
ourMetadata := &discovery.RQLiteNodeMetadata{
-NodeID: c.raftAddress, // RQLite uses raft address as node ID
-RaftAddress: c.raftAddress,
-HTTPAddress: c.httpAddress,
+NodeID: currentRaftAddr, // RQLite uses raft address as node ID
+RaftAddress: currentRaftAddr,
+HTTPAddress: currentHTTPAddr,
NodeType: c.nodeType,
RaftLogIndex: c.rqliteManager.getRaftLogIndex(),
LastSeen: time.Now(),
ClusterVersion: "1.0",
}
if c.adjustSelfAdvertisedAddresses(ourMetadata) {
c.logger.Debug("Adjusted self-advertised RQLite addresses",
zap.String("raft_address", ourMetadata.RaftAddress),
zap.String("http_address", ourMetadata.HTTPAddress))
}
metadata = append(metadata, ourMetadata)
staleNodeIDs := make([]string, 0)
// Query connected peers for their RQLite metadata
// For now, we'll use a simple approach - store metadata in peer metadata store
// In a full implementation, this would use a custom protocol to exchange RQLite metadata
@@ -181,6 +198,9 @@ func (c *ClusterDiscoveryService) collectPeerMetadata() []*discovery.RQLiteNodeM
if jsonData, ok := val.([]byte); ok {
var peerMeta discovery.RQLiteNodeMetadata
if err := json.Unmarshal(jsonData, &peerMeta); err == nil {
+if updated, stale := c.adjustPeerAdvertisedAddresses(peerID, &peerMeta); updated && stale != "" {
+staleNodeIDs = append(staleNodeIDs, stale)
+}
peerMeta.LastSeen = time.Now()
metadata = append(metadata, &peerMeta)
}
@@ -188,6 +208,16 @@ func (c *ClusterDiscoveryService) collectPeerMetadata() []*discovery.RQLiteNodeM
}
}
// Clean up stale entries if NodeID changed
if len(staleNodeIDs) > 0 {
c.mu.Lock()
for _, id := range staleNodeIDs {
delete(c.knownPeers, id)
delete(c.peerHealth, id)
}
c.mu.Unlock()
}
return metadata
}
@@ -366,6 +396,10 @@ func (c *ClusterDiscoveryService) getPeersJSONUnlocked() []map[string]interface{
peers := make([]map[string]interface{}, 0, len(c.knownPeers))
for _, peer := range c.knownPeers {
+// Skip self - RQLite knows about itself, shouldn't be in peers.json
+if peer.NodeID == c.raftAddress {
+continue
+}
peerEntry := map[string]interface{}{
"id": peer.RaftAddress, // RQLite uses raft address as node ID
"address": peer.RaftAddress,
@@ -594,16 +628,28 @@ func (c *ClusterDiscoveryService) TriggerPeerExchange(ctx context.Context) error
// UpdateOwnMetadata updates our own RQLite metadata in the peerstore
func (c *ClusterDiscoveryService) UpdateOwnMetadata() {
+c.mu.RLock()
+currentRaftAddr := c.raftAddress
+currentHTTPAddr := c.httpAddress
+c.mu.RUnlock()
metadata := &discovery.RQLiteNodeMetadata{
-NodeID: c.raftAddress, // RQLite uses raft address as node ID
-RaftAddress: c.raftAddress,
-HTTPAddress: c.httpAddress,
+NodeID: currentRaftAddr, // RQLite uses raft address as node ID
+RaftAddress: currentRaftAddr,
+HTTPAddress: currentHTTPAddr,
NodeType: c.nodeType,
RaftLogIndex: c.rqliteManager.getRaftLogIndex(),
LastSeen: time.Now(),
ClusterVersion: "1.0",
}
// Adjust addresses if needed
if c.adjustSelfAdvertisedAddresses(metadata) {
c.logger.Debug("Adjusted self-advertised RQLite addresses in UpdateOwnMetadata",
zap.String("raft_address", metadata.RaftAddress),
zap.String("http_address", metadata.HTTPAddress))
}
// Store in our own peerstore for peer exchange
data, err := json.Marshal(metadata)
if err != nil {
@@ -623,6 +669,21 @@ func (c *ClusterDiscoveryService) UpdateOwnMetadata() {
// StoreRemotePeerMetadata stores metadata received from a remote peer
func (c *ClusterDiscoveryService) StoreRemotePeerMetadata(peerID peer.ID, metadata *discovery.RQLiteNodeMetadata) error {
if metadata == nil {
return fmt.Errorf("metadata is nil")
}
// Adjust addresses if needed (replace localhost with actual IP)
if updated, stale := c.adjustPeerAdvertisedAddresses(peerID, metadata); updated && stale != "" {
// Clean up stale entry if NodeID changed
c.mu.Lock()
delete(c.knownPeers, stale)
delete(c.peerHealth, stale)
c.mu.Unlock()
}
metadata.LastSeen = time.Now()
data, err := json.Marshal(metadata)
if err != nil {
return fmt.Errorf("failed to marshal metadata: %w", err)
@@ -633,8 +694,239 @@ func (c *ClusterDiscoveryService) StoreRemotePeerMetadata(peerID peer.ID, metada
}
c.logger.Debug("Stored remote peer metadata",
-zap.String("peer_id", peerID.String()[:8]+"..."),
+zap.String("peer_id", shortPeerID(peerID)),
zap.String("node_id", metadata.NodeID))
return nil
}
// adjustPeerAdvertisedAddresses adjusts peer metadata addresses by replacing localhost/loopback
// with the actual IP address from LibP2P connection. Returns (updated, staleNodeID).
// staleNodeID is non-empty if NodeID changed (indicating old entry should be cleaned up).
func (c *ClusterDiscoveryService) adjustPeerAdvertisedAddresses(peerID peer.ID, meta *discovery.RQLiteNodeMetadata) (bool, string) {
ip := c.selectPeerIP(peerID)
if ip == "" {
return false, ""
}
changed, stale := rewriteAdvertisedAddresses(meta, ip, true)
if changed {
c.logger.Debug("Normalized peer advertised RQLite addresses",
zap.String("peer_id", shortPeerID(peerID)),
zap.String("raft_address", meta.RaftAddress),
zap.String("http_address", meta.HTTPAddress))
}
return changed, stale
}
// adjustSelfAdvertisedAddresses adjusts our own metadata addresses by replacing localhost/loopback
// with the actual IP address from LibP2P host. Updates internal state if changed.
func (c *ClusterDiscoveryService) adjustSelfAdvertisedAddresses(meta *discovery.RQLiteNodeMetadata) bool {
ip := c.selectSelfIP()
if ip == "" {
return false
}
changed, _ := rewriteAdvertisedAddresses(meta, ip, true)
if !changed {
return false
}
// Update internal state with corrected addresses
c.mu.Lock()
c.raftAddress = meta.RaftAddress
c.httpAddress = meta.HTTPAddress
c.mu.Unlock()
return true
}
// selectPeerIP selects the best IP address for a peer from LibP2P connections.
// Prefers public IPs, falls back to private IPs if no public IP is available.
func (c *ClusterDiscoveryService) selectPeerIP(peerID peer.ID) string {
var fallback string
// First, try to get IP from active connections
for _, conn := range c.host.Network().ConnsToPeer(peerID) {
if ip, public := ipFromMultiaddr(conn.RemoteMultiaddr()); ip != "" {
if shouldReplaceHost(ip) {
continue
}
if public {
return ip
}
if fallback == "" {
fallback = ip
}
}
}
// Fallback to peerstore addresses
for _, addr := range c.host.Peerstore().Addrs(peerID) {
if ip, public := ipFromMultiaddr(addr); ip != "" {
if shouldReplaceHost(ip) {
continue
}
if public {
return ip
}
if fallback == "" {
fallback = ip
}
}
}
return fallback
}
// selectSelfIP selects the best IP address for ourselves from LibP2P host addresses.
// Prefers public IPs, falls back to private IPs if no public IP is available.
func (c *ClusterDiscoveryService) selectSelfIP() string {
var fallback string
for _, addr := range c.host.Addrs() {
if ip, public := ipFromMultiaddr(addr); ip != "" {
if shouldReplaceHost(ip) {
continue
}
if public {
return ip
}
if fallback == "" {
fallback = ip
}
}
}
return fallback
}
// rewriteAdvertisedAddresses rewrites RaftAddress and HTTPAddress in metadata,
// replacing localhost/loopback addresses with the provided IP.
// Returns (changed, staleNodeID). staleNodeID is non-empty if NodeID changed.
func rewriteAdvertisedAddresses(meta *discovery.RQLiteNodeMetadata, newHost string, allowNodeIDRewrite bool) (bool, string) {
if meta == nil || newHost == "" {
return false, ""
}
originalNodeID := meta.NodeID
changed := false
nodeIDChanged := false
// Replace host in RaftAddress if it's localhost/loopback
if newAddr, replaced := replaceAddressHost(meta.RaftAddress, newHost); replaced {
if meta.RaftAddress != newAddr {
meta.RaftAddress = newAddr
changed = true
}
}
// Replace host in HTTPAddress if it's localhost/loopback
if newAddr, replaced := replaceAddressHost(meta.HTTPAddress, newHost); replaced {
if meta.HTTPAddress != newAddr {
meta.HTTPAddress = newAddr
changed = true
}
}
// Update NodeID to match RaftAddress if it changed
if allowNodeIDRewrite {
if meta.RaftAddress != "" && (meta.NodeID == "" || meta.NodeID == originalNodeID || shouldReplaceHost(hostFromAddress(meta.NodeID))) {
if meta.NodeID != meta.RaftAddress {
meta.NodeID = meta.RaftAddress
nodeIDChanged = meta.NodeID != originalNodeID
if nodeIDChanged {
changed = true
}
}
}
}
if nodeIDChanged {
return changed, originalNodeID
}
return changed, ""
}
// replaceAddressHost replaces the host part of an address if it's localhost/loopback.
// Returns (newAddress, replaced). replaced is true if host was replaced.
func replaceAddressHost(address, newHost string) (string, bool) {
if address == "" || newHost == "" {
return address, false
}
host, port, err := net.SplitHostPort(address)
if err != nil {
return address, false
}
if !shouldReplaceHost(host) {
return address, false
}
return net.JoinHostPort(newHost, port), true
}
// shouldReplaceHost returns true if the host should be replaced (localhost, loopback, etc.)
func shouldReplaceHost(host string) bool {
if host == "" {
return true
}
if strings.EqualFold(host, "localhost") {
return true
}
// Check if it's a loopback or unspecified address
if addr, err := netip.ParseAddr(host); err == nil {
if addr.IsLoopback() || addr.IsUnspecified() {
return true
}
}
return false
}
// hostFromAddress extracts the host part from a host:port address
func hostFromAddress(address string) string {
host, _, err := net.SplitHostPort(address)
if err != nil {
return ""
}
return host
}
// ipFromMultiaddr extracts an IP address from a multiaddr and returns (ip, isPublic)
func ipFromMultiaddr(addr multiaddr.Multiaddr) (string, bool) {
if addr == nil {
return "", false
}
if v4, err := addr.ValueForProtocol(multiaddr.P_IP4); err == nil {
return v4, isPublicIP(v4)
}
if v6, err := addr.ValueForProtocol(multiaddr.P_IP6); err == nil {
return v6, isPublicIP(v6)
}
return "", false
}
// isPublicIP returns true if the IP is a public (non-private, non-loopback) address
func isPublicIP(ip string) bool {
addr, err := netip.ParseAddr(ip)
if err != nil {
return false
}
// Exclude loopback, unspecified, link-local, multicast, and private addresses
if addr.IsLoopback() || addr.IsUnspecified() || addr.IsLinkLocalUnicast() || addr.IsLinkLocalMulticast() || addr.IsPrivate() {
return false
}
return true
}
// shortPeerID returns a shortened version of a peer ID for logging
func shortPeerID(id peer.ID) string {
s := id.String()
if len(s) <= 8 {
return s
}
return s[:8] + "..."
}
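
A minimal sketch of the loopback-rewriting rule the helpers above apply to advertised RQLite addresses, using the same standard-library calls (net.SplitHostPort, netip.ParseAddr); the sample addresses and observed IP are illustrative.

```go
// Sketch: replace a localhost/loopback/unspecified host in an advertised
// address with the IP observed on the LibP2P connection.
package main

import (
	"fmt"
	"net"
	"net/netip"
	"strings"
)

// shouldReplace reports whether a host is localhost, loopback, or unspecified.
func shouldReplace(host string) bool {
	if host == "" || strings.EqualFold(host, "localhost") {
		return true
	}
	if a, err := netip.ParseAddr(host); err == nil {
		return a.IsLoopback() || a.IsUnspecified()
	}
	return false
}

// rewrite swaps the host for observedIP when the advertised host is loopback.
func rewrite(addr, observedIP string) string {
	host, port, err := net.SplitHostPort(addr)
	if err != nil || !shouldReplace(host) {
		return addr
	}
	return net.JoinHostPort(observedIP, port)
}

func main() {
	observed := "203.0.113.10" // IP taken from the peer's LibP2P connection (hypothetical)
	fmt.Println(rewrite("localhost:7001", observed))    // 203.0.113.10:7001
	fmt.Println(rewrite("0.0.0.0:5001", observed))      // 203.0.113.10:5001
	fmt.Println(rewrite("198.51.100.7:7001", observed)) // unchanged: already routable
}
```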

View File

@@ -112,8 +112,12 @@ func (r *RQLiteManager) Start(ctx context.Context) error {
return err
}
-// Apply migrations
-migrationsDir := "migrations"
+// Apply migrations - resolve path for production vs development
+migrationsDir, err := r.resolveMigrationsDir()
+if err != nil {
+r.logger.Error("Failed to resolve migrations directory", zap.Error(err))
+return fmt.Errorf("resolve migrations directory: %w", err)
+}
if err := r.ApplyMigrations(ctx, migrationsDir); err != nil {
r.logger.Error("Migrations failed", zap.Error(err), zap.String("dir", migrationsDir))
return fmt.Errorf("apply migrations: %w", err)
@@ -139,6 +143,23 @@ func (r *RQLiteManager) rqliteDataDirPath() (string, error) {
return filepath.Join(dataDir, "rqlite"), nil
}
// resolveMigrationsDir resolves the migrations directory path for production vs development
// In production, migrations are at /home/debros/src/migrations
// In development, migrations are relative to the project root (migrations/)
func (r *RQLiteManager) resolveMigrationsDir() (string, error) {
// Check for production path first: /home/debros/src/migrations
productionPath := "/home/debros/src/migrations"
if _, err := os.Stat(productionPath); err == nil {
r.logger.Info("Using production migrations directory", zap.String("path", productionPath))
return productionPath, nil
}
// Fall back to relative path for development
devPath := "migrations"
r.logger.Info("Using development migrations directory", zap.String("path", devPath))
return devPath, nil
}
// prepareDataDir expands and creates the RQLite data directory
func (r *RQLiteManager) prepareDataDir() (string, error) {
rqliteDataDir, err := r.rqliteDataDirPath()
@@ -176,10 +197,13 @@ func (r *RQLiteManager) launchProcess(ctx context.Context, rqliteDataDir string)
joinArg = strings.TrimPrefix(joinArg, "https://")
}
-// Wait for join target to become reachable to avoid forming a separate cluster (wait indefinitely)
-if err := r.waitForJoinTarget(ctx, r.config.RQLiteJoinAddress, 0); err != nil {
+// Wait for join target to become reachable to avoid forming a separate cluster
+// Use 5 minute timeout to prevent infinite waits on bad configurations
+joinTimeout := 5 * time.Minute
+if err := r.waitForJoinTarget(ctx, r.config.RQLiteJoinAddress, joinTimeout); err != nil {
r.logger.Warn("Join target did not become reachable within timeout; will still attempt to join",
zap.String("join_address", r.config.RQLiteJoinAddress),
+zap.Duration("timeout", joinTimeout),
zap.Error(err))
}
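
A hypothetical sketch of a bounded wait for a join target, in the spirit of the 5-minute timeout introduced above; the real waitForJoinTarget implementation is not shown in this diff, so the TCP dial probe, intervals, and address here are assumptions.

```go
// Sketch only: poll a join target over TCP until it answers or the timeout expires.
package main

import (
	"context"
	"fmt"
	"net"
	"time"
)

func waitForTCP(ctx context.Context, addr string, timeout time.Duration) error {
	deadline := time.Now().Add(timeout)
	for time.Now().Before(deadline) {
		conn, err := net.DialTimeout("tcp", addr, 2*time.Second)
		if err == nil {
			conn.Close()
			return nil // target reachable
		}
		select {
		case <-ctx.Done():
			return ctx.Err()
		case <-time.After(3 * time.Second): // retry interval (assumed)
		}
	}
	return fmt.Errorf("join target %s not reachable within %s", addr, timeout)
}

func main() {
	ctx := context.Background()
	if err := waitForTCP(ctx, "203.0.113.10:7001", 5*time.Minute); err != nil {
		// Mirror the diff's behaviour: warn and continue, still attempting to join.
		fmt.Println("warning:", err)
	}
}
```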

View File

@@ -15,6 +15,7 @@
# bash scripts/install-debros-network.sh --domain example.com
set -e
+set -o pipefail
trap 'error "An error occurred. Installation aborted."; exit 1' ERR
# Color codes
@@ -100,19 +101,30 @@ check_root() {
get_latest_release() {
log "Fetching latest release..."
+# Try to get latest release with better error handling
+RELEASE_DATA=""
if command -v jq &>/dev/null; then
# Get the latest release (including pre-releases/nightly)
-LATEST_RELEASE=$(curl -fsSL -H "Accept: application/vnd.github+json" "$GITHUB_API/releases" | \
-jq -r '.[0] | .tag_name')
+RELEASE_DATA=$(curl -fsSL -H "Accept: application/vnd.github+json" "$GITHUB_API/releases" 2>&1)
+if [ $? -ne 0 ]; then
+error "Failed to fetch release data from GitHub API"
+error "Response: $RELEASE_DATA"
+exit 1
+fi
+LATEST_RELEASE=$(echo "$RELEASE_DATA" | jq -r '.[0] | .tag_name' 2>/dev/null)
else
-LATEST_RELEASE=$(curl -fsSL "$GITHUB_API/releases" | \
-grep '"tag_name"' | \
-head -1 | \
-cut -d'"' -f4)
+RELEASE_DATA=$(curl -fsSL "$GITHUB_API/releases" 2>&1)
+if [ $? -ne 0 ]; then
+error "Failed to fetch release data from GitHub API"
+error "Response: $RELEASE_DATA"
+exit 1
+fi
+LATEST_RELEASE=$(echo "$RELEASE_DATA" | grep '"tag_name"' | head -1 | cut -d'"' -f4)
fi
-if [ -z "$LATEST_RELEASE" ]; then
+if [ -z "$LATEST_RELEASE" ] || [ "$LATEST_RELEASE" = "null" ]; then
error "Could not determine latest release version"
+error "GitHub API response may be empty or rate-limited"
exit 1
fi