diff --git a/pkg/cli/prod_commands.go b/pkg/cli/prod_commands.go
index cf9315d..fce681f 100644
--- a/pkg/cli/prod_commands.go
+++ b/pkg/cli/prod_commands.go
@@ -2,11 +2,11 @@ package cli

 import (
 	"bufio"
+	"encoding/hex"
 	"errors"
 	"flag"
 	"fmt"
 	"net"
-	"net/http"
 	"os"
 	"os/exec"
 	"path/filepath"
@@ -17,10 +17,27 @@ import (
 	"github.com/DeBrosOfficial/network/pkg/config"
 	"github.com/DeBrosOfficial/network/pkg/environments/production"
 	"github.com/DeBrosOfficial/network/pkg/installer"
-	"github.com/DeBrosOfficial/network/pkg/tlsutil"
 	"github.com/multiformats/go-multiaddr"
 )

+// IPFSPeerInfo holds IPFS peer information for configuring Peering.Peers
+type IPFSPeerInfo struct {
+	PeerID string
+	Addrs  []string
+}
+
+// validateSwarmKey validates that a swarm key is 64 hex characters
+func validateSwarmKey(key string) error {
+	key = strings.TrimSpace(key)
+	if len(key) != 64 {
+		return fmt.Errorf("swarm key must be 64 hex characters (32 bytes), got %d", len(key))
+	}
+	if _, err := hex.DecodeString(key); err != nil {
+		return fmt.Errorf("swarm key must be valid hexadecimal: %w", err)
+	}
+	return nil
+}
+
 // runInteractiveInstaller launches the TUI installer
 func runInteractiveInstaller() {
 	config, err := installer.Run()
@@ -46,9 +63,19 @@ func runInteractiveInstaller() {
 	if config.ClusterSecret != "" {
 		args = append(args, "--cluster-secret", config.ClusterSecret)
 	}
+	if config.SwarmKeyHex != "" {
+		args = append(args, "--swarm-key", config.SwarmKeyHex)
+	}
 	if len(config.Peers) > 0 {
 		args = append(args, "--peers", strings.Join(config.Peers, ","))
 	}
+	// Pass IPFS peer info for Peering.Peers configuration
+	if config.IPFSPeerID != "" {
+		args = append(args, "--ipfs-peer", config.IPFSPeerID)
+	}
+	if len(config.IPFSSwarmAddrs) > 0 {
+		args = append(args, "--ipfs-addrs", strings.Join(config.IPFSSwarmAddrs, ","))
+	}
 	}

 	// Re-run with collected args
@@ -285,6 +312,9 @@ func showProdHelp() {
 	fmt.Printf("  --peers ADDRS            - Comma-separated peer multiaddrs (for joining cluster)\n")
 	fmt.Printf("  --join ADDR              - RQLite join address IP:port (for joining cluster)\n")
 	fmt.Printf("  --cluster-secret HEX     - 64-hex cluster secret (required when joining)\n")
+	fmt.Printf("  --swarm-key HEX          - 64-hex IPFS swarm key (required when joining)\n")
+	fmt.Printf("  --ipfs-peer ID           - IPFS peer ID to connect to (auto-discovered)\n")
+	fmt.Printf("  --ipfs-addrs ADDRS       - IPFS swarm addresses (auto-discovered)\n")
 	fmt.Printf("  --branch BRANCH          - Git branch to use (main or nightly, default: main)\n")
 	fmt.Printf("  --no-pull                - Skip git clone/pull, use existing /home/debros/src\n")
 	fmt.Printf("  --ignore-resource-checks - Skip disk/RAM/CPU prerequisite validation\n")
@@ -312,7 +342,7 @@ func showProdHelp() {
 	fmt.Printf("  # Join existing cluster\n")
 	fmt.Printf("  sudo orama install --vps-ip 203.0.113.2 --domain node-2.orama.network \\\n")
 	fmt.Printf("    --peers /ip4/203.0.113.1/tcp/4001/p2p/12D3KooW... \\\n")
-	fmt.Printf("    --cluster-secret <64-hex-secret>\n\n")
+	fmt.Printf("    --cluster-secret <64-hex-secret> --swarm-key <64-hex-swarm-key>\n\n")
 	fmt.Printf("  # Upgrade\n")
 	fmt.Printf("  sudo orama upgrade --restart\n\n")
 	fmt.Printf("  # Service management\n")
@@ -336,6 +366,9 @@ func handleProdInstall(args []string) {
 	joinAddress := fs.String("join", "", "RQLite join address (IP:port) to join existing cluster")
 	branch := fs.String("branch", "main", "Git branch to use (main or nightly)")
 	clusterSecret := fs.String("cluster-secret", "", "Hex-encoded 32-byte cluster secret (for joining existing cluster)")
+	swarmKey := fs.String("swarm-key", "", "64-hex IPFS swarm key (for joining existing private network)")
+	ipfsPeerID := fs.String("ipfs-peer", "", "IPFS peer ID to connect to (auto-discovered from peer domain)")
+	ipfsAddrs := fs.String("ipfs-addrs", "", "Comma-separated IPFS swarm addresses (auto-discovered from peer domain)")
 	interactive := fs.Bool("interactive", false, "Run interactive TUI installer")
 	dryRun := fs.Bool("dry-run", false, "Show what would be done without making changes")
 	noPull := fs.Bool("no-pull", false, "Skip git clone/pull, use existing /home/debros/src")
@@ -398,6 +431,17 @@ func handleProdInstall(args []string) {
 		fmt.Fprintf(os.Stderr, "❌ Invalid --cluster-secret: %v\n", err)
 		os.Exit(1)
 	}
+	// Swarm key is required when joining
+	if *swarmKey == "" {
+		fmt.Fprintf(os.Stderr, "❌ --swarm-key is required when joining an existing cluster\n")
+		fmt.Fprintf(os.Stderr, "   Provide the 64-hex swarm key from an existing node:\n")
+		fmt.Fprintf(os.Stderr, "   cat ~/.orama/secrets/swarm.key | tail -1\n")
+		os.Exit(1)
+	}
+	if err := validateSwarmKey(*swarmKey); err != nil {
+		fmt.Fprintf(os.Stderr, "❌ Invalid --swarm-key: %v\n", err)
+		os.Exit(1)
+	}
 	}

 	oramaHome := "/home/debros"
@@ -418,6 +462,32 @@ func handleProdInstall(args []string) {
 		fmt.Printf("   ✓ Cluster secret saved\n")
 	}

+	// If swarm key was provided, save it to secrets directory in full format
+	if *swarmKey != "" {
+		secretsDir := filepath.Join(oramaDir, "secrets")
+		if err := os.MkdirAll(secretsDir, 0755); err != nil {
+			fmt.Fprintf(os.Stderr, "❌ Failed to create secrets directory: %v\n", err)
+			os.Exit(1)
+		}
+		// Convert 64-hex key to full swarm.key format
+		swarmKeyContent := fmt.Sprintf("/key/swarm/psk/1.0.0/\n/base16/\n%s\n", strings.ToUpper(*swarmKey))
+		swarmKeyPath := filepath.Join(secretsDir, "swarm.key")
+		if err := os.WriteFile(swarmKeyPath, []byte(swarmKeyContent), 0600); err != nil {
+			fmt.Fprintf(os.Stderr, "❌ Failed to save swarm key: %v\n", err)
+			os.Exit(1)
+		}
+		fmt.Printf("   ✓ Swarm key saved\n")
+	}
+
+	// Store IPFS peer info for later use in IPFS configuration
+	var ipfsPeerInfo *IPFSPeerInfo
+	if *ipfsPeerID != "" && *ipfsAddrs != "" {
+		ipfsPeerInfo = &IPFSPeerInfo{
+			PeerID: *ipfsPeerID,
+			Addrs:  strings.Split(*ipfsAddrs, ","),
+		}
+	}
+
 	setup := production.NewProductionSetup(oramaHome, os.Stdout, *force, *branch, *noPull, *skipResourceChecks)

 	// Inform user if skipping git pull
@@ -480,7 +550,14 @@ func handleProdInstall(args []string) {

 	// Phase 2c: Initialize services (after secrets are in place)
 	fmt.Printf("\nPhase 2c: Initializing services...\n")
-	if err := setup.Phase2cInitializeServices(peers, *vpsIP); err != nil {
+	var prodIPFSPeer *production.IPFSPeerInfo
+	if ipfsPeerInfo != nil {
+		prodIPFSPeer = &production.IPFSPeerInfo{
+			PeerID: ipfsPeerInfo.PeerID,
+			Addrs:  ipfsPeerInfo.Addrs,
+		}
+	}
+	if err := setup.Phase2cInitializeServices(peers, *vpsIP, prodIPFSPeer); err != nil {
 		fmt.Fprintf(os.Stderr, "❌ Service initialization failed: %v\n", err)
 		os.Exit(1)
 	}
@@ -508,17 +585,37 @@ func handleProdInstall(args []string) {
 		os.Exit(1)
 	}

-	// Verify all services are running correctly with exponential backoff retries
-	fmt.Printf("\n⏳ Verifying services are healthy...\n")
-	if err := verifyProductionRuntimeWithRetry("prod install", 5, 3*time.Second); err != nil {
-		fmt.Fprintf(os.Stderr, "❌ %v\n", err)
-		fmt.Fprintf(os.Stderr, "   Installation completed but services are not healthy. Check logs with: orama logs \n")
-		os.Exit(1)
-	}
-
 	// Log completion with actual peer ID
 	setup.LogSetupComplete(setup.NodePeerID)
-	fmt.Printf("✅ Production installation complete and healthy!\n\n")
+	fmt.Printf("✅ Production installation complete!\n\n")
+
+	// For first node, print important secrets and identifiers
+	if isFirstNode {
+		fmt.Printf("📋 Save these for joining future nodes:\n\n")
+
+		// Print cluster secret
+		clusterSecretPath := filepath.Join(oramaDir, "secrets", "cluster-secret")
+		if clusterSecretData, err := os.ReadFile(clusterSecretPath); err == nil {
+			fmt.Printf("   Cluster Secret (--cluster-secret):\n")
+			fmt.Printf("   %s\n\n", string(clusterSecretData))
+		}
+
+		// Print swarm key
+		swarmKeyPath := filepath.Join(oramaDir, "secrets", "swarm.key")
+		if swarmKeyData, err := os.ReadFile(swarmKeyPath); err == nil {
+			swarmKeyContent := strings.TrimSpace(string(swarmKeyData))
+			lines := strings.Split(swarmKeyContent, "\n")
+			if len(lines) >= 3 {
+				// Extract just the hex part (last line)
+				fmt.Printf("   IPFS Swarm Key (--swarm-key, last line only):\n")
+				fmt.Printf("   %s\n\n", lines[len(lines)-1])
+			}
+		}
+
+		// Print peer ID
+		fmt.Printf("   Node Peer ID:\n")
+		fmt.Printf("   %s\n\n", setup.NodePeerID)
+	}
 }

 func handleProdUpgrade(args []string) {
@@ -781,8 +878,9 @@ func handleProdUpgrade(args []string) {

 	// Phase 2c: Ensure services are properly initialized (fixes existing repos)
 	// Now that we have peers and VPS IP, we can properly configure IPFS Cluster
+	// Note: IPFS peer info is nil for upgrades - peering is only configured during initial install
 	fmt.Printf("\nPhase 2c: Ensuring services are properly initialized...\n")
-	if err := setup.Phase2cInitializeServices(peers, vpsIP); err != nil {
+	if err := setup.Phase2cInitializeServices(peers, vpsIP, nil); err != nil {
 		fmt.Fprintf(os.Stderr, "❌ Service initialization failed: %v\n", err)
 		os.Exit(1)
 	}
@@ -818,14 +916,6 @@ func handleProdUpgrade(args []string) {
 			}
 		}
 		fmt.Printf("   ✓ All services restarted\n")
-		// Verify services are healthy after restart with exponential backoff
-		fmt.Printf("   ⏳ Verifying services are healthy...\n")
-		if err := verifyProductionRuntimeWithRetry("prod upgrade --restart", 5, 3*time.Second); err != nil {
-			fmt.Fprintf(os.Stderr, "❌ %v\n", err)
-			fmt.Fprintf(os.Stderr, "   Upgrade completed but services are not healthy. Check logs with: orama logs \n")
-			os.Exit(1)
-		}
-		fmt.Printf("   ✅ All services verified healthy\n")
 	}
 	} else {
 		fmt.Printf("   To apply changes, restart services:\n")
@@ -1109,109 +1199,12 @@ func ensurePortsAvailable(action string, ports []portSpec) error {
 	return nil
 }

-func checkHTTP(client *http.Client, method, url, label string) error {
-	req, err := http.NewRequest(method, url, nil)
-	if err != nil {
-		return fmt.Errorf("%s check failed: %w", label, err)
-	}
-	resp, err := client.Do(req)
-	if err != nil {
-		return fmt.Errorf("%s check failed: %w", label, err)
-	}
-	defer resp.Body.Close()
-	if resp.StatusCode < 200 || resp.StatusCode >= 300 {
-		return fmt.Errorf("%s returned HTTP %d", label, resp.StatusCode)
-	}
-	return nil
-}
-
-func serviceExists(name string) bool {
-	unitPath := filepath.Join("/etc/systemd/system", name+".service")
-	_, err := os.Stat(unitPath)
-	return err == nil
-}
-
-// verifyProductionRuntimeWithRetry verifies services with exponential backoff retries
-func verifyProductionRuntimeWithRetry(action string, maxAttempts int, initialWait time.Duration) error {
-	wait := initialWait
-	var lastErr error
-
-	for attempt := 1; attempt <= maxAttempts; attempt++ {
-		lastErr = verifyProductionRuntime(action)
-		if lastErr == nil {
-			return nil
-		}
-
-		if attempt < maxAttempts {
-			fmt.Printf("   ⏳ Services not ready (attempt %d/%d), waiting %v...\n", attempt, maxAttempts, wait)
-			time.Sleep(wait)
-			// Exponential backoff with cap at 30 seconds
-			wait = wait * 2
-			if wait > 30*time.Second {
-				wait = 30 * time.Second
-			}
-		}
-	}
-
-	return lastErr
-}
-
-func verifyProductionRuntime(action string) error {
-	services := getProductionServices()
-	issues := make([]string, 0)
-
-	for _, svc := range services {
-		active, err := isServiceActive(svc)
-		if err != nil {
-			issues = append(issues, fmt.Sprintf("%s status unknown (%v)", svc, err))
-			continue
-		}
-		if !active {
-			issues = append(issues, fmt.Sprintf("%s is inactive", svc))
-		}
-	}
-
-	client := tlsutil.NewHTTPClient(3 * time.Second)
-
-	if err := checkHTTP(client, "GET", "http://127.0.0.1:5001/status", "RQLite status"); err == nil {
-	} else if serviceExists("debros-node") {
-		issues = append(issues, err.Error())
-	}
-
-	if err := checkHTTP(client, "POST", "http://127.0.0.1:4501/api/v0/version", "IPFS API"); err == nil {
-	} else if serviceExists("debros-ipfs") {
-		issues = append(issues, err.Error())
-	}
-
-	if err := checkHTTP(client, "GET", "http://127.0.0.1:9094/health", "IPFS Cluster"); err == nil {
-	} else if serviceExists("debros-ipfs-cluster") {
-		issues = append(issues, err.Error())
-	}
-
-	if err := checkHTTP(client, "GET", "http://127.0.0.1:6001/health", "Gateway health"); err == nil {
-	} else if serviceExists("debros-node") {
-		// Gateway is now embedded in node, check debros-node instead
-		issues = append(issues, err.Error())
-	}
-
-	if err := checkHTTP(client, "GET", "http://127.0.0.1:3320/ping", "Olric ping"); err == nil {
-	} else if serviceExists("debros-olric") {
-		issues = append(issues, err.Error())
-	}
-
-	if len(issues) > 0 {
-		return fmt.Errorf("%s verification failed:\n - %s", action, strings.Join(issues, "\n - "))
-	}
-	return nil
-}
-
 // getProductionServices returns a list of all DeBros production service names that exist
 func getProductionServices() []string {
 	// Unified service names (no bootstrap/node distinction)
 	allServices := []string{
 		"debros-gateway",
 		"debros-node",
-		"debros-rqlite",
 		"debros-olric",
 		"debros-ipfs-cluster",
 		"debros-ipfs",
@@ -1340,17 +1333,7 @@ func handleProdStart() {
 	fmt.Printf("   ⏳ Waiting for services to initialize...\n")
 	time.Sleep(5 * time.Second)

-	// Verify all services are healthy with exponential backoff retries
-	fmt.Printf("   ⏳ Verifying services are healthy...\n")
-	if err := verifyProductionRuntimeWithRetry("prod start", 6, 2*time.Second); err != nil {
-		fmt.Fprintf(os.Stderr, "❌ %v\n", err)
-		fmt.Fprintf(os.Stderr, "\n   Services may still be starting. Check status with:\n")
-		fmt.Fprintf(os.Stderr, "   systemctl status debros-*\n")
-		fmt.Fprintf(os.Stderr, "   orama logs \n")
-		os.Exit(1)
-	}
-
-	fmt.Printf("\n✅ All services started and healthy\n")
+	fmt.Printf("\n✅ All services started\n")
 }

 func handleProdStop() {
@@ -1508,14 +1491,7 @@ func handleProdRestart() {
 		}
 	}

-	// Verify all services are healthy with exponential backoff retries
-	fmt.Printf("   ⏳ Verifying services are healthy...\n")
-	if err := verifyProductionRuntimeWithRetry("prod restart", 5, 3*time.Second); err != nil {
-		fmt.Fprintf(os.Stderr, "❌ %v\n", err)
-		os.Exit(1)
-	}
-
-	fmt.Printf("\n✅ All services restarted and healthy\n")
+	fmt.Printf("\n✅ All services restarted\n")
 }

 func handleProdUninstall() {
@@ -1540,7 +1516,6 @@ func handleProdUninstall() {
 	services := []string{
 		"debros-gateway",
 		"debros-node",
-		"debros-rqlite",
 		"debros-olric",
 		"debros-ipfs-cluster",
 		"debros-ipfs",
diff --git a/pkg/client/implementations.go b/pkg/client/implementations.go
index 43f9564..1eccec5 100644
--- a/pkg/client/implementations.go
+++ b/pkg/client/implementations.go
@@ -2,7 +2,9 @@ package client

 import (
 	"context"
+	"encoding/json"
 	"fmt"
+	"net/http"
 	"strings"
 	"sync"
 	"time"
@@ -504,15 +506,58 @@ func (n *NetworkInfoImpl) GetStatus(ctx context.Context) (*NetworkStatus, error) {
 		}
 	}

+	// Try to get IPFS peer info (optional - don't fail if unavailable)
+	ipfsInfo := queryIPFSPeerInfo()
+
 	return &NetworkStatus{
 		NodeID:       host.ID().String(),
+		PeerID:       host.ID().String(),
 		Connected:    true,
 		PeerCount:    len(connectedPeers),
 		DatabaseSize: dbSize,
 		Uptime:       time.Since(n.client.startTime),
+		IPFS:         ipfsInfo,
 	}, nil
 }

+// queryIPFSPeerInfo queries the local IPFS API for peer information
+// Returns nil if IPFS is not running or unavailable
+func queryIPFSPeerInfo() *IPFSPeerInfo {
+	// IPFS API typically runs on port 4501 in our setup
+	client := &http.Client{Timeout: 2 * time.Second}
+	resp, err := client.Post("http://localhost:4501/api/v0/id", "", nil)
+	if err != nil {
+		return nil // IPFS not available
+	}
+	defer resp.Body.Close()
+
+	if resp.StatusCode != http.StatusOK {
+		return nil
+	}
+
+	var result struct {
+		ID        string   `json:"ID"`
+		Addresses []string `json:"Addresses"`
+	}
+	if err := json.NewDecoder(resp.Body).Decode(&result); err != nil {
+		return nil
+	}
+
+	// Filter out loopback addresses, which are useless for external discovery
+	var swarmAddrs []string
+	for _, addr := range result.Addresses {
+		// Skip IPv4 and IPv6 loopback; private-range addresses are kept as-is
+		if !strings.Contains(addr, "127.0.0.1") && !strings.Contains(addr, "/ip6/::1") {
+			swarmAddrs = append(swarmAddrs, addr)
+		}
+	}
+
+	return &IPFSPeerInfo{
+		PeerID:         result.ID,
+		SwarmAddresses: swarmAddrs,
+	}
+}
+
 // ConnectToPeer connects to a specific peer
 func (n *NetworkInfoImpl) ConnectToPeer(ctx context.Context, peerAddr string) error {
 	if !n.client.isConnected() {
diff --git a/pkg/client/interface.go b/pkg/client/interface.go
index bae966e..b4de258 100644
--- a/pkg/client/interface.go
+++ b/pkg/client/interface.go
@@ -115,10 +115,18 @@ type PeerInfo struct {
 // NetworkStatus contains overall network status
 type NetworkStatus struct {
 	NodeID       string        `json:"node_id"`
+	PeerID       string        `json:"peer_id"`
 	Connected    bool          `json:"connected"`
 	PeerCount    int           `json:"peer_count"`
 	DatabaseSize int64         `json:"database_size"`
 	Uptime       time.Duration `json:"uptime"`
+	IPFS         *IPFSPeerInfo `json:"ipfs,omitempty"`
+}
+
+// IPFSPeerInfo contains IPFS peer information for discovery
+type IPFSPeerInfo struct {
+	PeerID         string   `json:"peer_id"`
+	SwarmAddresses []string `json:"swarm_addresses"`
 }

 // HealthStatus contains health check information
diff --git a/pkg/config/config.go b/pkg/config/config.go
index ab5b6c9..2750b0d 100644
--- a/pkg/config/config.go
+++ b/pkg/config/config.go
@@ -38,6 +38,13 @@ type DatabaseConfig struct {
 	RQLiteRaftPort    int    `yaml:"rqlite_raft_port"`    // RQLite Raft consensus port
 	RQLiteJoinAddress string `yaml:"rqlite_join_address"` // Address to join RQLite cluster

+	// RQLite node-to-node TLS encryption (for inter-node Raft communication)
+	// See: https://rqlite.io/docs/guides/security/#encrypting-node-to-node-communication
+	NodeCert     string `yaml:"node_cert"`      // Path to X.509 certificate for node-to-node communication
+	NodeKey      string `yaml:"node_key"`       // Path to X.509 private key for node-to-node communication
+	NodeCACert   string `yaml:"node_ca_cert"`   // Path to CA certificate (optional, uses system CA if not set)
+	NodeNoVerify bool   `yaml:"node_no_verify"` // Skip certificate verification (for testing/self-signed certs)
+
 	// Dynamic discovery configuration (always enabled)
 	ClusterSyncInterval time.Duration `yaml:"cluster_sync_interval"` // default: 30s
 	PeerInactivityLimit time.Duration `yaml:"peer_inactivity_limit"` // default: 24h
diff --git a/pkg/environments/development/config.go b/pkg/environments/development/config.go
index e83ec8b..66b4904 100644
--- a/pkg/environments/development/config.go
+++ b/pkg/environments/development/config.go
@@ -178,9 +178,11 @@ func (ce *ConfigEnsurer) ensureOlric() error {
 	topology := DefaultTopology()

 	data := templates.OlricConfigData{
-		BindAddr:       "127.0.0.1",
-		HTTPPort:       topology.OlricHTTPPort,
-		MemberlistPort: topology.OlricMemberPort,
+		ServerBindAddr:        "127.0.0.1",
+		HTTPPort:              topology.OlricHTTPPort,
+		MemberlistBindAddr:    "127.0.0.1", // localhost for development
+		MemberlistPort:        topology.OlricMemberPort,
+		MemberlistEnvironment: "local", // development environment
 	}

 	config, err := templates.RenderOlricConfig(data)
diff --git a/pkg/environments/production/config.go b/pkg/environments/production/config.go
index cdcc23b..c5167eb 100644
--- a/pkg/environments/production/config.go
+++ b/pkg/environments/production/config.go
@@ -175,6 +175,18 @@ func (cg *ConfigGenerator) GenerateNodeConfig(peerAddresses []string, vpsIP stri
 		HTTPPort:  httpPort,
 		HTTPSPort: httpsPort,
 	}
+
+	// When HTTPS/SNI is enabled, configure RQLite node-to-node TLS encryption
+	// This allows Raft traffic to be routed through the SNI gateway
+	// Uses the same certificates as the SNI gateway (Let's Encrypt or self-signed)
+	if enableHTTPS && domain != "" {
+		data.NodeCert = filepath.Join(tlsCacheDir, domain+".crt")
+		data.NodeKey = filepath.Join(tlsCacheDir, domain+".key")
+		// Skip verification since nodes may have different domain certificates
+		// and we're routing through the SNI gateway which terminates TLS
+		data.NodeNoVerify = true
+	}
+
 	return templates.RenderNodeConfig(data)
 }

@@ -200,11 +212,13 @@ func (cg *ConfigGenerator) GenerateGatewayConfig(peerAddresses []string, enableH
 }

 // GenerateOlricConfig generates Olric configuration
-func (cg *ConfigGenerator) GenerateOlricConfig(bindAddr string, httpPort, memberlistPort int) (string, error) {
+func (cg *ConfigGenerator) GenerateOlricConfig(serverBindAddr string, httpPort int, memberlistBindAddr string, memberlistPort int, memberlistEnv string) (string, error) {
 	data := templates.OlricConfigData{
-		BindAddr:       bindAddr,
-		HTTPPort:       httpPort,
-		MemberlistPort: memberlistPort,
+		ServerBindAddr:        serverBindAddr,
+		HTTPPort:              httpPort,
+		MemberlistBindAddr:    memberlistBindAddr,
+		MemberlistPort:        memberlistPort,
+		MemberlistEnvironment: memberlistEnv,
 	}
 	return templates.RenderOlricConfig(data)
 }
diff --git a/pkg/environments/production/installers.go b/pkg/environments/production/installers.go
index 3b579f6..1c34294 100644
--- a/pkg/environments/production/installers.go
+++ b/pkg/environments/production/installers.go
@@ -412,8 +412,15 @@ func (bi *BinaryInstaller) InstallSystemDependencies() error {
 	return nil
 }

+// IPFSPeerInfo holds IPFS peer information for configuring Peering.Peers
+type IPFSPeerInfo struct {
+	PeerID string
+	Addrs  []string
+}
+
 // InitializeIPFSRepo initializes an IPFS repository for a node (unified - no bootstrap/node distinction)
-func (bi *BinaryInstaller) InitializeIPFSRepo(ipfsRepoPath string, swarmKeyPath string, apiPort, gatewayPort, swarmPort int) error {
+// If ipfsPeer is provided, configures Peering.Peers for peer discovery in private networks
+func (bi *BinaryInstaller) InitializeIPFSRepo(ipfsRepoPath string, swarmKeyPath string, apiPort, gatewayPort, swarmPort int, ipfsPeer *IPFSPeerInfo) error {
 	configPath := filepath.Join(ipfsRepoPath, "config")
 	repoExists := false
 	if _, err := os.Stat(configPath); err == nil {
@@ -494,6 +501,14 @@ func (bi *BinaryInstaller) InitializeIPFSRepo(ipfsRepoPath string, swarmKeyPath
 			return fmt.Errorf("failed while %s: %v\n%s", step.desc, err, string(output))
 		}
 	}
+
+		// Configure Peering.Peers if we have peer info (for private network discovery)
+		if ipfsPeer != nil && ipfsPeer.PeerID != "" && len(ipfsPeer.Addrs) > 0 {
+			fmt.Fprintf(bi.logWriter, "   Configuring Peering.Peers for private network discovery...\n")
+			if err := bi.configureIPFSPeering(ipfsRepoPath, ipfsPeer); err != nil {
+				return fmt.Errorf("failed to configure IPFS peering: %w", err)
+			}
+		}
 	}

 	// Fix ownership (best-effort, don't fail if it doesn't work)
@@ -555,6 +570,53 @@ func (bi *BinaryInstaller) configureIPFSAddresses(ipfsRepoPath string, apiPort,
 	return nil
 }

+// configureIPFSPeering configures Peering.Peers in the IPFS config for private network discovery
+// This allows nodes in a private swarm to find each other even without bootstrap peers
+func (bi *BinaryInstaller) configureIPFSPeering(ipfsRepoPath string, peer *IPFSPeerInfo) error {
+	configPath := filepath.Join(ipfsRepoPath, "config")
+
+	// Read existing config
+	data, err := os.ReadFile(configPath)
+	if err != nil {
+		return fmt.Errorf("failed to read IPFS config: %w", err)
+	}
+
+	var config map[string]interface{}
+	if err := json.Unmarshal(data, &config); err != nil {
+		return fmt.Errorf("failed to parse IPFS config: %w", err)
+	}
+
+	// Get existing Peering section or create new one
+	peering, ok := config["Peering"].(map[string]interface{})
+	if !ok {
+		peering = make(map[string]interface{})
+	}
+
+	// Create peer entry
+	peerEntry := map[string]interface{}{
+		"ID":    peer.PeerID,
+		"Addrs": peer.Addrs,
+	}
+
+	// Set Peering.Peers
+	peering["Peers"] = []interface{}{peerEntry}
+	config["Peering"] = peering
+
+	fmt.Fprintf(bi.logWriter, "   Adding peer: %s (%d addresses)\n", peer.PeerID, len(peer.Addrs))
+
+	// Write config back
+	updatedData, err := json.MarshalIndent(config, "", " ")
+	if err != nil {
+		return fmt.Errorf("failed to marshal IPFS config: %w", err)
+	}
+
+	if err := os.WriteFile(configPath, updatedData, 0600); err != nil {
+		return fmt.Errorf("failed to write IPFS config: %w", err)
+	}
+
+	return nil
+}
+
 // InitializeIPFSClusterConfig initializes IPFS Cluster configuration (unified - no bootstrap/node distinction)
 // This runs `ipfs-cluster-service init` to create the service.json configuration file.
 // For existing installations, it ensures the cluster secret is up to date.
diff --git a/pkg/environments/production/orchestrator.go b/pkg/environments/production/orchestrator.go
index 72d9e71..91bf9c6 100644
--- a/pkg/environments/production/orchestrator.go
+++ b/pkg/environments/production/orchestrator.go
@@ -272,7 +272,8 @@ func (ps *ProductionSetup) Phase2bInstallBinaries() error {
 }

 // Phase2cInitializeServices initializes service repositories and configurations
-func (ps *ProductionSetup) Phase2cInitializeServices(peerAddresses []string, vpsIP string) error {
+// ipfsPeer can be nil for the first node, or contain peer info for joining nodes
+func (ps *ProductionSetup) Phase2cInitializeServices(peerAddresses []string, vpsIP string, ipfsPeer *IPFSPeerInfo) error {
 	ps.logf("Phase 2c: Initializing services...")

 	// Ensure directories exist (unified structure)
@@ -286,7 +287,7 @@ func (ps *ProductionSetup) Phase2cInitializeServices(peerAddresses []string, vps
 	// Initialize IPFS repo with correct path structure
 	// Use port 4501 for API (to avoid conflict with RQLite on 5001), 8080 for gateway (standard), 4101 for swarm (to avoid conflict with LibP2P on 4001)
 	ipfsRepoPath := filepath.Join(dataDir, "ipfs", "repo")
-	if err := ps.binaryInstaller.InitializeIPFSRepo(ipfsRepoPath, filepath.Join(ps.oramaDir, "secrets", "swarm.key"), 4501, 8080, 4101); err != nil {
+	if err := ps.binaryInstaller.InitializeIPFSRepo(ipfsRepoPath, filepath.Join(ps.oramaDir, "secrets", "swarm.key"), 4501, 8080, 4101, ipfsPeer); err != nil {
 		return fmt.Errorf("failed to initialize IPFS repo: %w", err)
 	}

@@ -379,10 +380,17 @@ func (ps *ProductionSetup) Phase4GenerateConfigs(peerAddresses []string, vpsIP s
 	// Gateway configuration is now embedded in each node's config
 	// No separate gateway.yaml needed - each node runs its own embedded gateway

-	// Olric config - bind to localhost for security
-	// External access goes through the HTTP gateway
-	olricBindAddr := "127.0.0.1"
-	olricConfig, err := ps.configGenerator.GenerateOlricConfig(olricBindAddr, 3320, 3322)
+	// Olric config:
+	// - HTTP API binds to localhost for security (accessed via gateway)
+	// - Memberlist binds to 0.0.0.0 for cluster communication across nodes
+	// - Environment "lan" for production multi-node clustering
+	olricConfig, err := ps.configGenerator.GenerateOlricConfig(
+		"127.0.0.1", // HTTP API on localhost
+		3320,
+		"0.0.0.0", // Memberlist on all interfaces for clustering
+		3322,
+		"lan", // Production environment
+	)
 	if err != nil {
 		return fmt.Errorf("failed to generate olric config: %w", err)
 	}
@@ -417,11 +425,6 @@ func (ps *ProductionSetup) Phase5CreateSystemdServices(enableHTTPS bool) error {
 	if err != nil {
 		return fmt.Errorf("ipfs-cluster-service binary not available: %w", err)
 	}
-	// RQLite binary for separate service
-	rqliteBinary, err := ps.binaryInstaller.ResolveBinaryPath("rqlited", "/usr/local/bin/rqlited", "/usr/bin/rqlited")
-	if err != nil {
-		return fmt.Errorf("rqlited binary not available: %w", err)
-	}
 	olricBinary, err := ps.binaryInstaller.ResolveBinaryPath("olric-server", "/usr/local/bin/olric-server", "/usr/bin/olric-server")
 	if err != nil {
 		return fmt.Errorf("olric-server binary not available: %w", err)
 	}
@@ -441,17 +444,7 @@ func (ps *ProductionSetup) Phase5CreateSystemdServices(enableHTTPS bool) error {
 	}
 	ps.logf("   ✓ IPFS Cluster service created: debros-ipfs-cluster.service")

-	// RQLite service (join address and advertise IP will be handled by node bootstrap)
-	// When HTTPS/SNI is enabled, RQLite listens on internal port 7002 (SNI gateway handles external 7001)
-	raftPort := 7001
-	if enableHTTPS {
-		raftPort = 7002
-	}
-	rqliteUnit := ps.serviceGenerator.GenerateRQLiteService(rqliteBinary, 5001, raftPort, "", "")
-	if err := ps.serviceController.WriteServiceUnit("debros-rqlite.service", rqliteUnit); err != nil {
-		return fmt.Errorf("failed to write RQLite service: %w", err)
-	}
-	ps.logf("   ✓ RQLite service created: debros-rqlite.service")
+	// RQLite is managed internally by each node - no separate systemd service needed

 	// Olric service
 	olricUnit := ps.serviceGenerator.GenerateOlricService(olricBinary)
@@ -482,7 +475,8 @@ func (ps *ProductionSetup) Phase5CreateSystemdServices(enableHTTPS bool) error {

 	// Enable services (unified names - no bootstrap/node distinction)
 	// Note: debros-gateway.service is no longer needed - each node has an embedded gateway
-	services := []string{"debros-ipfs.service", "debros-ipfs-cluster.service", "debros-rqlite.service", "debros-olric.service", "debros-node.service", "debros-anyone-client.service"}
+	// Note: debros-rqlite.service is NOT created - RQLite is managed by each node internally
+	services := []string{"debros-ipfs.service", "debros-ipfs-cluster.service", "debros-olric.service", "debros-node.service", "debros-anyone-client.service"}
 	for _, svc := range services {
 		if err := ps.serviceController.EnableService(svc); err != nil {
 			ps.logf("   ⚠️ Failed to enable %s: %v", svc, err)
@@ -494,7 +488,7 @@ func (ps *ProductionSetup) Phase5CreateSystemdServices(enableHTTPS bool) error {

 	// Start services in dependency order
 	ps.logf("   Starting services...")
-	// Start infrastructure first (IPFS, Olric, Anyone Client) - RQLite is managed by node
+	// Start infrastructure first (IPFS, Olric, Anyone Client) - RQLite is managed internally by each node
 	infraServices := []string{"debros-ipfs.service", "debros-olric.service"}

 	// Check if port 9050 is already in use (e.g., another anyone-client or similar service)
@@ -547,13 +541,12 @@ func (ps *ProductionSetup) LogSetupComplete(peerID string) {
 	ps.logf("\nLog Files:")
 	ps.logf("  %s/logs/ipfs.log", ps.oramaDir)
 	ps.logf("  %s/logs/ipfs-cluster.log", ps.oramaDir)
-	ps.logf("  %s/logs/rqlite.log", ps.oramaDir)
 	ps.logf("  %s/logs/olric.log", ps.oramaDir)
 	ps.logf("  %s/logs/node.log", ps.oramaDir)
 	ps.logf("  %s/logs/gateway.log", ps.oramaDir)
 	ps.logf("  %s/logs/anyone-client.log", ps.oramaDir)
 	ps.logf("\nStart All Services:")
-	ps.logf("  systemctl start debros-ipfs debros-ipfs-cluster debros-rqlite debros-olric debros-anyone-client debros-node")
+	ps.logf("  systemctl start debros-ipfs debros-ipfs-cluster debros-olric debros-anyone-client debros-node")
 	ps.logf("\nVerify Installation:")
 	ps.logf("  curl http://localhost:6001/health")
 	ps.logf("  curl http://localhost:5001/status")
diff --git a/pkg/environments/production/provisioner.go b/pkg/environments/production/provisioner.go
index d11edfc..d095dbe 100644
--- a/pkg/environments/production/provisioner.go
+++ b/pkg/environments/production/provisioner.go
@@ -52,7 +52,9 @@ func (fp *FilesystemProvisioner) EnsureDirectoryStructure() error {
 	// The correct location is .orama/secrets/cluster-secret
 	strayClusterSecret := filepath.Join(fp.oramaDir, "cluster-secret")
 	if _, err := os.Stat(strayClusterSecret); err == nil {
-		os.Remove(strayClusterSecret)
+		if err := os.Remove(strayClusterSecret); err != nil {
+			return fmt.Errorf("failed to remove stray cluster-secret file: %w", err)
+		}
 	}

 	// Create log files with correct permissions so systemd can write to them
@@ -63,7 +65,6 @@ func (fp *FilesystemProvisioner) EnsureDirectoryStructure() error {
 		"ipfs.log",
 		"ipfs-cluster.log",
 		"node.log",
-		"rqlite.log",
 		"anyone-client.log",
 	}

diff --git a/pkg/environments/production/services.go b/pkg/environments/production/services.go
index 3f181dd..10136b6 100644
--- a/pkg/environments/production/services.go
+++ b/pkg/environments/production/services.go
@@ -115,7 +115,6 @@ WantedBy=multi-user.target
 // GenerateRQLiteService generates the RQLite systemd unit
 func (ssg *SystemdServiceGenerator) GenerateRQLiteService(rqliteBinary string, httpPort, raftPort int, joinAddr string, advertiseIP string) string {
 	dataDir := filepath.Join(ssg.oramaDir, "data", "rqlite")
-	logFile := filepath.Join(ssg.oramaDir, "logs", "rqlite.log")

 	// Use public IP for advertise if provided, otherwise default to localhost
 	if advertiseIP == "" {
@@ -164,7 +163,7 @@ ReadWritePaths=%[4]s

 [Install]
 WantedBy=multi-user.target
-`, ssg.oramaHome, args, logFile, ssg.oramaDir, rqliteBinary)
+`, ssg.oramaHome, args, ssg.oramaDir, rqliteBinary)
 }

 // GenerateOlricService generates the Olric systemd unit
@@ -213,9 +212,8 @@ func (ssg *SystemdServiceGenerator) GenerateNodeService() string {
 	return fmt.Sprintf(`[Unit]
 Description=DeBros Network Node
-After=debros-ipfs-cluster.service debros-rqlite.service
-Wants=debros-ipfs-cluster.service debros-rqlite.service
-Requires=debros-ipfs-cluster.service debros-rqlite.service
+After=debros-ipfs-cluster.service debros-olric.service
+Wants=debros-ipfs-cluster.service debros-olric.service

 [Service]
 Type=simple
diff --git a/pkg/environments/templates/node.yaml b/pkg/environments/templates/node.yaml
index 764f3da..bd1cb5a 100644
--- a/pkg/environments/templates/node.yaml
+++ b/pkg/environments/templates/node.yaml
@@ -15,9 +15,14 @@ database:
   rqlite_port: {{.RQLiteHTTPPort}}
   rqlite_raft_port: {{.RQLiteRaftInternalPort}}
   rqlite_join_address: "{{.RQLiteJoinAddress}}"
-  cluster_sync_interval: "30s"
+  {{if .NodeCert}}# Node-to-node TLS encryption for Raft communication (required for SNI gateway routing)
+  node_cert: "{{.NodeCert}}"
+  node_key: "{{.NodeKey}}"
+  {{if .NodeCACert}}node_ca_cert: "{{.NodeCACert}}"
+  {{end}}{{if .NodeNoVerify}}node_no_verify: true
+  {{end}}{{end}}cluster_sync_interval: "30s"
   peer_inactivity_limit: "24h"
-  min_cluster_size: 3
+  min_cluster_size: 1
 ipfs:
   cluster_api_url: "http://localhost:{{.ClusterAPIPort}}"
   api_url: "http://localhost:{{.IPFSAPIPort}}"
diff --git a/pkg/environments/templates/olric.yaml b/pkg/environments/templates/olric.yaml
index cb5f85b..c1d00cc 100644
--- a/pkg/environments/templates/olric.yaml
+++ b/pkg/environments/templates/olric.yaml
@@ -1,8 +1,8 @@
 server:
-  bindAddr: "{{.BindAddr}}"
+  bindAddr: "{{.ServerBindAddr}}"
   bindPort: {{ .HTTPPort }}

 memberlist:
-  environment: local
-  bindAddr: "{{.BindAddr}}"
+  environment: {{ .MemberlistEnvironment }}
+  bindAddr: "{{.MemberlistBindAddr}}"
   bindPort: {{ .MemberlistPort }}
diff --git a/pkg/environments/templates/render.go b/pkg/environments/templates/render.go
index baeb020..0ee2209 100644
--- a/pkg/environments/templates/render.go
+++ b/pkg/environments/templates/render.go
@@ -31,6 +31,13 @@ type NodeConfigData struct {
 	TLSCacheDir string // Directory for ACME certificate cache
 	HTTPPort    int    // HTTP port for ACME challenges (usually 80)
 	HTTPSPort   int    // HTTPS port (usually 443)
+
+	// Node-to-node TLS encryption for RQLite Raft communication
+	// Required when using SNI gateway for Raft traffic routing
+	NodeCert     string // Path to X.509 certificate for node-to-node communication
+	NodeKey      string // Path to X.509 private key for node-to-node communication
+	NodeCACert   string // Path to CA certificate (optional)
+	NodeNoVerify bool   // Skip certificate verification (for self-signed certs)
 }

 // GatewayConfigData holds parameters for gateway.yaml rendering
@@ -48,9 +55,11 @@ type GatewayConfigData struct {

 // OlricConfigData holds parameters for olric.yaml rendering
 type OlricConfigData struct {
-	BindAddr       string
-	HTTPPort       int
-	MemberlistPort int
+	ServerBindAddr        string // HTTP API bind address (127.0.0.1 for security)
+	HTTPPort              int
+	MemberlistBindAddr    string // Memberlist bind address (0.0.0.0 for clustering)
+	MemberlistPort        int
+	MemberlistEnvironment string // "local", "lan", or "wan"
 }

 // SystemdIPFSData holds parameters for systemd IPFS service rendering
diff --git a/pkg/environments/templates/render_test.go b/pkg/environments/templates/render_test.go
index 54ee93e..3123f64 100644
--- a/pkg/environments/templates/render_test.go
+++ b/pkg/environments/templates/render_test.go
@@ -74,9 +74,11 @@ func TestRenderGatewayConfig(t *testing.T) {

 func TestRenderOlricConfig(t *testing.T) {
 	data := OlricConfigData{
-		BindAddr:       "127.0.0.1",
-		HTTPPort:       3320,
-		MemberlistPort: 3322,
+		ServerBindAddr:        "127.0.0.1",
+		HTTPPort:              3320,
+		MemberlistBindAddr:    "0.0.0.0",
+		MemberlistPort:        3322,
+		MemberlistEnvironment: "lan",
 	}

 	result, err := RenderOlricConfig(data)
@@ -90,6 +92,7 @@ func TestRenderOlricConfig(t *testing.T) {
 		"bindPort: 3320",
 		"memberlist",
 		"bindPort: 3322",
+		"environment: lan",
 	}

 	for _, check := range checks {
diff --git a/pkg/gateway/gateway.go b/pkg/gateway/gateway.go
index dfb1c62..118e784 100644
--- a/pkg/gateway/gateway.go
+++ b/pkg/gateway/gateway.go
@@ -37,6 +37,7 @@ type Config struct {
 	ListenAddr      string
 	ClientNamespace string
 	BootstrapPeers  []string
+	NodePeerID      string // The node's actual peer ID from its identity file

 	// Optional DSN for rqlite database/sql driver, e.g. "http://localhost:4001"
 	// If empty, defaults to "http://localhost:4001".
@@ -60,12 +61,13 @@
 }

 type Gateway struct {
-	logger     *logging.ColoredLogger
-	cfg        *Config
-	client     client.NetworkClient
-	startedAt  time.Time
-	signingKey *rsa.PrivateKey
-	keyID      string
+	logger     *logging.ColoredLogger
+	cfg        *Config
+	client     client.NetworkClient
+	nodePeerID string // The node's actual peer ID from its identity file (overrides client's peer ID)
+	startedAt  time.Time
+	signingKey *rsa.PrivateKey
+	keyID      string

 	// rqlite SQL connection and HTTP ORM gateway
 	sqlDB *sql.DB
@@ -123,6 +125,7 @@ func New(logger *logging.ColoredLogger, cfg *Config) (*Gateway, error) {
 		logger:           logger,
 		cfg:              cfg,
 		client:           c,
+		nodePeerID:       cfg.NodePeerID,
 		startedAt:        time.Now(),
 		localSubscribers: make(map[string][]*localSubscriber),
 	}
diff --git a/pkg/gateway/https.go b/pkg/gateway/https.go
index 9e6c56a..38d63be 100644
--- a/pkg/gateway/https.go
+++ b/pkg/gateway/https.go
@@ -5,7 +5,6 @@ import (
 	"crypto/tls"
 	"fmt"
 	"net/http"
-	"os"
 	"strings"
 	"time"

@@ -57,21 +56,18 @@ func NewHTTPSGateway(logger *logging.ColoredLogger, cfg *config.HTTPGatewayConfi
 		)
 		// Don't set certManager - will use CertFile/KeyFile from config
 	} else if cfg.HTTPS.AutoCert {
-		// Use Let's Encrypt (existing logic)
+		// Use Let's Encrypt STAGING (consistent with SNI gateway)
 		cacheDir := cfg.HTTPS.CacheDir
 		if cacheDir == "" {
 			cacheDir = "/home/debros/.orama/tls-cache"
 		}

-		// Check environment for staging mode
-		directoryURL := "https://acme-v02.api.letsencrypt.org/directory" // Production
-		if os.Getenv("DEBROS_ACME_STAGING") != "" {
-			directoryURL = "https://acme-staging-v02.api.letsencrypt.org/directory"
-			logger.ComponentWarn(logging.ComponentGeneral,
-				"Using Let's Encrypt STAGING - certificates will not be trusted by production clients",
-				zap.String("domain", cfg.HTTPS.Domain),
-			)
-		}
+		// Use Let's Encrypt STAGING - provides higher rate limits for testing/development
+		directoryURL := "https://acme-staging-v02.api.letsencrypt.org/directory"
+		logger.ComponentWarn(logging.ComponentGeneral,
+			"Using Let's Encrypt STAGING - certificates will not be trusted by production clients",
+			zap.String("domain", cfg.HTTPS.Domain),
+		)

 		gateway.certManager = &autocert.Manager{
 			Prompt: autocert.AcceptTOS,
@@ -86,7 +82,7 @@ func NewHTTPSGateway(logger *logging.ColoredLogger, cfg *config.HTTPGatewayConfi
 		logger.ComponentInfo(logging.ComponentGeneral, "Let's Encrypt autocert configured",
 			zap.String("domain", cfg.HTTPS.Domain),
 			zap.String("cache_dir", cacheDir),
-			zap.String("acme_environment", map[bool]string{true: "staging", false: "production"}[directoryURL == "https://acme-staging-v02.api.letsencrypt.org/directory"]),
+			zap.String("acme_environment", "staging"),
 		)
 	}

diff --git a/pkg/gateway/middleware.go b/pkg/gateway/middleware.go
index 198a831..6d74564 100644
--- a/pkg/gateway/middleware.go
+++ b/pkg/gateway/middleware.go
@@ -178,8 +178,13 @@ func extractAPIKey(r *http.Request) string {

 // isPublicPath returns true for routes that should be accessible without API key auth
 func isPublicPath(p string) bool {
+	// Allow ACME challenges for Let's Encrypt certificate provisioning
+	if strings.HasPrefix(p, "/.well-known/acme-challenge/") {
+		return true
+	}
+
 	switch p {
-	case "/health", "/v1/health", "/status", "/v1/status", "/v1/auth/jwks", "/.well-known/jwks.json", "/v1/version", "/v1/auth/login", "/v1/auth/challenge", "/v1/auth/verify", "/v1/auth/register", "/v1/auth/refresh", "/v1/auth/logout", "/v1/auth/api-key", "/v1/auth/simple-key":
+	case "/health", "/v1/health", "/status", "/v1/status", "/v1/auth/jwks", "/.well-known/jwks.json", "/v1/version", "/v1/auth/login", "/v1/auth/challenge", "/v1/auth/verify", "/v1/auth/register", "/v1/auth/refresh", "/v1/auth/logout", "/v1/auth/api-key", "/v1/auth/simple-key", "/v1/network/status", "/v1/network/peers":
 		return true
 	default:
 		return false
diff --git a/pkg/gateway/storage_handlers.go b/pkg/gateway/storage_handlers.go
index 3bb74d6..925eb29 100644
--- a/pkg/gateway/storage_handlers.go
+++ b/pkg/gateway/storage_handlers.go
@@ -386,6 +386,11 @@ func (g *Gateway) networkStatusHandler(w http.ResponseWriter, r *http.Request) {
 		writeError(w, http.StatusInternalServerError, err.Error())
 		return
 	}
+	// Override with the node's actual peer ID if available
+	// (the client's embedded host has a different temporary peer ID)
+	if g.nodePeerID != "" {
+		status.PeerID = g.nodePeerID
+	}
 	writeJSON(w, http.StatusOK, status)
 }

diff --git a/pkg/installer/installer.go b/pkg/installer/installer.go
index a60ade5..50d76da 100644
--- a/pkg/installer/installer.go
+++ b/pkg/installer/installer.go
@@ -22,15 +22,18 @@ import (

 // InstallerConfig holds the configuration gathered from the TUI
 type InstallerConfig struct {
-	VpsIP         string
-	Domain        string
-	PeerDomain    string   // Domain of existing node to join
-	JoinAddress   string   // Auto-populated: raft.{PeerDomain}:7001
-	Peers         []string // Auto-populated: /dns4/{PeerDomain}/tcp/4001/p2p/{PeerID}
-	ClusterSecret string
-	Branch        string
-	IsFirstNode   bool
-	NoPull        bool
+	VpsIP          string
+	Domain         string
+	PeerDomain     string   // Domain of existing node to join
+	JoinAddress    string   // Auto-populated: raft.{PeerDomain}:7001
+	Peers          []string // Auto-populated: /dns4/{PeerDomain}/tcp/4001/p2p/{PeerID}
+	ClusterSecret  string
+	SwarmKeyHex    string   // 64-hex IPFS swarm key (for joining private network)
+	IPFSPeerID     string   // IPFS peer ID (auto-discovered from peer domain)
+	IPFSSwarmAddrs []string // IPFS swarm addresses (auto-discovered from peer domain)
+	Branch         string
+	IsFirstNode    bool
+	NoPull         bool
 }

 // Step represents a step in the installation wizard
@@ -43,6 +46,7 @@ const (
 	StepDomain
 	StepPeerDomain // Domain of existing node to join (replaces StepJoinAddress)
 	StepClusterSecret
+	StepSwarmKey // 64-hex swarm key for IPFS private network
 	StepBranch
 	StepNoPull
 	StepConfirm
@@ -171,7 +175,7 @@ func (m Model) Update(msg tea.Msg) (tea.Model, tea.Cmd) {
 	}

 	// Update text input for input steps
-	if m.step == StepVpsIP || m.step == StepDomain || m.step == StepPeerDomain || m.step == StepClusterSecret {
+	if m.step == StepVpsIP || m.step == StepDomain || m.step == StepPeerDomain || m.step == StepClusterSecret || m.step == StepSwarmKey {
 		var cmd tea.Cmd
 		m.textInput, cmd = m.textInput.Update(msg)
 		return m, cmd
@@ -208,6 +212,18 @@ func (m *Model) handleEnter() (tea.Model, tea.Cmd) {
 			m.err = err
 			return m, nil
 		}
+
+		// Check SNI DNS records for this domain
+		m.discovering = true
+		m.discoveryInfo = "Validating SNI DNS records for " + domain + "..."
+
+		if err := validateSNIDNSRecords(domain); err != nil {
+			m.discovering = false
+			m.err = fmt.Errorf("SNI DNS validation failed: %w", err)
+			return m, nil
+		}
+
+		m.discovering = false
 		m.config.Domain = domain
 		m.err = nil
@@ -238,11 +254,21 @@ func (m *Model) handleEnter() (tea.Model, tea.Cmd) {
 			return m, nil
 		}

+		// Validate SNI DNS records for peer domain
+		m.discovering = true
+		m.discoveryInfo = "Validating SNI DNS records for " + peerDomain + "..."
+
+		if err := validateSNIDNSRecords(peerDomain); err != nil {
+			m.discovering = false
+			m.err = fmt.Errorf("SNI DNS validation failed: %w", err)
+			return m, nil
+		}
+
 		// Discover peer info from domain (try HTTPS first, then HTTP)
 		m.discovering = true
 		m.discoveryInfo = "Discovering peer from " + peerDomain + "..."

-		peerID, err := discoverPeerFromDomain(peerDomain)
+		discovery, err := discoverPeerFromDomain(peerDomain)
 		m.discovering = false

 		if err != nil {
@@ -252,12 +278,18 @@ func (m *Model) handleEnter() (tea.Model, tea.Cmd) {

 		// Store discovered info
 		m.config.PeerDomain = peerDomain
-		m.discoveredPeer = peerID
+		m.discoveredPeer = discovery.PeerID

 		// Auto-populate join address and bootstrap peers
 		m.config.JoinAddress = fmt.Sprintf("raft.%s:7001", peerDomain)
 		m.config.Peers = []string{
-			fmt.Sprintf("/dns4/%s/tcp/4001/p2p/%s", peerDomain, peerID),
+			fmt.Sprintf("/dns4/%s/tcp/4001/p2p/%s", peerDomain, discovery.PeerID),
+		}
+
+		// Store IPFS peer info for Peering.Peers configuration
+		if discovery.IPFSPeerID != "" {
+			m.config.IPFSPeerID = discovery.IPFSPeerID
+			m.config.IPFSSwarmAddrs = discovery.IPFSSwarmAddrs
 		}

 		m.err = nil
@@ -272,6 +304,17 @@ func (m *Model) handleEnter() (tea.Model, tea.Cmd) {
 		}
 		m.config.ClusterSecret = secret
 		m.err = nil
+		m.step = StepSwarmKey
+		m.setupStepInput()
+
+	case StepSwarmKey:
+		swarmKey := strings.TrimSpace(m.textInput.Value())
+		if err := validateSwarmKey(swarmKey); err != nil {
+			m.err = err
+			return m, nil
+		}
+		m.config.SwarmKeyHex = swarmKey
+		m.err = nil
 		m.step = StepBranch
 		m.cursor = 0
@@ -322,6 +365,9 @@ func (m *Model) setupStepInput() {
 	case StepClusterSecret:
 		m.textInput.Placeholder = "64 hex characters"
 		m.textInput.EchoMode = textinput.EchoPassword
+	case StepSwarmKey:
+		m.textInput.Placeholder = "64 hex characters"
+		m.textInput.EchoMode = textinput.EchoPassword
 	}
 }

@@ -358,6 +404,8 @@ func (m Model) View() string {
 		s.WriteString(m.viewPeerDomain())
 	case StepClusterSecret:
 		s.WriteString(m.viewClusterSecret())
+	case StepSwarmKey:
+		s.WriteString(m.viewSwarmKey())
 	case StepBranch:
 		s.WriteString(m.viewBranch())
 	case StepNoPull:
@@ -488,6 +536,22 @@ func (m Model) viewClusterSecret() string {
 	return s.String()
 }

+func (m Model) viewSwarmKey() string {
+	var s strings.Builder
+	s.WriteString(titleStyle.Render("IPFS Swarm Key") + "\n\n")
+	s.WriteString("Enter the swarm key from an existing node:\n")
+	s.WriteString(subtitleStyle.Render("Get it with: cat ~/.orama/secrets/swarm.key | tail -1") + "\n\n")
+	s.WriteString(m.textInput.View())
+
+	if m.err != nil {
+		s.WriteString("\n\n" + errorStyle.Render("✗ " + m.err.Error()))
+	}
+
+	s.WriteString("\n\n")
+	s.WriteString(helpStyle.Render("Enter to confirm • Esc to go back"))
+	return s.String()
+}
+
 func (m Model) viewBranch() string {
 	var s strings.Builder
 	s.WriteString(titleStyle.Render("Release Channel") + "\n\n")
@@ -557,6 +621,12 @@ func (m Model) viewConfirm() string {
 		if len(m.config.ClusterSecret) >= 8 {
 			config += fmt.Sprintf("  Secret: %s...\n", m.config.ClusterSecret[:8])
 		}
+		if len(m.config.SwarmKeyHex) >= 8 {
+			config += fmt.Sprintf("  Swarm Key: %s...\n", m.config.SwarmKeyHex[:8])
+		}
+		if m.config.IPFSPeerID != "" {
+			config += fmt.Sprintf("  IPFS Peer: %s...\n", m.config.IPFSPeerID[:16])
+		}
 	}

 	s.WriteString(boxStyle.Render(config))
@@ -617,10 +687,17 @@ func validateDomain(domain string) error {
 	return nil
 }

-// discoverPeerFromDomain queries an existing node to get its peer ID
+// DiscoveryResult contains all information discovered from a peer node
+type DiscoveryResult struct {
+	PeerID         string   // LibP2P peer ID
+	IPFSPeerID     string   // IPFS peer ID
+	IPFSSwarmAddrs []string // IPFS swarm addresses
+}
+
+// discoverPeerFromDomain queries an existing node to get its peer ID and IPFS info
 // Tries HTTPS first, then falls back to HTTP
 // Respects DEBROS_TRUSTED_TLS_DOMAINS and DEBROS_CA_CERT_PATH environment variables for certificate verification
-func discoverPeerFromDomain(domain string) (string, error) {
+func discoverPeerFromDomain(domain string) (*DiscoveryResult, error) {
 	// Use centralized TLS configuration that respects CA certificates and trusted domains
 	client := tlsutil.NewHTTPClientForDomain(10*time.Second, domain)

@@ -634,28 +711,49 @@ func discoverPeerFromDomain(domain string) (string, error) {
 		url = fmt.Sprintf("http://%s/v1/network/status", domain)
 		resp, err = client.Get(url)
 		if err != nil {
-			return "", fmt.Errorf("could not connect to %s (tried HTTPS and HTTP): %w", domain, err)
+			return nil, fmt.Errorf("could not connect to %s (tried HTTPS and HTTP): %w", domain, err)
 		}
 	}
 	defer resp.Body.Close()

 	if resp.StatusCode != http.StatusOK {
-		return "", fmt.Errorf("unexpected status from %s: %s", domain, resp.Status)
+		return nil, fmt.Errorf("unexpected status from %s: %s", domain, resp.Status)
 	}

-	// Parse response
+	// Parse response including IPFS info
 	var status struct {
-		NodeID string `json:"node_id"`
+		PeerID string `json:"peer_id"`
+		NodeID string `json:"node_id"` // fallback for backward compatibility
+		IPFS   *struct {
+			PeerID         string   `json:"peer_id"`
+			SwarmAddresses []string `json:"swarm_addresses"`
+		} `json:"ipfs,omitempty"`
 	}
 	if err := json.NewDecoder(resp.Body).Decode(&status); err != nil {
-		return "", fmt.Errorf("failed to parse response from %s: %w", domain, err)
+		return nil, fmt.Errorf("failed to parse response from %s: %w", domain, err)
 	}

-	if status.NodeID == "" {
-		return "", fmt.Errorf("no node_id in response from %s", domain)
+	// Use peer_id if available, otherwise fall back to node_id for backward compatibility
+	peerID := status.PeerID
+	if peerID == "" {
+		peerID = status.NodeID
 	}

-	return status.NodeID, nil
+	if peerID == "" {
+		return nil, fmt.Errorf("no peer_id or node_id in response from %s", domain)
+	}
+
+	result := &DiscoveryResult{
+		PeerID: peerID,
+	}
+
+	// Include IPFS info if available
+	if status.IPFS != nil {
+		result.IPFSPeerID = status.IPFS.PeerID
+		result.IPFSSwarmAddrs = status.IPFS.SwarmAddresses
+	}
+
+	return result, nil
 }

 func validateClusterSecret(secret string) error {
@@ -669,6 +767,17 @@ func validateClusterSecret(secret string) error {
 	return nil
 }

+func validateSwarmKey(key string) error {
+	if len(key) != 64 {
+		return fmt.Errorf("swarm key must be 64 hex characters")
+	}
+	keyRegex := regexp.MustCompile(`^[a-fA-F0-9]{64}$`)
+	if !keyRegex.MatchString(key) {
+		return fmt.Errorf("swarm key must be valid hexadecimal")
+	}
+	return nil
+}
+
 // ensureCertificatesForDomain generates self-signed certificates for the domain
 func ensureCertificatesForDomain(domain string) error {
 	// Get home directory
@@ -726,6 +835,53 @@ func detectPublicIP() string {
 	return ""
 }

+// validateSNIDNSRecords checks if the required SNI DNS records exist
+// It tries to resolve the key SNI hostnames for RQLite, IPFS, IPFS Cluster, and Olric
+// All should resolve to the same IP (the node's public IP or domain)
+func validateSNIDNSRecords(domain string) error {
+	// List of SNI services that need DNS records
+	sniServices := []string{
+		fmt.Sprintf("raft.%s", domain),
+		fmt.Sprintf("ipfs.%s", domain),
+		fmt.Sprintf("ipfs-cluster.%s", domain),
+		fmt.Sprintf("olric.%s", domain),
+	}
+
+	// Resolve the main domain first to confirm DNS works at all
+	// (note: the per-service IPs are not compared against this baseline)
+	mainIPs, err := net.LookupHost(domain)
+	if err != nil {
+		return fmt.Errorf("could not resolve main domain %s: %w", domain, err)
+	}
+
+	if len(mainIPs) == 0 {
+		return fmt.Errorf("main domain %s resolved to no IP addresses", domain)
+	}
+
+	// Check each SNI service
+	var unresolvedServices []string
+	for _, service := range sniServices {
+		ips, err := net.LookupHost(service)
+		if err != nil || len(ips) == 0 {
+			unresolvedServices = append(unresolvedServices, service)
+		}
+	}
+
+	if len(unresolvedServices) > 0 {
+		serviceList := strings.Join(unresolvedServices, ", ")
+		return fmt.Errorf(
+			"SNI DNS records not found for: %s\n\n"+
+				"You need to add DNS records (A records or wildcard CNAME) for these services:\n"+
+				"  - They should all resolve to the same IP as %s\n"+
+				"  - Option 1: Add individual A records pointing to %s\n"+
+				"  - Option 2: Add wildcard CNAME: *.%s -> %s\n\n"+
+				"Without these records, multi-node clustering will fail.",
+			serviceList, domain, domain, domain, domain,
+		)
+	}
+
+	return nil
+}
+
 // Run starts the TUI installer and returns the configuration
 func Run() (*InstallerConfig, error) {
 	// Check if running as root
diff --git a/pkg/ipfs/cluster.go b/pkg/ipfs/cluster.go
index 662e6d2..ebae2f7 100644
--- a/pkg/ipfs/cluster.go
+++ b/pkg/ipfs/cluster.go
@@ -103,12 +103,17 @@ func NewClusterConfigManager(cfg *config.Config, logger *zap.Logger) (*ClusterCo
 	}

 	// Load or generate cluster secret
+	// Always use ~/.orama/secrets/cluster-secret (new standard location)
 	secretPath := filepath.Join(dataDir, "..", "cluster-secret")
 	if strings.Contains(dataDir, ".orama") {
-		// Try to find cluster-secret in ~/.orama
+		// Use the secrets directory for proper file organization
 		home, err := os.UserHomeDir()
 		if err == nil {
-			secretPath = filepath.Join(home, ".orama", "cluster-secret")
+			secretsDir := filepath.Join(home, ".orama", "secrets")
+			// Ensure secrets directory exists
+			if err := os.MkdirAll(secretsDir, 0700); err == nil {
+				secretPath = filepath.Join(secretsDir, "cluster-secret")
+			}
 		}
 	}

diff --git a/pkg/node/node.go b/pkg/node/node.go
index 5e3f5e1..b75aca1 100644
--- a/pkg/node/node.go
+++ b/pkg/node/node.go
@@ -694,6 +694,28 @@ func (n *Node) Stop() error {
 	return nil
 }

+// loadNodePeerIDFromIdentity safely loads the node's peer ID from its identity file
+// This is needed before the host is initialized, so we read directly from the file
+func loadNodePeerIDFromIdentity(dataDir string) string {
+	identityFile := filepath.Join(os.ExpandEnv(dataDir), "identity.key")
+
+	// Expand ~ in path
+	if strings.HasPrefix(identityFile, "~") {
+		home, err := os.UserHomeDir()
+		if err != nil {
+			return ""
+		}
+		identityFile = filepath.Join(home, identityFile[1:])
+	}
+
+	// Load identity from file
+	if info, err := encryption.LoadIdentity(identityFile); err == nil {
+		return info.PeerID.String()
+	}
+
+	return "" // Return empty string if can't load (gateway will work without it)
+}
+
 // startHTTPGateway initializes and starts the full API gateway with auth, pubsub, and API endpoints
 func (n *Node) startHTTPGateway(ctx context.Context) error {
 	if !n.config.HTTPGateway.Enabled {
@@ -721,6 +743,7 @@ func (n *Node) startHTTPGateway(ctx context.Context) error {
 		ListenAddr:      n.config.HTTPGateway.ListenAddr,
 		ClientNamespace: n.config.HTTPGateway.ClientNamespace,
 		BootstrapPeers:  n.config.Discovery.BootstrapPeers,
+		NodePeerID:      loadNodePeerIDFromIdentity(n.config.Node.DataDir), // Load the node's actual peer ID from its identity file
 		RQLiteDSN:       n.config.HTTPGateway.RQLiteDSN,
 		OlricServers:    n.config.HTTPGateway.OlricServers,
 		OlricTimeout:    n.config.HTTPGateway.OlricTimeout,
@@ -750,6 +773,18 @@ func (n *Node) startHTTPGateway(ctx context.Context) error {
 		tlsCacheDir = "/home/debros/.orama/tls-cache"
 	}

+	// Ensure TLS cache directory exists and is writable
+	if err := os.MkdirAll(tlsCacheDir, 0700); err != nil {
+		n.logger.ComponentWarn(logging.ComponentNode, "Failed to create TLS cache directory",
+			zap.String("dir", tlsCacheDir),
+			zap.Error(err),
+		)
+	} else {
+		n.logger.ComponentInfo(logging.ComponentNode, "TLS cache directory ready",
+			zap.String("dir", tlsCacheDir),
+		)
+	}
+
 	// Create TLS configuration with Let's Encrypt autocert
 	// Using STAGING environment to avoid rate limits during development/testing
 	// TODO: Switch to production when ready (remove Client field)
@@ -799,16 +834,25 @@ func (n *Node) startHTTPGateway(ctx context.Context) error {
 	}

 	// Create HTTP listener first to ensure port 80 is bound before signaling ready
+	gatewayLogger.ComponentInfo(logging.ComponentGateway, "Binding HTTP listener for ACME challenges",
+		zap.Int("port", httpPort),
+	)
 	httpListener, err := net.Listen("tcp", fmt.Sprintf(":%d", httpPort))
 	if err != nil {
 		gatewayLogger.ComponentError(logging.ComponentGateway, "failed to bind HTTP listener for ACME", zap.Error(err))
 		close(httpReady) // Signal even on failure so SNI goroutine doesn't hang
 		return
 	}
-	gatewayLogger.ComponentInfo(logging.ComponentGateway, "HTTP server ready for ACME challenges", zap.Int("port", httpPort))
+	gatewayLogger.ComponentInfo(logging.ComponentGateway, "HTTP server ready for ACME challenges",
+		zap.Int("port", httpPort),
+		zap.String("tls_cache_dir", tlsCacheDir),
+	)

 	// Start HTTP server in background for ACME challenges
 	go func() {
+		gatewayLogger.ComponentInfo(logging.ComponentGateway, "HTTP server serving ACME challenges",
+			zap.String("addr", httpServer.Addr),
+		)
 		if err := httpServer.Serve(httpListener); err != nil && err != http.ErrServerClosed {
 			gatewayLogger.ComponentError(logging.ComponentGateway, "HTTP server error", zap.Error(err))
 		}
@@ -829,10 +873,23 @@ func (n *Node) startHTTPGateway(ctx context.Context) error {
 		ServerName: gwCfg.DomainName,
 	}

+	gatewayLogger.ComponentInfo(logging.ComponentGateway, "Initiating certificate request to Let's Encrypt",
+		zap.String("domain", gwCfg.DomainName),
+		zap.String("acme_environment", "staging"),
+	)
+
 	// Try to get certificate with timeout
 	certProvisionChan := make(chan error, 1)
 	go func() {
+		gatewayLogger.ComponentInfo(logging.ComponentGateway, "GetCertificate goroutine started")
 		_, err := certManager.GetCertificate(certReq)
+		if err != nil {
+			gatewayLogger.ComponentError(logging.ComponentGateway, "GetCertificate returned error",
+				zap.Error(err),
+			)
+		} else {
+			gatewayLogger.ComponentInfo(logging.ComponentGateway, "GetCertificate succeeded")
+		}
 		certProvisionChan <- err
 	}()

@@ -840,14 +897,26 @@ func (n *Node) startHTTPGateway(ctx context.Context) error {
 	select {
 	case err := <-certProvisionChan:
 		certErr = err
+		if certErr != nil {
+			gatewayLogger.ComponentError(logging.ComponentGateway, "Certificate provisioning failed",
+				zap.String("domain", gwCfg.DomainName),
+				zap.Error(certErr),
+			)
+		}
 	case <-certCtx.Done():
 		certErr = fmt.Errorf("certificate provisioning timeout (Let's Encrypt may be rate-limited or unreachable)")
+		gatewayLogger.ComponentError(logging.ComponentGateway, "Certificate provisioning timeout",
+			zap.String("domain", gwCfg.DomainName),
+			zap.Duration("timeout", 30*time.Second),
+			zap.Error(certErr),
+		)
 	}

 	if certErr != nil {
 		gatewayLogger.ComponentError(logging.ComponentGateway, "Failed to provision TLS certificate - HTTPS disabled",
 			zap.String("domain", gwCfg.DomainName),
 			zap.Error(certErr),
+			zap.String("http_server_status", "running on port 80 for HTTP fallback"),
 		)
 		// Signal ready for SNI goroutine (even though we're failing)
 		close(httpReady)
diff --git a/pkg/rqlite/rqlite.go b/pkg/rqlite/rqlite.go
index 647cf10..6e8fda1 100644
--- a/pkg/rqlite/rqlite.go
+++ b/pkg/rqlite/rqlite.go
@@ -241,6 +241,27 @@ func (r *RQLiteManager) launchProcess(ctx context.Context, rqliteDataDir string) {
 		"-raft-addr", fmt.Sprintf("0.0.0.0:%d", r.config.RQLiteRaftPort),
 	}

+	// Add node-to-node TLS encryption if configured
+	// This enables TLS for Raft inter-node communication, required for SNI gateway routing
+	// See: https://rqlite.io/docs/guides/security/#encrypting-node-to-node-communication
+	if r.config.NodeCert != "" && r.config.NodeKey != "" {
+		r.logger.Info("Enabling node-to-node TLS encryption",
+			zap.String("node_cert", r.config.NodeCert),
+			zap.String("node_key", r.config.NodeKey),
+			zap.String("node_ca_cert", r.config.NodeCACert),
+			zap.Bool("node_no_verify", r.config.NodeNoVerify))
+
+		args = append(args, "-node-cert", r.config.NodeCert)
+		args = append(args, "-node-key", r.config.NodeKey)
+
+		if r.config.NodeCACert != "" {
+			args = append(args, "-node-ca-cert", r.config.NodeCACert)
+		}
+		if r.config.NodeNoVerify {
+			args = append(args, "-node-no-verify")
+		}
+	}
+
 	// All nodes follow the same join logic - either join specified address or start as single-node cluster
 	if r.config.RQLiteJoinAddress != "" {
 		r.logger.Info("Joining RQLite cluster", zap.String("join_address", r.config.RQLiteJoinAddress))
@@ -265,7 +286,8 @@ func (r *RQLiteManager) launchProcess(ctx context.Context, rqliteDataDir string) {

 		// Always add the join parameter in host:port form - let rqlited handle the rest
 		// Add retry parameters to handle slow cluster startup (e.g., during recovery)
-		args = append(args, "-join", joinArg, "-join-attempts", "30", "-join-interval", "10s")
+		// Include -join-as with the raft advertise address so the leader knows which node this is
+		args = append(args, "-join", joinArg, "-join-as", r.discoverConfig.RaftAdvAddress, "-join-attempts", "30", "-join-interval", "10s")
 	} else {
 		r.logger.Info("No join address specified - starting as single-node cluster")
 		// When no join address is provided, rqlited will start as a single-node cluster
diff --git a/pkg/tlsutil/client.go b/pkg/tlsutil/client.go
index f8702ef..735ce8e 100644
--- a/pkg/tlsutil/client.go
+++ b/pkg/tlsutil/client.go
@@ -18,8 +18,17 @@ var (
 	initialized bool
 )

+// Default trusted domains - always trust debros.network for staging/development
+var defaultTrustedDomains = []string{
+	"*.debros.network",
+}
+
 // init loads trusted domains and CA certificate from environment and files
 func init() {
+	// Start with default trusted domains
+	trustedDomains = append(trustedDomains, defaultTrustedDomains...)
+
+	// Add any additional domains from environment
 	domains := os.Getenv("DEBROS_TRUSTED_TLS_DOMAINS")
 	if domains != "" {
 		for _, d := range strings.Split(domains, ",") {