From 5c7767b7c80ac0afa81ff706f50caba185f53e7a Mon Sep 17 00:00:00 2001 From: anonpenguin23 Date: Thu, 27 Nov 2025 16:48:02 +0200 Subject: [PATCH] feat: enhance HTTPS support and certificate management - Added a new CertificateManager for managing self-signed certificates, ensuring secure communication within the network. - Updated the configuration to support self-signed certificates and Let's Encrypt integration for HTTPS. - Enhanced the installer to generate and manage certificates automatically, improving the setup experience. - Introduced a centralized TLS configuration for HTTP clients, ensuring consistent security practices across the application. - Updated documentation to reflect new port requirements and HTTPS setup instructions. --- CHANGELOG.md | 22 ++ Makefile | 2 +- README.md | 20 +- pkg/certutil/cert_manager.go | 257 +++++++++++++++ pkg/cli/prod_commands.go | 291 ++++++++++++++--- pkg/cli/prod_commands_test.go | 163 +++++++-- pkg/config/config.go | 19 +- pkg/config/validate.go | 4 +- pkg/config/validate_test.go | 2 +- pkg/environments/development/checks_test.go | 3 +- pkg/environments/development/health.go | 4 +- pkg/environments/development/runner.go | 4 +- pkg/environments/production/config.go | 71 ++-- pkg/environments/production/installers.go | 236 ++++++++----- pkg/environments/production/orchestrator.go | 46 ++- pkg/environments/production/provisioner.go | 15 + pkg/environments/production/services.go | 70 +++- pkg/environments/templates/node.yaml | 26 +- pkg/environments/templates/render.go | 31 +- pkg/gateway/https.go | 29 +- pkg/installer/installer.go | 266 ++++++++++++--- pkg/ipfs/cluster.go | 19 +- pkg/logging/logger.go | 3 + pkg/node/node.go | 345 +++++++++++++++++++- pkg/rqlite/rqlite.go | 10 +- pkg/tlsutil/client.go | 113 +++++++ test.sh | 4 + 27 files changed, 1748 insertions(+), 327 deletions(-) create mode 100644 pkg/certutil/cert_manager.go create mode 100644 pkg/tlsutil/client.go create mode 100755 test.sh diff --git a/CHANGELOG.md 
b/CHANGELOG.md index 5fbd02e..364ab11 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,6 +13,28 @@ The format is based on [Keep a Changelog][keepachangelog] and adheres to [Semant ### Deprecated ### Fixed +## [0.71.0] - 2025-11-27 + +### Added +- Added `certutil` package for managing self-signed CA and node certificates. +- Added support for SNI-based TCP routing for internal services (RQLite Raft, IPFS, Olric) when HTTPS is enabled. +- Added `--dry-run`, `--no-pull`, and DNS validation checks to the production installer. +- Added `tlsutil` package to centralize TLS configuration and support trusted self-signed certificates for internal communication. + +### Changed +- Refactored production installer to use a unified node architecture, removing the separate `debros-gateway` service and embedding the gateway within `debros-node`. +- Improved service health checks in the CLI with exponential backoff retries for better reliability during startup and upgrades. +- Updated RQLite to listen on an internal port (7002) when SNI is enabled, allowing the SNI gateway to handle external port 7001. +- Enhanced systemd service files with stricter security settings (e.g., `ProtectHome=read-only`, `ProtectSystem=strict`). +- Updated IPFS configuration to bind Swarm to all interfaces (0.0.0.0) for external connectivity. + +### Deprecated + +### Removed + +### Fixed +- Fixed an issue where the `anyone-client` installation could fail due to missing NPM cache directories by ensuring proper initialization and ownership. 
+ ## [0.70.0] - 2025-11-26 ### Added diff --git a/Makefile b/Makefile index 94334e1..1cb885e 100644 --- a/Makefile +++ b/Makefile @@ -19,7 +19,7 @@ test-e2e: .PHONY: build clean test run-node run-node2 run-node3 run-example deps tidy fmt vet lint clear-ports install-hooks kill -VERSION := 0.70.0 +VERSION := 0.71.0 COMMIT ?= $(shell git rev-parse --short HEAD 2>/dev/null || echo unknown) DATE ?= $(shell date -u +%Y-%m-%dT%H:%M:%SZ) LDFLAGS := -X 'main.version=$(VERSION)' -X 'main.commit=$(COMMIT)' -X 'main.date=$(DATE)' diff --git a/README.md b/README.md index cccd6b5..48d4d59 100644 --- a/README.md +++ b/README.md @@ -194,7 +194,25 @@ make build - Ubuntu 22.04+ or Debian 12+ - `amd64` or `arm64` architecture - 4GB RAM, 50GB SSD, 2 CPU cores -- Ports 80, 443 (HTTPS), 7001 (TCP/SNI gateway) + +### Required Ports + +**External (must be open in firewall):** + +- **80** - HTTP (ACME/Let's Encrypt certificate challenges) +- **443** - HTTPS (Main gateway API endpoint) +- **4101** - IPFS Swarm (peer connections) +- **7001** - RQLite Raft (cluster consensus) + +**Internal (bound to localhost, no firewall needed):** + +- 4501 - IPFS API +- 5001 - RQLite HTTP API +- 6001 - Unified Gateway +- 8080 - IPFS Gateway +- 9050 - Anyone Client SOCKS5 proxy +- 9094 - IPFS Cluster API +- 3320/3322 - Olric Cache ### Installation diff --git a/pkg/certutil/cert_manager.go b/pkg/certutil/cert_manager.go new file mode 100644 index 0000000..db484e5 --- /dev/null +++ b/pkg/certutil/cert_manager.go @@ -0,0 +1,257 @@ +// Package certutil provides utilities for managing self-signed certificates +package certutil + +import ( + "crypto/rand" + "crypto/rsa" + "crypto/tls" + "crypto/x509" + "crypto/x509/pkix" + "encoding/pem" + "fmt" + "math/big" + "net" + "os" + "path/filepath" + "time" +) + +// CertificateManager manages self-signed certificates for the network +type CertificateManager struct { + baseDir string +} + +// NewCertificateManager creates a new certificate manager +func 
NewCertificateManager(baseDir string) *CertificateManager { + return &CertificateManager{ + baseDir: baseDir, + } +} + +// EnsureCACertificate creates or loads the CA certificate +func (cm *CertificateManager) EnsureCACertificate() ([]byte, []byte, error) { + caCertPath := filepath.Join(cm.baseDir, "ca.crt") + caKeyPath := filepath.Join(cm.baseDir, "ca.key") + + // Check if CA already exists + if _, err := os.Stat(caCertPath); err == nil { + certPEM, err := os.ReadFile(caCertPath) + if err != nil { + return nil, nil, fmt.Errorf("failed to read CA certificate: %w", err) + } + keyPEM, err := os.ReadFile(caKeyPath) + if err != nil { + return nil, nil, fmt.Errorf("failed to read CA key: %w", err) + } + return certPEM, keyPEM, nil + } + + // Create new CA certificate + certPEM, keyPEM, err := cm.generateCACertificate() + if err != nil { + return nil, nil, err + } + + // Ensure directory exists + if err := os.MkdirAll(cm.baseDir, 0700); err != nil { + return nil, nil, fmt.Errorf("failed to create cert directory: %w", err) + } + + // Write to files + if err := os.WriteFile(caCertPath, certPEM, 0644); err != nil { + return nil, nil, fmt.Errorf("failed to write CA certificate: %w", err) + } + if err := os.WriteFile(caKeyPath, keyPEM, 0600); err != nil { + return nil, nil, fmt.Errorf("failed to write CA key: %w", err) + } + + return certPEM, keyPEM, nil +} + +// EnsureNodeCertificate creates or loads a node certificate signed by the CA +func (cm *CertificateManager) EnsureNodeCertificate(hostname string, caCertPEM, caKeyPEM []byte) ([]byte, []byte, error) { + certPath := filepath.Join(cm.baseDir, fmt.Sprintf("%s.crt", hostname)) + keyPath := filepath.Join(cm.baseDir, fmt.Sprintf("%s.key", hostname)) + + // Check if certificate already exists + if _, err := os.Stat(certPath); err == nil { + certData, err := os.ReadFile(certPath) + if err != nil { + return nil, nil, fmt.Errorf("failed to read certificate: %w", err) + } + keyData, err := os.ReadFile(keyPath) + if err != nil { + 
return nil, nil, fmt.Errorf("failed to read key: %w", err) + } + return certData, keyData, nil + } + + // Create new certificate + certPEM, keyPEM, err := cm.generateNodeCertificate(hostname, caCertPEM, caKeyPEM) + if err != nil { + return nil, nil, err + } + + // Write to files + if err := os.WriteFile(certPath, certPEM, 0644); err != nil { + return nil, nil, fmt.Errorf("failed to write certificate: %w", err) + } + if err := os.WriteFile(keyPath, keyPEM, 0600); err != nil { + return nil, nil, fmt.Errorf("failed to write key: %w", err) + } + + return certPEM, keyPEM, nil +} + +// generateCACertificate generates a self-signed CA certificate +func (cm *CertificateManager) generateCACertificate() ([]byte, []byte, error) { + // Generate private key + privateKey, err := rsa.GenerateKey(rand.Reader, 4096) + if err != nil { + return nil, nil, fmt.Errorf("failed to generate private key: %w", err) + } + + // Create certificate template + template := x509.Certificate{ + SerialNumber: big.NewInt(1), + Subject: pkix.Name{ + CommonName: "DeBros Network Root CA", + Organization: []string{"DeBros"}, + }, + NotBefore: time.Now(), + NotAfter: time.Now().AddDate(10, 0, 0), // 10 year validity + KeyUsage: x509.KeyUsageCertSign | x509.KeyUsageCRLSign, + ExtKeyUsage: []x509.ExtKeyUsage{}, + BasicConstraintsValid: true, + IsCA: true, + } + + // Self-sign the certificate + certDER, err := x509.CreateCertificate(rand.Reader, &template, &template, &privateKey.PublicKey, privateKey) + if err != nil { + return nil, nil, fmt.Errorf("failed to create certificate: %w", err) + } + + // Encode certificate to PEM + certPEM := pem.EncodeToMemory(&pem.Block{ + Type: "CERTIFICATE", + Bytes: certDER, + }) + + // Encode private key to PEM + keyDER, err := x509.MarshalPKCS8PrivateKey(privateKey) + if err != nil { + return nil, nil, fmt.Errorf("failed to marshal private key: %w", err) + } + + keyPEM := pem.EncodeToMemory(&pem.Block{ + Type: "PRIVATE KEY", + Bytes: keyDER, + }) + + return certPEM, keyPEM, 
nil +} + +// generateNodeCertificate generates a certificate signed by the CA +func (cm *CertificateManager) generateNodeCertificate(hostname string, caCertPEM, caKeyPEM []byte) ([]byte, []byte, error) { + // Parse CA certificate and key + caCert, caKey, err := cm.parseCACertificate(caCertPEM, caKeyPEM) + if err != nil { + return nil, nil, err + } + + // Generate node private key + privateKey, err := rsa.GenerateKey(rand.Reader, 2048) + if err != nil { + return nil, nil, fmt.Errorf("failed to generate private key: %w", err) + } + + // Create certificate template + template := x509.Certificate{ + SerialNumber: big.NewInt(time.Now().UnixNano()), + Subject: pkix.Name{ + CommonName: hostname, + }, + NotBefore: time.Now(), + NotAfter: time.Now().AddDate(5, 0, 0), // 5 year validity + KeyUsage: x509.KeyUsageDigitalSignature | x509.KeyUsageKeyEncipherment, + ExtKeyUsage: []x509.ExtKeyUsage{x509.ExtKeyUsageServerAuth}, + DNSNames: []string{hostname}, + } + + // Add wildcard support if hostname contains *.debros.network + if hostname == "*.debros.network" { + template.DNSNames = []string{"*.debros.network", "debros.network"} + } else if hostname == "debros.network" { + template.DNSNames = []string{"*.debros.network", "debros.network"} + } + + // Try to parse as IP address for IP-based certificates + if ip := net.ParseIP(hostname); ip != nil { + template.IPAddresses = []net.IP{ip} + template.DNSNames = nil + } + + // Sign certificate with CA + certDER, err := x509.CreateCertificate(rand.Reader, &template, caCert, &privateKey.PublicKey, caKey) + if err != nil { + return nil, nil, fmt.Errorf("failed to create certificate: %w", err) + } + + // Encode certificate to PEM + certPEM := pem.EncodeToMemory(&pem.Block{ + Type: "CERTIFICATE", + Bytes: certDER, + }) + + // Encode private key to PEM + keyDER, err := x509.MarshalPKCS8PrivateKey(privateKey) + if err != nil { + return nil, nil, fmt.Errorf("failed to marshal private key: %w", err) + } + + keyPEM := 
pem.EncodeToMemory(&pem.Block{ + Type: "PRIVATE KEY", + Bytes: keyDER, + }) + + return certPEM, keyPEM, nil +} + +// parseCACertificate parses CA certificate and key from PEM +func (cm *CertificateManager) parseCACertificate(caCertPEM, caKeyPEM []byte) (*x509.Certificate, *rsa.PrivateKey, error) { + // Parse CA certificate + certBlock, _ := pem.Decode(caCertPEM) + if certBlock == nil { + return nil, nil, fmt.Errorf("failed to parse CA certificate PEM") + } + + caCert, err := x509.ParseCertificate(certBlock.Bytes) + if err != nil { + return nil, nil, fmt.Errorf("failed to parse CA certificate: %w", err) + } + + // Parse CA private key + keyBlock, _ := pem.Decode(caKeyPEM) + if keyBlock == nil { + return nil, nil, fmt.Errorf("failed to parse CA key PEM") + } + + caKey, err := x509.ParsePKCS8PrivateKey(keyBlock.Bytes) + if err != nil { + return nil, nil, fmt.Errorf("failed to parse CA key: %w", err) + } + + rsaKey, ok := caKey.(*rsa.PrivateKey) + if !ok { + return nil, nil, fmt.Errorf("CA key is not RSA") + } + + return caCert, rsaKey, nil +} + +// LoadTLSCertificate loads a TLS certificate from PEM files +func LoadTLSCertificate(certPEM, keyPEM []byte) (tls.Certificate, error) { + return tls.X509KeyPair(certPEM, keyPEM) +} + diff --git a/pkg/cli/prod_commands.go b/pkg/cli/prod_commands.go index 5e7551e..cf9315d 100644 --- a/pkg/cli/prod_commands.go +++ b/pkg/cli/prod_commands.go @@ -14,8 +14,10 @@ import ( "syscall" "time" + "github.com/DeBrosOfficial/network/pkg/config" "github.com/DeBrosOfficial/network/pkg/environments/production" "github.com/DeBrosOfficial/network/pkg/installer" + "github.com/DeBrosOfficial/network/pkg/tlsutil" "github.com/multiformats/go-multiaddr" ) @@ -33,6 +35,10 @@ func runInteractiveInstaller() { args = append(args, "--domain", config.Domain) args = append(args, "--branch", config.Branch) + if config.NoPull { + args = append(args, "--no-pull") + } + if !config.IsFirstNode { if config.JoinAddress != "" { args = append(args, "--join", 
config.JoinAddress) @@ -49,6 +55,153 @@ func runInteractiveInstaller() { handleProdInstall(args) } +// showDryRunSummary displays what would be done during installation without making changes +func showDryRunSummary(vpsIP, domain, branch string, peers []string, joinAddress string, isFirstNode bool, oramaDir string) { + fmt.Printf("\n" + strings.Repeat("=", 70) + "\n") + fmt.Printf("DRY RUN - No changes will be made\n") + fmt.Printf(strings.Repeat("=", 70) + "\n\n") + + fmt.Printf("šŸ“‹ Installation Summary:\n") + fmt.Printf(" VPS IP: %s\n", vpsIP) + fmt.Printf(" Domain: %s\n", domain) + fmt.Printf(" Branch: %s\n", branch) + if isFirstNode { + fmt.Printf(" Node Type: First node (creates new cluster)\n") + } else { + fmt.Printf(" Node Type: Joining existing cluster\n") + if joinAddress != "" { + fmt.Printf(" Join Address: %s\n", joinAddress) + } + if len(peers) > 0 { + fmt.Printf(" Peers: %d peer(s)\n", len(peers)) + for _, peer := range peers { + fmt.Printf(" - %s\n", peer) + } + } + } + + fmt.Printf("\nšŸ“ Directories that would be created:\n") + fmt.Printf(" %s/configs/\n", oramaDir) + fmt.Printf(" %s/secrets/\n", oramaDir) + fmt.Printf(" %s/data/ipfs/repo/\n", oramaDir) + fmt.Printf(" %s/data/ipfs-cluster/\n", oramaDir) + fmt.Printf(" %s/data/rqlite/\n", oramaDir) + fmt.Printf(" %s/logs/\n", oramaDir) + fmt.Printf(" %s/tls-cache/\n", oramaDir) + + fmt.Printf("\nšŸ”§ Binaries that would be installed:\n") + fmt.Printf(" - Go (if not present)\n") + fmt.Printf(" - RQLite 8.43.0\n") + fmt.Printf(" - IPFS/Kubo 0.38.2\n") + fmt.Printf(" - IPFS Cluster (latest)\n") + fmt.Printf(" - Olric 0.7.0\n") + fmt.Printf(" - anyone-client (npm)\n") + fmt.Printf(" - DeBros binaries (built from %s branch)\n", branch) + + fmt.Printf("\nšŸ” Secrets that would be generated:\n") + fmt.Printf(" - Cluster secret (64-hex)\n") + fmt.Printf(" - IPFS swarm key\n") + fmt.Printf(" - Node identity (Ed25519 keypair)\n") + + fmt.Printf("\nšŸ“ Configuration files that would be created:\n") + 
fmt.Printf(" - %s/configs/node.yaml\n", oramaDir) + fmt.Printf(" - %s/configs/olric/config.yaml\n", oramaDir) + + fmt.Printf("\nāš™ļø Systemd services that would be created:\n") + fmt.Printf(" - debros-ipfs.service\n") + fmt.Printf(" - debros-ipfs-cluster.service\n") + fmt.Printf(" - debros-olric.service\n") + fmt.Printf(" - debros-node.service (includes embedded gateway + RQLite)\n") + fmt.Printf(" - debros-anyone-client.service\n") + + fmt.Printf("\n🌐 Ports that would be used:\n") + fmt.Printf(" External (must be open in firewall):\n") + fmt.Printf(" - 80 (HTTP for ACME/Let's Encrypt)\n") + fmt.Printf(" - 443 (HTTPS gateway)\n") + fmt.Printf(" - 4101 (IPFS swarm)\n") + fmt.Printf(" - 7001 (RQLite Raft)\n") + fmt.Printf(" Internal (localhost only):\n") + fmt.Printf(" - 4501 (IPFS API)\n") + fmt.Printf(" - 5001 (RQLite HTTP)\n") + fmt.Printf(" - 6001 (Unified gateway)\n") + fmt.Printf(" - 8080 (IPFS gateway)\n") + fmt.Printf(" - 9050 (Anyone SOCKS5)\n") + fmt.Printf(" - 9094 (IPFS Cluster API)\n") + fmt.Printf(" - 3320/3322 (Olric)\n") + + fmt.Printf("\n" + strings.Repeat("=", 70) + "\n") + fmt.Printf("To proceed with installation, run without --dry-run\n") + fmt.Printf(strings.Repeat("=", 70) + "\n\n") +} + +// validateGeneratedConfig loads and validates the generated node configuration +func validateGeneratedConfig(oramaDir string) error { + configPath := filepath.Join(oramaDir, "configs", "node.yaml") + + // Check if config file exists + if _, err := os.Stat(configPath); os.IsNotExist(err) { + return fmt.Errorf("configuration file not found at %s", configPath) + } + + // Load the config file + file, err := os.Open(configPath) + if err != nil { + return fmt.Errorf("failed to open config file: %w", err) + } + defer file.Close() + + var cfg config.Config + if err := config.DecodeStrict(file, &cfg); err != nil { + return fmt.Errorf("failed to parse config: %w", err) + } + + // Validate the configuration + if errs := cfg.Validate(); len(errs) > 0 { + var errMsgs 
[]string + for _, e := range errs { + errMsgs = append(errMsgs, e.Error()) + } + return fmt.Errorf("configuration validation errors:\n - %s", strings.Join(errMsgs, "\n - ")) + } + + return nil +} + +// validateDNSRecord validates that the domain points to the expected IP address +// Returns nil if DNS is valid, warning message if DNS doesn't match but continues, +// or error if DNS lookup fails completely +func validateDNSRecord(domain, expectedIP string) error { + if domain == "" { + return nil // No domain provided, skip validation + } + + ips, err := net.LookupIP(domain) + if err != nil { + // DNS lookup failed - this is a warning, not a fatal error + // The user might be setting up DNS after installation + fmt.Printf(" āš ļø DNS lookup failed for %s: %v\n", domain, err) + fmt.Printf(" Make sure DNS is configured before enabling HTTPS\n") + return nil + } + + // Check if any resolved IP matches the expected IP + for _, ip := range ips { + if ip.String() == expectedIP { + fmt.Printf(" āœ“ DNS validated: %s → %s\n", domain, expectedIP) + return nil + } + } + + // DNS doesn't point to expected IP - warn but continue + resolvedIPs := make([]string, len(ips)) + for i, ip := range ips { + resolvedIPs[i] = ip.String() + } + fmt.Printf(" āš ļø DNS mismatch: %s resolves to %v, expected %s\n", domain, resolvedIPs, expectedIP) + fmt.Printf(" HTTPS certificate generation may fail until DNS is updated\n") + return nil +} + // normalizePeers normalizes and validates peer multiaddrs func normalizePeers(peersStr string) ([]string, error) { if peersStr == "" { @@ -133,7 +286,9 @@ func showProdHelp() { fmt.Printf(" --join ADDR - RQLite join address IP:port (for joining cluster)\n") fmt.Printf(" --cluster-secret HEX - 64-hex cluster secret (required when joining)\n") fmt.Printf(" --branch BRANCH - Git branch to use (main or nightly, default: main)\n") + fmt.Printf(" --no-pull - Skip git clone/pull, use existing /home/debros/src\n") fmt.Printf(" --ignore-resource-checks - Skip 
disk/RAM/CPU prerequisite validation\n") + fmt.Printf(" --dry-run - Show what would be done without making changes\n") fmt.Printf(" upgrade - Upgrade existing installation (requires root/sudo)\n") fmt.Printf(" Options:\n") fmt.Printf(" --restart - Automatically restart services after upgrade\n") @@ -182,6 +337,8 @@ func handleProdInstall(args []string) { branch := fs.String("branch", "main", "Git branch to use (main or nightly)") clusterSecret := fs.String("cluster-secret", "", "Hex-encoded 32-byte cluster secret (for joining existing cluster)") interactive := fs.Bool("interactive", false, "Run interactive TUI installer") + dryRun := fs.Bool("dry-run", false, "Show what would be done without making changes") + noPull := fs.Bool("no-pull", false, "Skip git clone/pull, use existing /home/debros/src") if err := fs.Parse(args); err != nil { if err == flag.ErrHelp { @@ -261,7 +418,13 @@ func handleProdInstall(args []string) { fmt.Printf(" āœ“ Cluster secret saved\n") } - setup := production.NewProductionSetup(oramaHome, os.Stdout, *force, *branch, false, *skipResourceChecks) + setup := production.NewProductionSetup(oramaHome, os.Stdout, *force, *branch, *noPull, *skipResourceChecks) + + // Inform user if skipping git pull + if *noPull { + fmt.Printf(" āš ļø --no-pull flag enabled: Skipping git clone/pull\n") + fmt.Printf(" Using existing repository at /home/debros/src\n") + } // Check port availability before proceeding if err := ensurePortsAvailable("install", defaultPorts()); err != nil { @@ -269,6 +432,18 @@ func handleProdInstall(args []string) { os.Exit(1) } + // Validate DNS if domain is provided + if *domain != "" { + fmt.Printf("\n🌐 Pre-flight DNS validation...\n") + validateDNSRecord(*domain, *vpsIP) + } + + // Dry-run mode: show what would be done and exit + if *dryRun { + showDryRunSummary(*vpsIP, *domain, *branch, peers, *joinAddress, isFirstNode, oramaDir) + return + } + // Save branch preference for future upgrades if err := 
production.SaveBranchPreference(oramaDir, *branch); err != nil { fmt.Fprintf(os.Stderr, "āš ļø Warning: Failed to save branch preference: %v\n", err) @@ -318,21 +493,26 @@ func handleProdInstall(args []string) { os.Exit(1) } + // Validate generated configuration + fmt.Printf(" Validating generated configuration...\n") + if err := validateGeneratedConfig(oramaDir); err != nil { + fmt.Fprintf(os.Stderr, "āŒ Configuration validation failed: %v\n", err) + os.Exit(1) + } + fmt.Printf(" āœ“ Configuration validated\n") + // Phase 5: Create systemd services fmt.Printf("\nšŸ”§ Phase 5: Creating systemd services...\n") - if err := setup.Phase5CreateSystemdServices(); err != nil { + if err := setup.Phase5CreateSystemdServices(enableHTTPS); err != nil { fmt.Fprintf(os.Stderr, "āŒ Service creation failed: %v\n", err) os.Exit(1) } - // Give services a moment to fully initialize before verification - fmt.Printf("\nā³ Waiting for services to initialize...\n") - time.Sleep(5 * time.Second) - - // Verify all services are running correctly - if err := verifyProductionRuntime("prod install"); err != nil { + // Verify all services are running correctly with exponential backoff retries + fmt.Printf("\nā³ Verifying services are healthy...\n") + if err := verifyProductionRuntimeWithRetry("prod install", 5, 3*time.Second); err != nil { fmt.Fprintf(os.Stderr, "āŒ %v\n", err) - fmt.Fprintf(os.Stderr, " Installation completed but services are not healthy. Check logs with: dbn prod logs \n") + fmt.Fprintf(os.Stderr, " Installation completed but services are not healthy. 
Check logs with: orama logs \n") os.Exit(1) } @@ -614,7 +794,7 @@ func handleProdUpgrade(args []string) { // Phase 5: Update systemd services fmt.Printf("\nšŸ”§ Phase 5: Updating systemd services...\n") - if err := setup.Phase5CreateSystemdServices(); err != nil { + if err := setup.Phase5CreateSystemdServices(enableHTTPS); err != nil { fmt.Fprintf(os.Stderr, "āš ļø Service update warning: %v\n", err) } @@ -638,13 +818,11 @@ func handleProdUpgrade(args []string) { } } fmt.Printf(" āœ“ All services restarted\n") - // Give services a moment to fully initialize before verification - fmt.Printf(" ā³ Waiting for services to initialize...\n") - time.Sleep(5 * time.Second) - // Verify services are healthy after restart - if err := verifyProductionRuntime("prod upgrade --restart"); err != nil { + // Verify services are healthy after restart with exponential backoff + fmt.Printf(" ā³ Verifying services are healthy...\n") + if err := verifyProductionRuntimeWithRetry("prod upgrade --restart", 5, 3*time.Second); err != nil { fmt.Fprintf(os.Stderr, "āŒ %v\n", err) - fmt.Fprintf(os.Stderr, " Upgrade completed but services are not healthy. Check logs with: dbn prod logs \n") + fmt.Fprintf(os.Stderr, " Upgrade completed but services are not healthy. 
Check logs with: orama logs \n") os.Exit(1) } fmt.Printf(" āœ… All services verified healthy\n") @@ -953,6 +1131,31 @@ func serviceExists(name string) bool { return err == nil } +// verifyProductionRuntimeWithRetry verifies services with exponential backoff retries +func verifyProductionRuntimeWithRetry(action string, maxAttempts int, initialWait time.Duration) error { + wait := initialWait + var lastErr error + + for attempt := 1; attempt <= maxAttempts; attempt++ { + lastErr = verifyProductionRuntime(action) + if lastErr == nil { + return nil + } + + if attempt < maxAttempts { + fmt.Printf(" ā³ Services not ready (attempt %d/%d), waiting %v...\n", attempt, maxAttempts, wait) + time.Sleep(wait) + // Exponential backoff with cap at 30 seconds + wait = wait * 2 + if wait > 30*time.Second { + wait = 30 * time.Second + } + } + } + + return lastErr +} + func verifyProductionRuntime(action string) error { services := getProductionServices() issues := make([]string, 0) @@ -968,7 +1171,7 @@ func verifyProductionRuntime(action string) error { } } - client := &http.Client{Timeout: 3 * time.Second} + client := tlsutil.NewHTTPClient(3 * time.Second) if err := checkHTTP(client, "GET", "http://127.0.0.1:5001/status", "RQLite status"); err == nil { } else if serviceExists("debros-node") { @@ -986,7 +1189,8 @@ func verifyProductionRuntime(action string) error { } if err := checkHTTP(client, "GET", "http://127.0.0.1:6001/health", "Gateway health"); err == nil { - } else if serviceExists("debros-gateway") { + } else if serviceExists("debros-node") { + // Gateway is now embedded in node, check debros-node instead issues = append(issues, err.Error()) } @@ -1007,10 +1211,11 @@ func getProductionServices() []string { allServices := []string{ "debros-gateway", "debros-node", + "debros-rqlite", "debros-olric", - // Note: RQLite is managed by node process, not as separate service "debros-ipfs-cluster", "debros-ipfs", + "debros-anyone-client", } // Filter to only existing services by 
checking if unit file exists @@ -1135,31 +1340,13 @@ func handleProdStart() { fmt.Printf(" ā³ Waiting for services to initialize...\n") time.Sleep(5 * time.Second) - // Wait for services to actually become active (with retries) - maxRetries := 6 - for i := 0; i < maxRetries; i++ { - allActive := true - for _, svc := range inactive { - active, err := isServiceActive(svc) - if err != nil || !active { - allActive = false - break - } - } - if allActive { - break - } - if i < maxRetries-1 { - time.Sleep(2 * time.Second) - } - } - - // Verify all services are healthy - if err := verifyProductionRuntime("prod start"); err != nil { + // Verify all services are healthy with exponential backoff retries + fmt.Printf(" ā³ Verifying services are healthy...\n") + if err := verifyProductionRuntimeWithRetry("prod start", 6, 2*time.Second); err != nil { fmt.Fprintf(os.Stderr, "āŒ %v\n", err) fmt.Fprintf(os.Stderr, "\n Services may still be starting. Check status with:\n") fmt.Fprintf(os.Stderr, " systemctl status debros-*\n") - fmt.Fprintf(os.Stderr, " dbn prod logs \n") + fmt.Fprintf(os.Stderr, " orama logs \n") os.Exit(1) } @@ -1180,6 +1367,13 @@ func handleProdStop() { return } + // First, disable all services to prevent auto-restart + disableArgs := []string{"disable"} + disableArgs = append(disableArgs, services...) + if err := exec.Command("systemctl", disableArgs...).Run(); err != nil { + fmt.Printf(" āš ļø Warning: Failed to disable some services: %v\n", err) + } + // Stop all services at once using a single systemctl command // This is more efficient and ensures they all stop together stopArgs := []string{"stop"} @@ -1193,7 +1387,6 @@ func handleProdStop() { time.Sleep(2 * time.Second) // Reset failed state for any services that might be in failed state - // This helps with services stuck in "activating auto-restart" resetArgs := []string{"reset-failed"} resetArgs = append(resetArgs, services...) 
exec.Command("systemctl", resetArgs...).Run() @@ -1201,7 +1394,7 @@ func handleProdStop() { // Wait again after reset-failed time.Sleep(1 * time.Second) - // Stop again to ensure they're stopped (in case reset-failed caused a restart) + // Stop again to ensure they're stopped exec.Command("systemctl", stopArgs...).Run() time.Sleep(1 * time.Second) @@ -1315,12 +1508,9 @@ func handleProdRestart() { } } - // Give services a moment to fully initialize before verification - fmt.Printf(" ā³ Waiting for services to initialize...\n") - time.Sleep(3 * time.Second) - - // Verify all services are healthy - if err := verifyProductionRuntime("prod restart"); err != nil { + // Verify all services are healthy with exponential backoff retries + fmt.Printf(" ā³ Verifying services are healthy...\n") + if err := verifyProductionRuntimeWithRetry("prod restart", 5, 3*time.Second); err != nil { fmt.Fprintf(os.Stderr, "āŒ %v\n", err) os.Exit(1) } @@ -1350,10 +1540,11 @@ func handleProdUninstall() { services := []string{ "debros-gateway", "debros-node", + "debros-rqlite", "debros-olric", - // Note: RQLite is managed by node process, not as separate service "debros-ipfs-cluster", "debros-ipfs", + "debros-anyone-client", } fmt.Printf("Stopping services...\n") diff --git a/pkg/cli/prod_commands_test.go b/pkg/cli/prod_commands_test.go index 874a9ff..1fd5925 100644 --- a/pkg/cli/prod_commands_test.go +++ b/pkg/cli/prod_commands_test.go @@ -5,76 +5,167 @@ import ( ) // TestProdCommandFlagParsing verifies that prod command flags are parsed correctly +// Note: The installer now uses --vps-ip presence to determine if it's a first node (no --bootstrap flag) +// First node: has --vps-ip but no --peers or --join +// Joining node: has --vps-ip, --peers, and --cluster-secret func TestProdCommandFlagParsing(t *testing.T) { tests := []struct { - name string - args []string - expectBootstrap bool - expectVPSIP string - expectBootstrapJoin string - expectPeers string + name string + args []string + 
expectVPSIP string + expectDomain string + expectPeers string + expectJoin string + expectSecret string + expectBranch string + isFirstNode bool // first node = no peers and no join address }{ { - name: "bootstrap node", - args: []string{"install", "--bootstrap"}, - expectBootstrap: true, + name: "first node (creates new cluster)", + args: []string{"install", "--vps-ip", "10.0.0.1", "--domain", "node-1.example.com"}, + expectVPSIP: "10.0.0.1", + expectDomain: "node-1.example.com", + isFirstNode: true, }, { - name: "non-bootstrap with vps-ip", - args: []string{"install", "--vps-ip", "10.0.0.2", "--peers", "multiaddr1,multiaddr2"}, - expectVPSIP: "10.0.0.2", - expectPeers: "multiaddr1,multiaddr2", + name: "joining node with peers", + args: []string{"install", "--vps-ip", "10.0.0.2", "--peers", "/ip4/10.0.0.1/tcp/4001/p2p/Qm123", "--cluster-secret", "0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef"}, + expectVPSIP: "10.0.0.2", + expectPeers: "/ip4/10.0.0.1/tcp/4001/p2p/Qm123", + expectSecret: "0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef", + isFirstNode: false, }, { - name: "secondary bootstrap", - args: []string{"install", "--bootstrap", "--vps-ip", "10.0.0.3", "--bootstrap-join", "10.0.0.1:7001"}, - expectBootstrap: true, - expectVPSIP: "10.0.0.3", - expectBootstrapJoin: "10.0.0.1:7001", + name: "joining node with join address", + args: []string{"install", "--vps-ip", "10.0.0.3", "--join", "10.0.0.1:7001", "--cluster-secret", "0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef"}, + expectVPSIP: "10.0.0.3", + expectJoin: "10.0.0.1:7001", + expectSecret: "0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef", + isFirstNode: false, }, { - name: "with domain", - args: []string{"install", "--bootstrap", "--domain", "example.com"}, - expectBootstrap: true, + name: "with nightly branch", + args: []string{"install", "--vps-ip", "10.0.0.4", "--branch", "nightly"}, + expectVPSIP: "10.0.0.4", + expectBranch: 
"nightly", + isFirstNode: true, }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { // Extract flags manually to verify parsing logic - isBootstrap := false - var vpsIP, peersStr, bootstrapJoin string + var vpsIP, domain, peersStr, joinAddr, clusterSecret, branch string for i, arg := range tt.args { switch arg { - case "--bootstrap": - isBootstrap = true - case "--peers": - if i+1 < len(tt.args) { - peersStr = tt.args[i+1] - } case "--vps-ip": if i+1 < len(tt.args) { vpsIP = tt.args[i+1] } - case "--bootstrap-join": + case "--domain": if i+1 < len(tt.args) { - bootstrapJoin = tt.args[i+1] + domain = tt.args[i+1] + } + case "--peers": + if i+1 < len(tt.args) { + peersStr = tt.args[i+1] + } + case "--join": + if i+1 < len(tt.args) { + joinAddr = tt.args[i+1] + } + case "--cluster-secret": + if i+1 < len(tt.args) { + clusterSecret = tt.args[i+1] + } + case "--branch": + if i+1 < len(tt.args) { + branch = tt.args[i+1] } } } - if isBootstrap != tt.expectBootstrap { - t.Errorf("expected bootstrap=%v, got %v", tt.expectBootstrap, isBootstrap) - } + // First node detection: no peers and no join address + isFirstNode := peersStr == "" && joinAddr == "" + if vpsIP != tt.expectVPSIP { t.Errorf("expected vpsIP=%q, got %q", tt.expectVPSIP, vpsIP) } + if domain != tt.expectDomain { + t.Errorf("expected domain=%q, got %q", tt.expectDomain, domain) + } if peersStr != tt.expectPeers { t.Errorf("expected peers=%q, got %q", tt.expectPeers, peersStr) } - if bootstrapJoin != tt.expectBootstrapJoin { - t.Errorf("expected bootstrapJoin=%q, got %q", tt.expectBootstrapJoin, bootstrapJoin) + if joinAddr != tt.expectJoin { + t.Errorf("expected join=%q, got %q", tt.expectJoin, joinAddr) + } + if clusterSecret != tt.expectSecret { + t.Errorf("expected clusterSecret=%q, got %q", tt.expectSecret, clusterSecret) + } + if branch != tt.expectBranch { + t.Errorf("expected branch=%q, got %q", tt.expectBranch, branch) + } + if isFirstNode != tt.isFirstNode { + t.Errorf("expected 
isFirstNode=%v, got %v", tt.isFirstNode, isFirstNode) + } + }) + } +} + +// TestNormalizePeers tests the peer multiaddr normalization +func TestNormalizePeers(t *testing.T) { + tests := []struct { + name string + input string + expectCount int + expectError bool + }{ + { + name: "empty string", + input: "", + expectCount: 0, + expectError: false, + }, + { + name: "single peer", + input: "/ip4/10.0.0.1/tcp/4001/p2p/QmTest123", + expectCount: 1, + expectError: false, + }, + { + name: "multiple peers", + input: "/ip4/10.0.0.1/tcp/4001/p2p/QmTest1,/ip4/10.0.0.2/tcp/4001/p2p/QmTest2", + expectCount: 2, + expectError: false, + }, + { + name: "duplicate peers deduplicated", + input: "/ip4/10.0.0.1/tcp/4001/p2p/QmTest1,/ip4/10.0.0.1/tcp/4001/p2p/QmTest1", + expectCount: 1, + expectError: false, + }, + { + name: "invalid multiaddr", + input: "not-a-multiaddr", + expectCount: 0, + expectError: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + peers, err := normalizePeers(tt.input) + + if tt.expectError && err == nil { + t.Errorf("expected error but got none") + } + if !tt.expectError && err != nil { + t.Errorf("unexpected error: %v", err) + } + if len(peers) != tt.expectCount { + t.Errorf("expected %d peers, got %d", tt.expectCount, len(peers)) } }) } diff --git a/pkg/config/config.go b/pkg/config/config.go index 2698c1c..ab5b6c9 100644 --- a/pkg/config/config.go +++ b/pkg/config/config.go @@ -119,15 +119,16 @@ type HTTPGatewayConfig struct { // HTTPSConfig contains HTTPS/TLS configuration for the gateway type HTTPSConfig struct { - Enabled bool `yaml:"enabled"` // Enable HTTPS (port 443) - Domain string `yaml:"domain"` // Primary domain (e.g., node-123.orama.network) - AutoCert bool `yaml:"auto_cert"` // Use Let's Encrypt for automatic certificate - CertFile string `yaml:"cert_file"` // Path to certificate file (if not using auto_cert) - KeyFile string `yaml:"key_file"` // Path to key file (if not using auto_cert) - CacheDir string 
`yaml:"cache_dir"` // Directory for Let's Encrypt certificate cache - HTTPPort int `yaml:"http_port"` // HTTP port for ACME challenge (default: 80) - HTTPSPort int `yaml:"https_port"` // HTTPS port (default: 443) - Email string `yaml:"email"` // Email for Let's Encrypt account + Enabled bool `yaml:"enabled"` // Enable HTTPS (port 443) + Domain string `yaml:"domain"` // Primary domain (e.g., node-123.orama.network) + AutoCert bool `yaml:"auto_cert"` // Use Let's Encrypt for automatic certificate + UseSelfSigned bool `yaml:"use_self_signed"` // Use self-signed certificates (pre-generated) + CertFile string `yaml:"cert_file"` // Path to certificate file (if not using auto_cert) + KeyFile string `yaml:"key_file"` // Path to key file (if not using auto_cert) + CacheDir string `yaml:"cache_dir"` // Directory for Let's Encrypt certificate cache + HTTPPort int `yaml:"http_port"` // HTTP port for ACME challenge (default: 80) + HTTPSPort int `yaml:"https_port"` // HTTPS port (default: 443) + Email string `yaml:"email"` // Email for Let's Encrypt account } // SNIConfig contains SNI-based TCP routing configuration for port 7001 diff --git a/pkg/config/validate.go b/pkg/config/validate.go index 3f3c802..d07e67d 100644 --- a/pkg/config/validate.go +++ b/pkg/config/validate.go @@ -279,9 +279,7 @@ func (c *Config) validateDiscovery() []error { }) } - // Validate peer addresses (optional - can be empty for first node) - // All nodes are unified, so peer addresses are optional - + // Validate peer addresses (optional - all nodes are unified peers now) // Validate each peer multiaddr seenPeers := make(map[string]bool) for i, peer := range disc.BootstrapPeers { diff --git a/pkg/config/validate_test.go b/pkg/config/validate_test.go index 46f0c22..4599234 100644 --- a/pkg/config/validate_test.go +++ b/pkg/config/validate_test.go @@ -167,7 +167,7 @@ func TestValidatePeerAddresses(t *testing.T) { shouldError bool }{ {"node with peer", []string{validPeer}, false}, - {"node without peer", 
[]string{}, true}, // All nodes need peer addresses + {"node without peer", []string{}, false}, // All nodes are unified peers - bootstrap peers optional {"invalid multiaddr", []string{"invalid"}, true}, {"missing p2p", []string{"/ip4/127.0.0.1/tcp/4001"}, true}, {"duplicate peer", []string{validPeer, validPeer}, true}, diff --git a/pkg/environments/development/checks_test.go b/pkg/environments/development/checks_test.go index 739f3c8..2005e89 100644 --- a/pkg/environments/development/checks_test.go +++ b/pkg/environments/development/checks_test.go @@ -17,7 +17,8 @@ func TestPortChecker(t *testing.T) { } // Check that required port counts match expectations - expectedPortCount := 44 // Based on RequiredPorts + // 5 nodes Ɨ 9 ports per node + 4 shared ports = 49 + expectedPortCount := 49 // Based on RequiredPorts if len(checker.ports) != expectedPortCount { t.Errorf("Expected %d ports, got %d", expectedPortCount, len(checker.ports)) } diff --git a/pkg/environments/development/health.go b/pkg/environments/development/health.go index f2b942f..d60ac98 100644 --- a/pkg/environments/development/health.go +++ b/pkg/environments/development/health.go @@ -9,6 +9,8 @@ import ( "os/exec" "strings" "time" + + "github.com/DeBrosOfficial/network/pkg/tlsutil" ) // HealthCheckResult represents the result of a health check @@ -79,7 +81,7 @@ func (pm *ProcessManager) checkRQLiteNode(ctx context.Context, name string, http result := HealthCheckResult{Name: fmt.Sprintf("RQLite-%s", name)} urlStr := fmt.Sprintf("http://localhost:%d/status", httpPort) - client := &http.Client{Timeout: 2 * time.Second} + client := tlsutil.NewHTTPClient(2 * time.Second) resp, err := client.Get(urlStr) if err != nil { result.Details = fmt.Sprintf("connection failed: %v", err) diff --git a/pkg/environments/development/runner.go b/pkg/environments/development/runner.go index 8d97c05..35cb6bf 100644 --- a/pkg/environments/development/runner.go +++ b/pkg/environments/development/runner.go @@ -15,6 +15,8 @@ 
import ( "strings" "sync" "time" + + "github.com/DeBrosOfficial/network/pkg/tlsutil" ) // ProcessManager manages all dev environment processes @@ -481,7 +483,7 @@ func (pm *ProcessManager) waitIPFSReady(ctx context.Context, node ipfsNodeInfo) // ipfsHTTPCall makes an HTTP call to IPFS API func (pm *ProcessManager) ipfsHTTPCall(ctx context.Context, urlStr string, method string) error { - client := &http.Client{Timeout: 5 * time.Second} + client := tlsutil.NewHTTPClient(5 * time.Second) req, err := http.NewRequestWithContext(ctx, method, urlStr, nil) if err != nil { return fmt.Errorf("failed to create request: %w", err) diff --git a/pkg/environments/production/config.go b/pkg/environments/production/config.go index 36ab1a6..cdcc23b 100644 --- a/pkg/environments/production/config.go +++ b/pkg/environments/production/config.go @@ -94,7 +94,7 @@ func inferPeerIP(peers []string, vpsIP string) string { } // GenerateNodeConfig generates node.yaml configuration (unified architecture) -func (cg *ConfigGenerator) GenerateNodeConfig(peerAddresses []string, vpsIP string, joinAddress string, domain string) (string, error) { +func (cg *ConfigGenerator) GenerateNodeConfig(peerAddresses []string, vpsIP string, joinAddress string, domain string, enableHTTPS bool) (string, error) { // Generate node ID from domain or use default nodeID := "node" if domain != "" { @@ -106,10 +106,16 @@ func (cg *ConfigGenerator) GenerateNodeConfig(peerAddresses []string, vpsIP stri } // Determine advertise addresses - use vpsIP if provided + // When HTTPS/SNI is enabled, use domain-based raft address for SNI routing var httpAdvAddr, raftAdvAddr string if vpsIP != "" { httpAdvAddr = net.JoinHostPort(vpsIP, "5001") - raftAdvAddr = net.JoinHostPort(vpsIP, "7001") + if enableHTTPS && domain != "" { + // Use SNI domain for Raft advertisement so other nodes connect via SNI gateway + raftAdvAddr = fmt.Sprintf("raft.%s:7001", domain) + } else { + raftAdvAddr = net.JoinHostPort(vpsIP, "7001") + } } else { // 
Fallback to localhost if no vpsIP httpAdvAddr = "localhost:5001" @@ -134,21 +140,40 @@ func (cg *ConfigGenerator) GenerateNodeConfig(peerAddresses []string, vpsIP stri } // If no join address and no peers, this is the first node - it will create the cluster + // TLS/ACME configuration + tlsCacheDir := "" + httpPort := 80 + httpsPort := 443 + if enableHTTPS { + tlsCacheDir = filepath.Join(cg.oramaDir, "tls-cache") + } + // Unified data directory (all nodes equal) + // When HTTPS/SNI is enabled, use internal port 7002 for RQLite Raft (SNI gateway listens on 7001) + raftInternalPort := 7001 + if enableHTTPS { + raftInternalPort = 7002 // Internal port when SNI is enabled + } + data := templates.NodeConfigData{ - NodeID: nodeID, - P2PPort: 4001, - DataDir: filepath.Join(cg.oramaDir, "data"), - RQLiteHTTPPort: 5001, - RQLiteRaftPort: 7001, - RQLiteJoinAddress: rqliteJoinAddr, - BootstrapPeers: peerAddresses, - ClusterAPIPort: 9094, - IPFSAPIPort: 4501, - HTTPAdvAddress: httpAdvAddr, - RaftAdvAddress: raftAdvAddr, - UnifiedGatewayPort: 6001, - Domain: domain, + NodeID: nodeID, + P2PPort: 4001, + DataDir: filepath.Join(cg.oramaDir, "data"), + RQLiteHTTPPort: 5001, + RQLiteRaftPort: 7001, // External SNI port + RQLiteRaftInternalPort: raftInternalPort, // Internal RQLite binding port + RQLiteJoinAddress: rqliteJoinAddr, + BootstrapPeers: peerAddresses, + ClusterAPIPort: 9094, + IPFSAPIPort: 4501, + HTTPAdvAddress: httpAdvAddr, + RaftAdvAddress: raftAdvAddr, + UnifiedGatewayPort: 6001, + Domain: domain, + EnableHTTPS: enableHTTPS, + TLSCacheDir: tlsCacheDir, + HTTPPort: httpPort, + HTTPSPort: httpsPort, } return templates.RenderNodeConfig(data) } @@ -216,10 +241,14 @@ func (sg *SecretGenerator) EnsureClusterSecret() (string, error) { secretPath := filepath.Join(sg.oramaDir, "secrets", "cluster-secret") secretDir := filepath.Dir(secretPath) - // Ensure secrets directory exists - if err := os.MkdirAll(secretDir, 0755); err != nil { + // Ensure secrets directory exists with 
restricted permissions (0700) + if err := os.MkdirAll(secretDir, 0700); err != nil { return "", fmt.Errorf("failed to create secrets directory: %w", err) } + // Ensure directory permissions are correct even if it already existed + if err := os.Chmod(secretDir, 0700); err != nil { + return "", fmt.Errorf("failed to set secrets directory permissions: %w", err) + } // Try to read existing secret if data, err := os.ReadFile(secretPath); err == nil { @@ -277,10 +306,14 @@ func (sg *SecretGenerator) EnsureSwarmKey() ([]byte, error) { swarmKeyPath := filepath.Join(sg.oramaDir, "secrets", "swarm.key") secretDir := filepath.Dir(swarmKeyPath) - // Ensure secrets directory exists - if err := os.MkdirAll(secretDir, 0755); err != nil { + // Ensure secrets directory exists with restricted permissions (0700) + if err := os.MkdirAll(secretDir, 0700); err != nil { return nil, fmt.Errorf("failed to create secrets directory: %w", err) } + // Ensure directory permissions are correct even if it already existed + if err := os.Chmod(secretDir, 0700); err != nil { + return nil, fmt.Errorf("failed to set secrets directory permissions: %w", err) + } // Try to read existing key if data, err := os.ReadFile(swarmKeyPath); err == nil { diff --git a/pkg/environments/production/installers.go b/pkg/environments/production/installers.go index 9b18b95..3b579f6 100644 --- a/pkg/environments/production/installers.go +++ b/pkg/environments/production/installers.go @@ -3,6 +3,7 @@ package production import ( "encoding/json" "fmt" + "io" "os" "os/exec" "path/filepath" @@ -12,11 +13,11 @@ import ( // BinaryInstaller handles downloading and installing external binaries type BinaryInstaller struct { arch string - logWriter interface{} // io.Writer + logWriter io.Writer } // NewBinaryInstaller creates a new binary installer -func NewBinaryInstaller(arch string, logWriter interface{}) *BinaryInstaller { +func NewBinaryInstaller(arch string, logWriter io.Writer) *BinaryInstaller { return &BinaryInstaller{ 
arch: arch, logWriter: logWriter, @@ -26,11 +27,11 @@ func NewBinaryInstaller(arch string, logWriter interface{}) *BinaryInstaller { // InstallRQLite downloads and installs RQLite func (bi *BinaryInstaller) InstallRQLite() error { if _, err := exec.LookPath("rqlited"); err == nil { - fmt.Fprintf(bi.logWriter.(interface{ Write([]byte) (int, error) }), " āœ“ RQLite already installed\n") + fmt.Fprintf(bi.logWriter, " āœ“ RQLite already installed\n") return nil } - fmt.Fprintf(bi.logWriter.(interface{ Write([]byte) (int, error) }), " Installing RQLite...\n") + fmt.Fprintf(bi.logWriter, " Installing RQLite...\n") version := "8.43.0" tarball := fmt.Sprintf("rqlite-v%s-linux-%s.tar.gz", version, bi.arch) @@ -53,12 +54,14 @@ func (bi *BinaryInstaller) InstallRQLite() error { if err := exec.Command("cp", dir+"/rqlited", "/usr/local/bin/").Run(); err != nil { return fmt.Errorf("failed to copy rqlited binary: %w", err) } - exec.Command("chmod", "+x", "/usr/local/bin/rqlited").Run() + if err := exec.Command("chmod", "+x", "/usr/local/bin/rqlited").Run(); err != nil { + fmt.Fprintf(bi.logWriter, " āš ļø Warning: failed to chmod rqlited: %v\n", err) + } // Ensure PATH includes /usr/local/bin os.Setenv("PATH", os.Getenv("PATH")+":/usr/local/bin") - fmt.Fprintf(bi.logWriter.(interface{ Write([]byte) (int, error) }), " āœ“ RQLite installed\n") + fmt.Fprintf(bi.logWriter, " āœ“ RQLite installed\n") return nil } @@ -66,11 +69,11 @@ func (bi *BinaryInstaller) InstallRQLite() error { // Follows official steps from https://docs.ipfs.tech/install/command-line/ func (bi *BinaryInstaller) InstallIPFS() error { if _, err := exec.LookPath("ipfs"); err == nil { - fmt.Fprintf(bi.logWriter.(interface{ Write([]byte) (int, error) }), " āœ“ IPFS already installed\n") + fmt.Fprintf(bi.logWriter, " āœ“ IPFS already installed\n") return nil } - fmt.Fprintf(bi.logWriter.(interface{ Write([]byte) (int, error) }), " Installing IPFS (Kubo)...\n") + fmt.Fprintf(bi.logWriter, " Installing IPFS 
(Kubo)...\n") // Follow official installation steps in order kuboVersion := "v0.38.2" @@ -81,7 +84,7 @@ func (bi *BinaryInstaller) InstallIPFS() error { kuboDir := filepath.Join(tmpDir, "kubo") // Step 1: Download the Linux binary from dist.ipfs.tech - fmt.Fprintf(bi.logWriter.(interface{ Write([]byte) (int, error) }), " Step 1: Downloading Kubo v%s...\n", kuboVersion) + fmt.Fprintf(bi.logWriter, " Step 1: Downloading Kubo v%s...\n", kuboVersion) cmd := exec.Command("wget", "-q", url, "-O", tarPath) if err := cmd.Run(); err != nil { return fmt.Errorf("failed to download kubo from %s: %w", url, err) @@ -93,7 +96,7 @@ func (bi *BinaryInstaller) InstallIPFS() error { } // Step 2: Unzip the file - fmt.Fprintf(bi.logWriter.(interface{ Write([]byte) (int, error) }), " Step 2: Extracting Kubo archive...\n") + fmt.Fprintf(bi.logWriter, " Step 2: Extracting Kubo archive...\n") cmd = exec.Command("tar", "-xzf", tarPath, "-C", tmpDir) if err := cmd.Run(); err != nil { return fmt.Errorf("failed to extract kubo tarball: %w", err) @@ -105,7 +108,7 @@ func (bi *BinaryInstaller) InstallIPFS() error { } // Step 3: Move into the kubo folder (cd kubo) - fmt.Fprintf(bi.logWriter.(interface{ Write([]byte) (int, error) }), " Step 3: Running installation script...\n") + fmt.Fprintf(bi.logWriter, " Step 3: Running installation script...\n") // Step 4: Run the installation script (sudo bash install.sh) installScript := filepath.Join(kuboDir, "install.sh") @@ -120,7 +123,7 @@ func (bi *BinaryInstaller) InstallIPFS() error { } // Step 5: Test that Kubo has installed correctly - fmt.Fprintf(bi.logWriter.(interface{ Write([]byte) (int, error) }), " Step 5: Verifying installation...\n") + fmt.Fprintf(bi.logWriter, " Step 5: Verifying installation...\n") cmd = exec.Command("ipfs", "--version") output, err := cmd.CombinedOutput() if err != nil { @@ -141,24 +144,24 @@ func (bi *BinaryInstaller) InstallIPFS() error { return fmt.Errorf("ipfs binary not found after installation in %v", ipfsLocations) 
} } else { - fmt.Fprintf(bi.logWriter.(interface{ Write([]byte) (int, error) }), " %s", string(output)) + fmt.Fprintf(bi.logWriter, " %s", string(output)) } // Ensure PATH is updated for current process os.Setenv("PATH", os.Getenv("PATH")+":/usr/local/bin") - fmt.Fprintf(bi.logWriter.(interface{ Write([]byte) (int, error) }), " āœ“ IPFS installed successfully\n") + fmt.Fprintf(bi.logWriter, " āœ“ IPFS installed successfully\n") return nil } // InstallIPFSCluster downloads and installs IPFS Cluster Service func (bi *BinaryInstaller) InstallIPFSCluster() error { if _, err := exec.LookPath("ipfs-cluster-service"); err == nil { - fmt.Fprintf(bi.logWriter.(interface{ Write([]byte) (int, error) }), " āœ“ IPFS Cluster already installed\n") + fmt.Fprintf(bi.logWriter, " āœ“ IPFS Cluster already installed\n") return nil } - fmt.Fprintf(bi.logWriter.(interface{ Write([]byte) (int, error) }), " Installing IPFS Cluster Service...\n") + fmt.Fprintf(bi.logWriter, " Installing IPFS Cluster Service...\n") // Check if Go is available if _, err := exec.LookPath("go"); err != nil { @@ -171,18 +174,18 @@ func (bi *BinaryInstaller) InstallIPFSCluster() error { return fmt.Errorf("failed to install IPFS Cluster: %w", err) } - fmt.Fprintf(bi.logWriter.(interface{ Write([]byte) (int, error) }), " āœ“ IPFS Cluster installed\n") + fmt.Fprintf(bi.logWriter, " āœ“ IPFS Cluster installed\n") return nil } // InstallOlric downloads and installs Olric server func (bi *BinaryInstaller) InstallOlric() error { if _, err := exec.LookPath("olric-server"); err == nil { - fmt.Fprintf(bi.logWriter.(interface{ Write([]byte) (int, error) }), " āœ“ Olric already installed\n") + fmt.Fprintf(bi.logWriter, " āœ“ Olric already installed\n") return nil } - fmt.Fprintf(bi.logWriter.(interface{ Write([]byte) (int, error) }), " Installing Olric...\n") + fmt.Fprintf(bi.logWriter, " Installing Olric...\n") // Check if Go is available if _, err := exec.LookPath("go"); err != nil { @@ -195,20 +198,20 @@ func (bi 
*BinaryInstaller) InstallOlric() error { return fmt.Errorf("failed to install Olric: %w", err) } - fmt.Fprintf(bi.logWriter.(interface{ Write([]byte) (int, error) }), " āœ“ Olric installed\n") + fmt.Fprintf(bi.logWriter, " āœ“ Olric installed\n") return nil } // InstallGo downloads and installs Go toolchain func (bi *BinaryInstaller) InstallGo() error { if _, err := exec.LookPath("go"); err == nil { - fmt.Fprintf(bi.logWriter.(interface{ Write([]byte) (int, error) }), " āœ“ Go already installed\n") + fmt.Fprintf(bi.logWriter, " āœ“ Go already installed\n") return nil } - fmt.Fprintf(bi.logWriter.(interface{ Write([]byte) (int, error) }), " Installing Go...\n") + fmt.Fprintf(bi.logWriter, " Installing Go...\n") - goTarball := fmt.Sprintf("go1.21.6.linux-%s.tar.gz", bi.arch) + goTarball := fmt.Sprintf("go1.22.5.linux-%s.tar.gz", bi.arch) goURL := fmt.Sprintf("https://go.dev/dl/%s", goTarball) // Download @@ -232,7 +235,7 @@ func (bi *BinaryInstaller) InstallGo() error { return fmt.Errorf("go installed but not found in PATH after installation") } - fmt.Fprintf(bi.logWriter.(interface{ Write([]byte) (int, error) }), " āœ“ Go installed\n") + fmt.Fprintf(bi.logWriter, " āœ“ Go installed\n") return nil } @@ -276,45 +279,49 @@ func (bi *BinaryInstaller) ResolveBinaryPath(binary string, extraPaths ...string // InstallDeBrosBinaries clones and builds DeBros binaries func (bi *BinaryInstaller) InstallDeBrosBinaries(branch string, oramaHome string, skipRepoUpdate bool) error { - fmt.Fprintf(bi.logWriter.(interface{ Write([]byte) (int, error) }), " Building DeBros binaries...\n") + fmt.Fprintf(bi.logWriter, " Building DeBros binaries...\n") srcDir := filepath.Join(oramaHome, "src") binDir := filepath.Join(oramaHome, "bin") // Ensure directories exist - os.MkdirAll(srcDir, 0755) - os.MkdirAll(binDir, 0755) + if err := os.MkdirAll(srcDir, 0755); err != nil { + return fmt.Errorf("failed to create source directory %s: %w", srcDir, err) + } + if err := os.MkdirAll(binDir, 0755); err 
!= nil { + return fmt.Errorf("failed to create bin directory %s: %w", binDir, err) + } + + // Check if source directory has content (either git repo or pre-existing source) + hasSourceContent := false + if entries, err := os.ReadDir(srcDir); err == nil && len(entries) > 0 { + hasSourceContent = true + } // Check if git repository is already initialized - repoInitialized := false + isGitRepo := false if _, err := os.Stat(filepath.Join(srcDir, ".git")); err == nil { - repoInitialized = true + isGitRepo = true } // Handle repository update/clone based on skipRepoUpdate flag if skipRepoUpdate { - fmt.Fprintf(bi.logWriter.(interface{ Write([]byte) (int, error) }), " Skipping repo clone/pull (--no-pull flag)\n") - if !repoInitialized { - return fmt.Errorf("cannot skip pull: repository not found at %s", srcDir) + fmt.Fprintf(bi.logWriter, " Skipping repo clone/pull (--no-pull flag)\n") + if !hasSourceContent { + return fmt.Errorf("cannot skip pull: source directory is empty at %s (need to populate it first)", srcDir) } - // Verify srcDir exists and has content - if entries, err := os.ReadDir(srcDir); err != nil { - return fmt.Errorf("failed to read source directory %s: %w", srcDir, err) - } else if len(entries) == 0 { - return fmt.Errorf("source directory %s is empty", srcDir) - } - fmt.Fprintf(bi.logWriter.(interface{ Write([]byte) (int, error) }), " Using existing repository at %s (skipping git operations)\n", srcDir) + fmt.Fprintf(bi.logWriter, " Using existing source at %s (skipping git operations)\n", srcDir) // Skip to build step - don't execute any git commands } else { // Clone repository if not present, otherwise update it - if !repoInitialized { - fmt.Fprintf(bi.logWriter.(interface{ Write([]byte) (int, error) }), " Cloning repository...\n") + if !isGitRepo { + fmt.Fprintf(bi.logWriter, " Cloning repository...\n") cmd := exec.Command("git", "clone", "--branch", branch, "--depth", "1", "https://github.com/DeBrosOfficial/network.git", srcDir) if err := cmd.Run(); 
err != nil { return fmt.Errorf("failed to clone repository: %w", err) } } else { - fmt.Fprintf(bi.logWriter.(interface{ Write([]byte) (int, error) }), " Updating repository to latest changes...\n") + fmt.Fprintf(bi.logWriter, " Updating repository to latest changes...\n") if output, err := exec.Command("git", "-C", srcDir, "fetch", "origin", branch).CombinedOutput(); err != nil { return fmt.Errorf("failed to fetch repository updates: %v\n%s", err, string(output)) } @@ -328,7 +335,7 @@ func (bi *BinaryInstaller) InstallDeBrosBinaries(branch string, oramaHome string } // Build binaries - fmt.Fprintf(bi.logWriter.(interface{ Write([]byte) (int, error) }), " Building binaries...\n") + fmt.Fprintf(bi.logWriter, " Building binaries...\n") cmd := exec.Command("make", "build") cmd.Dir = srcDir cmd.Env = append(os.Environ(), "HOME="+oramaHome, "PATH="+os.Getenv("PATH")+":/usr/local/go/bin") @@ -337,7 +344,7 @@ func (bi *BinaryInstaller) InstallDeBrosBinaries(branch string, oramaHome string } // Copy binaries - fmt.Fprintf(bi.logWriter.(interface{ Write([]byte) (int, error) }), " Copying binaries...\n") + fmt.Fprintf(bi.logWriter, " Copying binaries...\n") srcBinDir := filepath.Join(srcDir, "bin") // Check if source bin directory exists @@ -374,21 +381,25 @@ func (bi *BinaryInstaller) InstallDeBrosBinaries(branch string, oramaHome string } } - exec.Command("chmod", "-R", "755", binDir).Run() - exec.Command("chown", "-R", "debros:debros", binDir).Run() + if err := exec.Command("chmod", "-R", "755", binDir).Run(); err != nil { + fmt.Fprintf(bi.logWriter, " āš ļø Warning: failed to chmod bin directory: %v\n", err) + } + if err := exec.Command("chown", "-R", "debros:debros", binDir).Run(); err != nil { + fmt.Fprintf(bi.logWriter, " āš ļø Warning: failed to chown bin directory: %v\n", err) + } - fmt.Fprintf(bi.logWriter.(interface{ Write([]byte) (int, error) }), " āœ“ DeBros binaries installed\n") + fmt.Fprintf(bi.logWriter, " āœ“ DeBros binaries installed\n") return nil } // 
InstallSystemDependencies installs system-level dependencies via apt func (bi *BinaryInstaller) InstallSystemDependencies() error { - fmt.Fprintf(bi.logWriter.(interface{ Write([]byte) (int, error) }), " Installing system dependencies...\n") + fmt.Fprintf(bi.logWriter, " Installing system dependencies...\n") // Update package list cmd := exec.Command("apt-get", "update") if err := cmd.Run(); err != nil { - fmt.Fprintf(bi.logWriter.(interface{ Write([]byte) (int, error) }), " Warning: apt update failed\n") + fmt.Fprintf(bi.logWriter, " Warning: apt update failed\n") } // Install dependencies including Node.js for anyone-client @@ -397,7 +408,7 @@ func (bi *BinaryInstaller) InstallSystemDependencies() error { return fmt.Errorf("failed to install dependencies: %w", err) } - fmt.Fprintf(bi.logWriter.(interface{ Write([]byte) (int, error) }), " āœ“ System dependencies installed\n") + fmt.Fprintf(bi.logWriter, " āœ“ System dependencies installed\n") return nil } @@ -407,9 +418,9 @@ func (bi *BinaryInstaller) InitializeIPFSRepo(ipfsRepoPath string, swarmKeyPath repoExists := false if _, err := os.Stat(configPath); err == nil { repoExists = true - fmt.Fprintf(bi.logWriter.(interface{ Write([]byte) (int, error) }), " IPFS repo already exists, ensuring configuration...\n") + fmt.Fprintf(bi.logWriter, " IPFS repo already exists, ensuring configuration...\n") } else { - fmt.Fprintf(bi.logWriter.(interface{ Write([]byte) (int, error) }), " Initializing IPFS repo...\n") + fmt.Fprintf(bi.logWriter, " Initializing IPFS repo...\n") } if err := os.MkdirAll(ipfsRepoPath, 0755); err != nil { @@ -442,7 +453,7 @@ func (bi *BinaryInstaller) InitializeIPFSRepo(ipfsRepoPath string, swarmKeyPath // Configure IPFS addresses (API, Gateway, Swarm) by modifying the config file directly // This ensures the ports are set correctly and avoids conflicts with RQLite on port 5001 - fmt.Fprintf(bi.logWriter.(interface{ Write([]byte) (int, error) }), " Configuring IPFS addresses (API: %d, Gateway: %d, 
Swarm: %d)...\n", apiPort, gatewayPort, swarmPort) + fmt.Fprintf(bi.logWriter, " Configuring IPFS addresses (API: %d, Gateway: %d, Swarm: %d)...\n", apiPort, gatewayPort, swarmPort) if err := bi.configureIPFSAddresses(ipfsRepoPath, apiPort, gatewayPort, swarmPort); err != nil { return fmt.Errorf("failed to configure IPFS addresses: %w", err) } @@ -451,7 +462,7 @@ func (bi *BinaryInstaller) InitializeIPFSRepo(ipfsRepoPath string, swarmKeyPath // This is critical - IPFS will fail to start if AutoConf is enabled on a private network // We do this even for existing repos to fix repos initialized before this fix was applied if swarmKeyExists { - fmt.Fprintf(bi.logWriter.(interface{ Write([]byte) (int, error) }), " Disabling AutoConf for private swarm...\n") + fmt.Fprintf(bi.logWriter, " Disabling AutoConf for private swarm...\n") cmd := exec.Command(ipfsBinary, "config", "--json", "AutoConf.Enabled", "false") cmd.Env = append(os.Environ(), "IPFS_PATH="+ipfsRepoPath) if output, err := cmd.CombinedOutput(); err != nil { @@ -460,7 +471,7 @@ func (bi *BinaryInstaller) InitializeIPFSRepo(ipfsRepoPath string, swarmKeyPath // Clear AutoConf placeholders from config to prevent Kubo startup errors // When AutoConf is disabled, 'auto' placeholders must be replaced with explicit values or empty - fmt.Fprintf(bi.logWriter.(interface{ Write([]byte) (int, error) }), " Clearing AutoConf placeholders from IPFS config...\n") + fmt.Fprintf(bi.logWriter, " Clearing AutoConf placeholders from IPFS config...\n") type configCommand struct { desc string @@ -476,7 +487,7 @@ func (bi *BinaryInstaller) InitializeIPFSRepo(ipfsRepoPath string, swarmKeyPath } for _, step := range cleanup { - fmt.Fprintf(bi.logWriter.(interface{ Write([]byte) (int, error) }), " %s...\n", step.desc) + fmt.Fprintf(bi.logWriter, " %s...\n", step.desc) cmd := exec.Command(ipfsBinary, step.args...) 
cmd.Env = append(os.Environ(), "IPFS_PATH="+ipfsRepoPath) if output, err := cmd.CombinedOutput(); err != nil { @@ -485,8 +496,10 @@ func (bi *BinaryInstaller) InitializeIPFSRepo(ipfsRepoPath string, swarmKeyPath } } - // Fix ownership - exec.Command("chown", "-R", "debros:debros", ipfsRepoPath).Run() + // Fix ownership (best-effort, don't fail if it doesn't work) + if err := exec.Command("chown", "-R", "debros:debros", ipfsRepoPath).Run(); err != nil { + fmt.Fprintf(bi.logWriter, " āš ļø Warning: failed to chown IPFS repo: %v\n", err) + } return nil } @@ -506,21 +519,29 @@ func (bi *BinaryInstaller) configureIPFSAddresses(ipfsRepoPath string, apiPort, return fmt.Errorf("failed to parse IPFS config: %w", err) } - // Set Addresses - // Bind API and Gateway to localhost only for security - // Swarm remains on all interfaces for peer connections (routed via SNI gateway in production) - config["Addresses"] = map[string]interface{}{ - "API": []string{ - fmt.Sprintf("/ip4/127.0.0.1/tcp/%d", apiPort), - }, - "Gateway": []string{ - fmt.Sprintf("/ip4/127.0.0.1/tcp/%d", gatewayPort), - }, - "Swarm": []string{ - fmt.Sprintf("/ip4/127.0.0.1/tcp/%d", swarmPort), - }, + // Get existing Addresses section or create new one + // This preserves any existing settings like Announce, AppendAnnounce, NoAnnounce + addresses, ok := config["Addresses"].(map[string]interface{}) + if !ok { + addresses = make(map[string]interface{}) } + // Update specific address fields while preserving others + // Bind API and Gateway to localhost only for security + // Swarm binds to all interfaces for peer connections + addresses["API"] = []string{ + fmt.Sprintf("/ip4/127.0.0.1/tcp/%d", apiPort), + } + addresses["Gateway"] = []string{ + fmt.Sprintf("/ip4/127.0.0.1/tcp/%d", gatewayPort), + } + addresses["Swarm"] = []string{ + fmt.Sprintf("/ip4/0.0.0.0/tcp/%d", swarmPort), + fmt.Sprintf("/ip6/::/tcp/%d", swarmPort), + } + + config["Addresses"] = addresses + // Write config back updatedData, err := 
json.MarshalIndent(config, "", " ") if err != nil { @@ -543,17 +564,19 @@ func (bi *BinaryInstaller) InitializeIPFSClusterConfig(clusterPath, clusterSecre configExists := false if _, err := os.Stat(serviceJSONPath); err == nil { configExists = true - fmt.Fprintf(bi.logWriter.(interface{ Write([]byte) (int, error) }), " IPFS Cluster config already exists, ensuring it's up to date...\n") + fmt.Fprintf(bi.logWriter, " IPFS Cluster config already exists, ensuring it's up to date...\n") } else { - fmt.Fprintf(bi.logWriter.(interface{ Write([]byte) (int, error) }), " Preparing IPFS Cluster path...\n") + fmt.Fprintf(bi.logWriter, " Preparing IPFS Cluster path...\n") } if err := os.MkdirAll(clusterPath, 0755); err != nil { return fmt.Errorf("failed to create IPFS Cluster directory: %w", err) } - // Fix ownership before running init - exec.Command("chown", "-R", "debros:debros", clusterPath).Run() + // Fix ownership before running init (best-effort) + if err := exec.Command("chown", "-R", "debros:debros", clusterPath).Run(); err != nil { + fmt.Fprintf(bi.logWriter, " āš ļø Warning: failed to chown cluster path before init: %v\n", err) + } // Resolve ipfs-cluster-service binary path clusterBinary, err := bi.ResolveBinaryPath("ipfs-cluster-service", "/usr/local/bin/ipfs-cluster-service", "/usr/bin/ipfs-cluster-service") @@ -565,7 +588,7 @@ func (bi *BinaryInstaller) InitializeIPFSClusterConfig(clusterPath, clusterSecre if !configExists { // Initialize cluster config with ipfs-cluster-service init // This creates the service.json file with all required sections - fmt.Fprintf(bi.logWriter.(interface{ Write([]byte) (int, error) }), " Initializing IPFS Cluster config...\n") + fmt.Fprintf(bi.logWriter, " Initializing IPFS Cluster config...\n") cmd := exec.Command(clusterBinary, "init", "--force") cmd.Env = append(os.Environ(), "IPFS_CLUSTER_PATH="+clusterPath) // Pass CLUSTER_SECRET to init so it writes the correct secret to service.json directly @@ -581,7 +604,7 @@ func (bi 
*BinaryInstaller) InitializeIPFSClusterConfig(clusterPath, clusterSecre // This ensures existing installations get the secret and port synchronized // We do this AFTER init to ensure our secret takes precedence if clusterSecret != "" { - fmt.Fprintf(bi.logWriter.(interface{ Write([]byte) (int, error) }), " Updating cluster secret, IPFS port, and peer addresses...\n") + fmt.Fprintf(bi.logWriter, " Updating cluster secret, IPFS port, and peer addresses...\n") if err := bi.updateClusterConfig(clusterPath, clusterSecret, ipfsAPIPort, clusterPeers); err != nil { return fmt.Errorf("failed to update cluster config: %w", err) } @@ -590,11 +613,13 @@ func (bi *BinaryInstaller) InitializeIPFSClusterConfig(clusterPath, clusterSecre if err := bi.verifyClusterSecret(clusterPath, clusterSecret); err != nil { return fmt.Errorf("cluster secret verification failed: %w", err) } - fmt.Fprintf(bi.logWriter.(interface{ Write([]byte) (int, error) }), " āœ“ Cluster secret verified\n") + fmt.Fprintf(bi.logWriter, " āœ“ Cluster secret verified\n") } - // Fix ownership again after updates - exec.Command("chown", "-R", "debros:debros", clusterPath).Run() + // Fix ownership again after updates (best-effort) + if err := exec.Command("chown", "-R", "debros:debros", clusterPath).Run(); err != nil { + fmt.Fprintf(bi.logWriter, " āš ļø Warning: failed to chown cluster path after updates: %v\n", err) + } return nil } @@ -719,13 +744,15 @@ func (bi *BinaryInstaller) GetClusterPeerMultiaddr(clusterPath string, nodeIP st // InitializeRQLiteDataDir initializes RQLite data directory func (bi *BinaryInstaller) InitializeRQLiteDataDir(dataDir string) error { - fmt.Fprintf(bi.logWriter.(interface{ Write([]byte) (int, error) }), " Initializing RQLite data dir...\n") + fmt.Fprintf(bi.logWriter, " Initializing RQLite data dir...\n") if err := os.MkdirAll(dataDir, 0755); err != nil { return fmt.Errorf("failed to create RQLite data directory: %w", err) } - exec.Command("chown", "-R", "debros:debros", 
dataDir).Run() + if err := exec.Command("chown", "-R", "debros:debros", dataDir).Run(); err != nil { + fmt.Fprintf(bi.logWriter, " āš ļø Warning: failed to chown RQLite data dir: %v\n", err) + } return nil } @@ -733,11 +760,50 @@ func (bi *BinaryInstaller) InitializeRQLiteDataDir(dataDir string) error { func (bi *BinaryInstaller) InstallAnyoneClient() error { // Check if anyone-client is already available via npx (more reliable for scoped packages) if cmd := exec.Command("npx", "--yes", "@anyone-protocol/anyone-client", "--version"); cmd.Run() == nil { - fmt.Fprintf(bi.logWriter.(interface{ Write([]byte) (int, error) }), " āœ“ anyone-client already installed\n") + fmt.Fprintf(bi.logWriter, " āœ“ anyone-client already installed\n") return nil } - fmt.Fprintf(bi.logWriter.(interface{ Write([]byte) (int, error) }), " Installing anyone-client...\n") + fmt.Fprintf(bi.logWriter, " Installing anyone-client...\n") + + // Initialize NPM cache structure to ensure all directories exist + // This prevents "mkdir" errors when NPM tries to create nested cache directories + fmt.Fprintf(bi.logWriter, " Initializing NPM cache...\n") + + // Create nested cache directories with proper permissions + debrosHome := "/home/debros" + npmCacheDirs := []string{ + filepath.Join(debrosHome, ".npm"), + filepath.Join(debrosHome, ".npm", "_cacache"), + filepath.Join(debrosHome, ".npm", "_cacache", "tmp"), + filepath.Join(debrosHome, ".npm", "_logs"), + } + + for _, dir := range npmCacheDirs { + if err := os.MkdirAll(dir, 0700); err != nil { + fmt.Fprintf(bi.logWriter, " āš ļø Failed to create %s: %v\n", dir, err) + continue + } + // Fix ownership to debros user (sequential to avoid race conditions) + if err := exec.Command("chown", "debros:debros", dir).Run(); err != nil { + fmt.Fprintf(bi.logWriter, " āš ļø Warning: failed to chown %s: %v\n", dir, err) + } + if err := exec.Command("chmod", "700", dir).Run(); err != nil { + fmt.Fprintf(bi.logWriter, " āš ļø Warning: failed to chmod %s: 
%v\n", dir, err) + } + } + + // Recursively fix ownership of entire .npm directory to ensure all nested files are owned by debros + if err := exec.Command("chown", "-R", "debros:debros", filepath.Join(debrosHome, ".npm")).Run(); err != nil { + fmt.Fprintf(bi.logWriter, " āš ļø Warning: failed to chown .npm directory: %v\n", err) + } + + // Run npm cache verify as debros user with proper environment + cacheInitCmd := exec.Command("sudo", "-u", "debros", "npm", "cache", "verify", "--silent") + cacheInitCmd.Env = append(os.Environ(), "HOME="+debrosHome) + if err := cacheInitCmd.Run(); err != nil { + fmt.Fprintf(bi.logWriter, " āš ļø NPM cache verify warning: %v (continuing anyway)\n", err) + } // Install anyone-client globally via npm (using scoped package name) cmd := exec.Command("npm", "install", "-g", "@anyone-protocol/anyone-client") @@ -776,6 +842,6 @@ func (bi *BinaryInstaller) InstallAnyoneClient() error { } } - fmt.Fprintf(bi.logWriter.(interface{ Write([]byte) (int, error) }), " āœ“ anyone-client installed\n") + fmt.Fprintf(bi.logWriter, " āœ“ anyone-client installed\n") return nil } diff --git a/pkg/environments/production/orchestrator.go b/pkg/environments/production/orchestrator.go index 63ceaf2..72d9e71 100644 --- a/pkg/environments/production/orchestrator.go +++ b/pkg/environments/production/orchestrator.go @@ -7,6 +7,7 @@ import ( "os/exec" "path/filepath" "strings" + "time" ) // ProductionSetup orchestrates the entire production deployment @@ -66,7 +67,7 @@ func SaveBranchPreference(oramaDir, branch string) error { // NewProductionSetup creates a new production setup orchestrator func NewProductionSetup(oramaHome string, logWriter io.Writer, forceReconfigure bool, branch string, skipRepoUpdate bool, skipResourceChecks bool) *ProductionSetup { - oramaDir := oramaHome + "/.orama" + oramaDir := filepath.Join(oramaHome, ".orama") arch, _ := (&ArchitectureDetector{}).Detect() // If branch is empty, try to read from stored preference, otherwise default to 
main @@ -364,7 +365,7 @@ func (ps *ProductionSetup) Phase4GenerateConfigs(peerAddresses []string, vpsIP s } // Node config (unified architecture) - nodeConfig, err := ps.configGenerator.GenerateNodeConfig(peerAddresses, vpsIP, joinAddress, domain) + nodeConfig, err := ps.configGenerator.GenerateNodeConfig(peerAddresses, vpsIP, joinAddress, domain, enableHTTPS) if err != nil { return fmt.Errorf("failed to generate node config: %w", err) } @@ -403,7 +404,8 @@ func (ps *ProductionSetup) Phase4GenerateConfigs(peerAddresses []string, vpsIP s } // Phase5CreateSystemdServices creates and enables systemd units -func (ps *ProductionSetup) Phase5CreateSystemdServices() error { +// enableHTTPS determines the RQLite Raft port (7002 when SNI is enabled, 7001 otherwise) +func (ps *ProductionSetup) Phase5CreateSystemdServices(enableHTTPS bool) error { ps.logf("Phase 5: Creating systemd services...") // Validate all required binaries are available before creating services @@ -415,7 +417,11 @@ func (ps *ProductionSetup) Phase5CreateSystemdServices() error { if err != nil { return fmt.Errorf("ipfs-cluster-service binary not available: %w", err) } - // Note: rqlited binary is not needed as a separate service - node manages RQLite internally + // RQLite binary for separate service + rqliteBinary, err := ps.binaryInstaller.ResolveBinaryPath("rqlited", "/usr/local/bin/rqlited", "/usr/bin/rqlited") + if err != nil { + return fmt.Errorf("rqlited binary not available: %w", err) + } olricBinary, err := ps.binaryInstaller.ResolveBinaryPath("olric-server", "/usr/local/bin/olric-server", "/usr/bin/olric-server") if err != nil { return fmt.Errorf("olric-server binary not available: %w", err) @@ -435,8 +441,17 @@ func (ps *ProductionSetup) Phase5CreateSystemdServices() error { } ps.logf(" āœ“ IPFS Cluster service created: debros-ipfs-cluster.service") - // Note: RQLite is managed internally by the node process, not as a separate systemd service - ps.logf(" ā„¹ļø RQLite will be managed by the 
node process") + // RQLite service (join address and advertise IP will be handled by node bootstrap) + // When HTTPS/SNI is enabled, RQLite listens on internal port 7002 (SNI gateway handles external 7001) + raftPort := 7001 + if enableHTTPS { + raftPort = 7002 + } + rqliteUnit := ps.serviceGenerator.GenerateRQLiteService(rqliteBinary, 5001, raftPort, "", "") + if err := ps.serviceController.WriteServiceUnit("debros-rqlite.service", rqliteUnit); err != nil { + return fmt.Errorf("failed to write RQLite service: %w", err) + } + ps.logf(" āœ“ RQLite service created: debros-rqlite.service") // Olric service olricUnit := ps.serviceGenerator.GenerateOlricService(olricBinary) @@ -467,7 +482,7 @@ func (ps *ProductionSetup) Phase5CreateSystemdServices() error { // Enable services (unified names - no bootstrap/node distinction) // Note: debros-gateway.service is no longer needed - each node has an embedded gateway - services := []string{"debros-ipfs.service", "debros-ipfs-cluster.service", "debros-olric.service", "debros-node.service", "debros-anyone-client.service"} + services := []string{"debros-ipfs.service", "debros-ipfs-cluster.service", "debros-rqlite.service", "debros-olric.service", "debros-node.service", "debros-anyone-client.service"} for _, svc := range services { if err := ps.serviceController.EnableService(svc); err != nil { ps.logf(" āš ļø Failed to enable %s: %v", svc, err) @@ -499,7 +514,7 @@ func (ps *ProductionSetup) Phase5CreateSystemdServices() error { } // Wait a moment for infrastructure to stabilize - exec.Command("sleep", "2").Run() + time.Sleep(2 * time.Second) // Start IPFS Cluster if err := ps.serviceController.StartService("debros-ipfs-cluster.service"); err != nil { @@ -508,14 +523,11 @@ func (ps *ProductionSetup) Phase5CreateSystemdServices() error { ps.logf(" - debros-ipfs-cluster.service started") } - // Start application services - appServices := []string{"debros-node.service", "debros-gateway.service"} - for _, svc := range appServices { - 
if err := ps.serviceController.StartService(svc); err != nil { - ps.logf(" āš ļø Failed to start %s: %v", svc, err) - } else { - ps.logf(" - %s started", svc) - } + // Start node service (gateway is embedded in node, no separate service needed) + if err := ps.serviceController.StartService("debros-node.service"); err != nil { + ps.logf(" āš ļø Failed to start debros-node.service: %v", err) + } else { + ps.logf(" - debros-node.service started (with embedded gateway)") } ps.logf(" āœ“ All services started") @@ -541,7 +553,7 @@ func (ps *ProductionSetup) LogSetupComplete(peerID string) { ps.logf(" %s/logs/gateway.log", ps.oramaDir) ps.logf(" %s/logs/anyone-client.log", ps.oramaDir) ps.logf("\nStart All Services:") - ps.logf(" systemctl start debros-ipfs debros-ipfs-cluster debros-olric debros-anyone-client debros-node debros-gateway") + ps.logf(" systemctl start debros-ipfs debros-ipfs-cluster debros-rqlite debros-olric debros-anyone-client debros-node") ps.logf("\nVerify Installation:") ps.logf(" curl http://localhost:6001/health") ps.logf(" curl http://localhost:5001/status") diff --git a/pkg/environments/production/provisioner.go b/pkg/environments/production/provisioner.go index 71dd287..d11edfc 100644 --- a/pkg/environments/production/provisioner.go +++ b/pkg/environments/production/provisioner.go @@ -39,6 +39,7 @@ func (fp *FilesystemProvisioner) EnsureDirectoryStructure() error { filepath.Join(fp.oramaDir, "backups"), filepath.Join(fp.oramaHome, "bin"), filepath.Join(fp.oramaHome, "src"), + filepath.Join(fp.oramaHome, ".npm"), } for _, dir := range dirs { @@ -47,6 +48,13 @@ func (fp *FilesystemProvisioner) EnsureDirectoryStructure() error { } } + // Remove any stray cluster-secret file from root .orama directory + // The correct location is .orama/secrets/cluster-secret + strayClusterSecret := filepath.Join(fp.oramaDir, "cluster-secret") + if _, err := os.Stat(strayClusterSecret); err == nil { + os.Remove(strayClusterSecret) + } + // Create log files with 
correct permissions so systemd can write to them logsDir := filepath.Join(fp.oramaDir, "logs") logFiles := []string{ @@ -93,6 +101,13 @@ func (fp *FilesystemProvisioner) FixOwnership() error { return fmt.Errorf("failed to set ownership for %s: %w\nOutput: %s", binDir, err, string(output)) } + // Fix npm cache directory + npmDir := filepath.Join(fp.oramaHome, ".npm") + cmd = exec.Command("chown", "-R", "debros:debros", npmDir) + if output, err := cmd.CombinedOutput(); err != nil { + return fmt.Errorf("failed to set ownership for %s: %w\nOutput: %s", npmDir, err, string(output)) + } + return nil } diff --git a/pkg/environments/production/services.go b/pkg/environments/production/services.go index 8784104..3f181dd 100644 --- a/pkg/environments/production/services.go +++ b/pkg/environments/production/services.go @@ -49,6 +49,12 @@ SyslogIdentifier=debros-ipfs NoNewPrivileges=yes PrivateTmp=yes ProtectSystem=strict +ProtectHome=read-only +ProtectKernelTunables=yes +ProtectKernelModules=yes +ProtectControlGroups=yes +RestrictRealtime=yes +RestrictSUIDSGID=yes ReadWritePaths=%[3]s [Install] @@ -68,11 +74,6 @@ func (ssg *SystemdServiceGenerator) GenerateIPFSClusterService(clusterBinary str clusterSecret = strings.TrimSpace(string(data)) } - // Escape the secret for use in bash command (escape single quotes and backslashes) - escapedSecret := strings.ReplaceAll(clusterSecret, "'", "'\"'\"'") - escapedSecret = strings.ReplaceAll(escapedSecret, "\\", "\\\\") - _ = escapedSecret // Used in ExecStartPre - return fmt.Sprintf(`[Unit] Description=IPFS Cluster Service After=debros-ipfs.service @@ -86,9 +87,9 @@ Group=debros WorkingDirectory=%[1]s Environment=HOME=%[1]s Environment=IPFS_CLUSTER_PATH=%[2]s -Environment=CLUSTER_SECRET=%[6]s -ExecStartPre=/bin/bash -c 'if [ -f %[7]s ] && [ -f %[2]s/service.json ]; then SECRET=$(cat %[7]s | tr -d "[:space:]"); python3 -c "import json, sys; f=open(\"%[2]s/service.json\", \"r\"); d=json.load(f); f.close(); d.setdefault(\"cluster\", 
{})[\"secret\"]=\"$SECRET\"; f=open(\"%[2]s/service.json\", \"w\"); json.dump(d, f, indent=2); f.close()" 2>/dev/null || sed -i "s|\"secret\"[[:space:]]*:[[:space:]]*\"[^\"]*\"|\"secret\": \"$SECRET\"|" %[2]s/service.json; fi' -ExecStart=%[5]s daemon +Environment=CLUSTER_SECRET=%[5]s +ExecStartPre=/bin/bash -c 'mkdir -p %[2]s && chmod 700 %[2]s' +ExecStart=%[4]s daemon Restart=always RestartSec=5 StandardOutput=file:%[3]s @@ -98,11 +99,17 @@ SyslogIdentifier=debros-ipfs-cluster NoNewPrivileges=yes PrivateTmp=yes ProtectSystem=strict -ReadWritePaths=%[4]s +ProtectHome=read-only +ProtectKernelTunables=yes +ProtectKernelModules=yes +ProtectControlGroups=yes +RestrictRealtime=yes +RestrictSUIDSGID=yes +ReadWritePaths=%[1]s [Install] WantedBy=multi-user.target -`, ssg.oramaHome, clusterPath, logFile, ssg.oramaDir, clusterBinary, clusterSecret, clusterSecretPath) +`, ssg.oramaHome, clusterPath, logFile, clusterBinary, clusterSecret) } // GenerateRQLiteService generates the RQLite systemd unit @@ -147,6 +154,12 @@ SyslogIdentifier=debros-rqlite NoNewPrivileges=yes PrivateTmp=yes ProtectSystem=strict +ProtectHome=read-only +ProtectKernelTunables=yes +ProtectKernelModules=yes +ProtectControlGroups=yes +RestrictRealtime=yes +RestrictSUIDSGID=yes ReadWritePaths=%[4]s [Install] @@ -180,6 +193,12 @@ SyslogIdentifier=olric NoNewPrivileges=yes PrivateTmp=yes ProtectSystem=strict +ProtectHome=read-only +ProtectKernelTunables=yes +ProtectKernelModules=yes +ProtectControlGroups=yes +RestrictRealtime=yes +RestrictSUIDSGID=yes ReadWritePaths=%[4]s [Install] @@ -194,9 +213,9 @@ func (ssg *SystemdServiceGenerator) GenerateNodeService() string { return fmt.Sprintf(`[Unit] Description=DeBros Network Node -After=debros-ipfs-cluster.service -Wants=debros-ipfs-cluster.service -Requires=debros-ipfs-cluster.service +After=debros-ipfs-cluster.service debros-rqlite.service +Wants=debros-ipfs-cluster.service debros-rqlite.service +Requires=debros-ipfs-cluster.service debros-rqlite.service 
[Service] Type=simple @@ -211,9 +230,17 @@ StandardOutput=file:%[4]s StandardError=file:%[4]s SyslogIdentifier=debros-node -NoNewPrivileges=yes +AmbientCapabilities=CAP_NET_BIND_SERVICE +CapabilityBoundingSet=CAP_NET_BIND_SERVICE + PrivateTmp=yes ProtectSystem=strict +ProtectHome=read-only +ProtectKernelTunables=yes +ProtectKernelModules=yes +ProtectControlGroups=yes +RestrictRealtime=yes +RestrictSUIDSGID=yes ReadWritePaths=%[2]s [Install] @@ -245,9 +272,16 @@ SyslogIdentifier=debros-gateway AmbientCapabilities=CAP_NET_BIND_SERVICE CapabilityBoundingSet=CAP_NET_BIND_SERVICE -NoNewPrivileges=yes +# Note: NoNewPrivileges is omitted because it conflicts with AmbientCapabilities +# The service needs CAP_NET_BIND_SERVICE to bind to privileged ports (80, 443) PrivateTmp=yes ProtectSystem=strict +ProtectHome=read-only +ProtectKernelTunables=yes +ProtectKernelModules=yes +ProtectControlGroups=yes +RestrictRealtime=yes +RestrictSUIDSGID=yes ReadWritePaths=%[2]s [Install] @@ -280,6 +314,12 @@ SyslogIdentifier=anyone-client NoNewPrivileges=yes PrivateTmp=yes ProtectSystem=strict +ProtectHome=read-only +ProtectKernelTunables=yes +ProtectKernelModules=yes +ProtectControlGroups=yes +RestrictRealtime=yes +RestrictSUIDSGID=yes ReadWritePaths=%[3]s [Install] diff --git a/pkg/environments/templates/node.yaml b/pkg/environments/templates/node.yaml index c761958..764f3da 100644 --- a/pkg/environments/templates/node.yaml +++ b/pkg/environments/templates/node.yaml @@ -13,7 +13,7 @@ database: max_database_size: 1073741824 backup_interval: "24h" rqlite_port: {{.RQLiteHTTPPort}} - rqlite_raft_port: {{.RQLiteRaftPort}} + rqlite_raft_port: {{.RQLiteRaftInternalPort}} rqlite_join_address: "{{.RQLiteJoinAddress}}" cluster_sync_interval: "30s" peer_inactivity_limit: "24h" @@ -44,9 +44,31 @@ logging: http_gateway: enabled: true - listen_addr: ":{{.UnifiedGatewayPort}}" + listen_addr: "{{if .EnableHTTPS}}:{{.HTTPSPort}}{{else}}:{{.UnifiedGatewayPort}}{{end}}" node_name: "{{.NodeID}}" + {{if 
.EnableHTTPS}}https: + enabled: true + domain: "{{.Domain}}" + auto_cert: true + cache_dir: "{{.TLSCacheDir}}" + http_port: {{.HTTPPort}} + https_port: {{.HTTPSPort}} + email: "admin@{{.Domain}}" + {{end}} + + {{if .EnableHTTPS}}sni: + enabled: true + listen_addr: ":{{.RQLiteRaftPort}}" + cert_file: "{{.TLSCacheDir}}/{{.Domain}}.crt" + key_file: "{{.TLSCacheDir}}/{{.Domain}}.key" + routes: + raft.{{.Domain}}: "localhost:{{.RQLiteRaftInternalPort}}" + ipfs.{{.Domain}}: "localhost:4101" + ipfs-cluster.{{.Domain}}: "localhost:9096" + olric.{{.Domain}}: "localhost:3322" + {{end}} + # Full gateway configuration (for API, auth, pubsub, and internal service routing) client_namespace: "default" rqlite_dsn: "http://localhost:{{.RQLiteHTTPPort}}" diff --git a/pkg/environments/templates/render.go b/pkg/environments/templates/render.go index 84d0c85..baeb020 100644 --- a/pkg/environments/templates/render.go +++ b/pkg/environments/templates/render.go @@ -13,19 +13,24 @@ var templatesFS embed.FS // NodeConfigData holds parameters for node.yaml rendering (unified - no bootstrap/node distinction) type NodeConfigData struct { - NodeID string - P2PPort int - DataDir string - RQLiteHTTPPort int - RQLiteRaftPort int - RQLiteJoinAddress string // Optional: join address for joining existing cluster - BootstrapPeers []string // List of peer multiaddrs to connect to - ClusterAPIPort int - IPFSAPIPort int // Default: 4501 - HTTPAdvAddress string // Advertised HTTP address (IP:port) - RaftAdvAddress string // Advertised Raft address (IP:port) - UnifiedGatewayPort int // Unified gateway port for all node services - Domain string // Domain for this node (e.g., node-123.orama.network) + NodeID string + P2PPort int + DataDir string + RQLiteHTTPPort int + RQLiteRaftPort int // External Raft port for advertisement (7001 for SNI) + RQLiteRaftInternalPort int // Internal Raft port for local binding (7002 when SNI enabled) + RQLiteJoinAddress string // Optional: join address for joining existing 
cluster + BootstrapPeers []string // List of peer multiaddrs to connect to + ClusterAPIPort int + IPFSAPIPort int // Default: 4501 + HTTPAdvAddress string // Advertised HTTP address (IP:port) + RaftAdvAddress string // Advertised Raft address (IP:port or domain:port for SNI) + UnifiedGatewayPort int // Unified gateway port for all node services + Domain string // Domain for this node (e.g., node-123.orama.network) + EnableHTTPS bool // Enable HTTPS/TLS with ACME + TLSCacheDir string // Directory for ACME certificate cache + HTTPPort int // HTTP port for ACME challenges (usually 80) + HTTPSPort int // HTTPS port (usually 443) } // GatewayConfigData holds parameters for gateway.yaml rendering diff --git a/pkg/gateway/https.go b/pkg/gateway/https.go index a7218a4..9e6c56a 100644 --- a/pkg/gateway/https.go +++ b/pkg/gateway/https.go @@ -5,10 +5,12 @@ import ( "crypto/tls" "fmt" "net/http" + "os" "strings" "time" "go.uber.org/zap" + "golang.org/x/crypto/acme" "golang.org/x/crypto/acme/autocert" "github.com/DeBrosOfficial/network/pkg/config" @@ -45,23 +47,46 @@ func NewHTTPSGateway(logger *logging.ColoredLogger, cfg *config.HTTPGatewayConfi httpsConfig: &cfg.HTTPS, } - // Set up Let's Encrypt autocert if enabled - if cfg.HTTPS.AutoCert { + // Check if using self-signed certificates or Let's Encrypt + if cfg.HTTPS.UseSelfSigned || (cfg.HTTPS.CertFile != "" && cfg.HTTPS.KeyFile != "") { + // Using self-signed or pre-existing certificates + logger.ComponentInfo(logging.ComponentGeneral, "Using self-signed or pre-configured certificates for HTTPS", + zap.String("domain", cfg.HTTPS.Domain), + zap.String("cert_file", cfg.HTTPS.CertFile), + zap.String("key_file", cfg.HTTPS.KeyFile), + ) + // Don't set certManager - will use CertFile/KeyFile from config + } else if cfg.HTTPS.AutoCert { + // Use Let's Encrypt (existing logic) cacheDir := cfg.HTTPS.CacheDir if cacheDir == "" { cacheDir = "/home/debros/.orama/tls-cache" } + // Check environment for staging mode + directoryURL := 
"https://acme-v02.api.letsencrypt.org/directory" // Production + if os.Getenv("DEBROS_ACME_STAGING") != "" { + directoryURL = "https://acme-staging-v02.api.letsencrypt.org/directory" + logger.ComponentWarn(logging.ComponentGeneral, + "Using Let's Encrypt STAGING - certificates will not be trusted by production clients", + zap.String("domain", cfg.HTTPS.Domain), + ) + } + gateway.certManager = &autocert.Manager{ Prompt: autocert.AcceptTOS, HostPolicy: autocert.HostWhitelist(cfg.HTTPS.Domain), Cache: autocert.DirCache(cacheDir), Email: cfg.HTTPS.Email, + Client: &acme.Client{ + DirectoryURL: directoryURL, + }, } logger.ComponentInfo(logging.ComponentGeneral, "Let's Encrypt autocert configured", zap.String("domain", cfg.HTTPS.Domain), zap.String("cache_dir", cacheDir), + zap.String("acme_environment", map[bool]string{true: "staging", false: "production"}[directoryURL == "https://acme-staging-v02.api.letsencrypt.org/directory"]), ) } diff --git a/pkg/installer/installer.go b/pkg/installer/installer.go index 23995e4..a60ade5 100644 --- a/pkg/installer/installer.go +++ b/pkg/installer/installer.go @@ -2,26 +2,35 @@ package installer import ( + "encoding/json" "fmt" "net" + "net/http" "os" + "path/filepath" "regexp" "strings" + "time" "github.com/charmbracelet/bubbles/textinput" tea "github.com/charmbracelet/bubbletea" "github.com/charmbracelet/lipgloss" + + "github.com/DeBrosOfficial/network/pkg/certutil" + "github.com/DeBrosOfficial/network/pkg/tlsutil" ) // InstallerConfig holds the configuration gathered from the TUI type InstallerConfig struct { VpsIP string Domain string - JoinAddress string - Peers []string + PeerDomain string // Domain of existing node to join + JoinAddress string // Auto-populated: raft.{PeerDomain}:7001 + Peers []string // Auto-populated: /dns4/{PeerDomain}/tcp/4001/p2p/{PeerID} ClusterSecret string Branch string IsFirstNode bool + NoPull bool } // Step represents a step in the installation wizard @@ -32,9 +41,10 @@ const ( StepNodeType 
StepVpsIP StepDomain - StepJoinAddress + StepPeerDomain // Domain of existing node to join (replaces StepJoinAddress) StepClusterSecret StepBranch + StepNoPull StepConfirm StepInstalling StepDone @@ -42,15 +52,18 @@ const ( // Model is the bubbletea model for the installer type Model struct { - step Step - config InstallerConfig - textInput textinput.Model - err error - width int - height int - installing bool - installOutput []string - cursor int // For selection menus + step Step + config InstallerConfig + textInput textinput.Model + err error + width int + height int + installing bool + installOutput []string + cursor int // For selection menus + discovering bool // Whether domain discovery is in progress + discoveryInfo string // Info message during discovery + discoveredPeer string // Discovered peer ID from domain } // Styles @@ -120,6 +133,10 @@ func (m Model) Update(msg tea.Msg) (tea.Model, tea.Cmd) { m.height = msg.Height return m, nil + case installCompleteMsg: + m.step = StepDone + return m, nil + case tea.KeyMsg: switch msg.String() { case "ctrl+c", "q": @@ -131,18 +148,14 @@ func (m Model) Update(msg tea.Msg) (tea.Model, tea.Cmd) { return m.handleEnter() case "up", "k": - if m.step == StepNodeType || m.step == StepBranch { + if m.step == StepNodeType || m.step == StepBranch || m.step == StepNoPull { if m.cursor > 0 { m.cursor-- } } case "down", "j": - if m.step == StepNodeType { - if m.cursor < 1 { - m.cursor++ - } - } else if m.step == StepBranch { + if m.step == StepNodeType || m.step == StepBranch || m.step == StepNoPull { if m.cursor < 1 { m.cursor++ } @@ -158,7 +171,7 @@ func (m Model) Update(msg tea.Msg) (tea.Model, tea.Cmd) { } // Update text input for input steps - if m.step == StepVpsIP || m.step == StepDomain || m.step == StepJoinAddress || m.step == StepClusterSecret { + if m.step == StepVpsIP || m.step == StepDomain || m.step == StepPeerDomain || m.step == StepClusterSecret { var cmd tea.Cmd m.textInput, cmd = m.textInput.Update(msg) return 
m, cmd @@ -197,23 +210,56 @@ func (m *Model) handleEnter() (tea.Model, tea.Cmd) { } m.config.Domain = domain m.err = nil + + // Auto-generate self-signed certificates for this domain + m.discovering = true + m.discoveryInfo = "Generating SSL certificates for " + domain + "..." + + if err := ensureCertificatesForDomain(domain); err != nil { + m.discovering = false + m.err = fmt.Errorf("failed to generate certificates: %w", err) + return m, nil + } + + m.discovering = false + if m.config.IsFirstNode { m.step = StepBranch m.cursor = 0 } else { - m.step = StepJoinAddress + m.step = StepPeerDomain m.setupStepInput() } - case StepJoinAddress: - addr := strings.TrimSpace(m.textInput.Value()) - if addr != "" { - if err := validateJoinAddress(addr); err != nil { - m.err = err - return m, nil - } - m.config.JoinAddress = addr + case StepPeerDomain: + peerDomain := strings.TrimSpace(m.textInput.Value()) + if err := validateDomain(peerDomain); err != nil { + m.err = err + return m, nil } + + // Discover peer info from domain (try HTTPS first, then HTTP) + m.discovering = true + m.discoveryInfo = "Discovering peer from " + peerDomain + "..." 
+ + peerID, err := discoverPeerFromDomain(peerDomain) + m.discovering = false + + if err != nil { + m.err = fmt.Errorf("failed to discover peer: %w", err) + return m, nil + } + + // Store discovered info + m.config.PeerDomain = peerDomain + m.discoveredPeer = peerID + + // Auto-populate join address and bootstrap peers + m.config.JoinAddress = fmt.Sprintf("raft.%s:7001", peerDomain) + m.config.Peers = []string{ + fmt.Sprintf("/dns4/%s/tcp/4001/p2p/%s", peerDomain, peerID), + } + m.err = nil m.step = StepClusterSecret m.setupStepInput() @@ -235,6 +281,15 @@ func (m *Model) handleEnter() (tea.Model, tea.Cmd) { } else { m.config.Branch = "nightly" } + m.cursor = 0 // Reset cursor for next step + m.step = StepNoPull + + case StepNoPull: + if m.cursor == 0 { + m.config.NoPull = false + } else { + m.config.NoPull = true + } m.step = StepConfirm case StepConfirm: @@ -251,6 +306,7 @@ func (m *Model) handleEnter() (tea.Model, tea.Cmd) { func (m *Model) setupStepInput() { m.textInput.Reset() m.textInput.Focus() + m.textInput.EchoMode = textinput.EchoNormal // Reset echo mode switch m.step { case StepVpsIP: @@ -261,8 +317,8 @@ func (m *Model) setupStepInput() { } case StepDomain: m.textInput.Placeholder = "e.g., node-1.orama.network" - case StepJoinAddress: - m.textInput.Placeholder = "e.g., 203.0.113.1:7001 (or leave empty)" + case StepPeerDomain: + m.textInput.Placeholder = "e.g., node-123.orama.network" case StepClusterSecret: m.textInput.Placeholder = "64 hex characters" m.textInput.EchoMode = textinput.EchoPassword @@ -298,12 +354,14 @@ func (m Model) View() string { s.WriteString(m.viewVpsIP()) case StepDomain: s.WriteString(m.viewDomain()) - case StepJoinAddress: - s.WriteString(m.viewJoinAddress()) + case StepPeerDomain: + s.WriteString(m.viewPeerDomain()) case StepClusterSecret: s.WriteString(m.viewClusterSecret()) case StepBranch: s.WriteString(m.viewBranch()) + case StepNoPull: + s.WriteString(m.viewNoPull()) case StepConfirm: s.WriteString(m.viewConfirm()) case 
StepInstalling: @@ -390,19 +448,27 @@ func (m Model) viewDomain() string { return s.String() } -func (m Model) viewJoinAddress() string { +func (m Model) viewPeerDomain() string { var s strings.Builder - s.WriteString(titleStyle.Render("Join Address") + "\n\n") - s.WriteString("Enter the RQLite address to join (IP:port):\n") - s.WriteString(subtitleStyle.Render("Leave empty to auto-detect from peers") + "\n\n") + s.WriteString(titleStyle.Render("Existing Node Domain") + "\n\n") + s.WriteString("Enter the domain of an existing node to join:\n") + s.WriteString(subtitleStyle.Render("The installer will auto-discover peer info via HTTPS/HTTP") + "\n\n") s.WriteString(m.textInput.View()) + if m.discovering { + s.WriteString("\n\n" + subtitleStyle.Render("šŸ” "+m.discoveryInfo)) + } + + if m.discoveredPeer != "" && m.err == nil { + s.WriteString("\n\n" + successStyle.Render("āœ“ Discovered peer: "+m.discoveredPeer[:12]+"...")) + } + if m.err != nil { s.WriteString("\n\n" + errorStyle.Render("āœ— " + m.err.Error())) } s.WriteString("\n\n") - s.WriteString(helpStyle.Render("Enter to confirm • Esc to go back")) + s.WriteString(helpStyle.Render("Enter to discover & continue • Esc to go back")) return s.String() } @@ -441,26 +507,56 @@ func (m Model) viewBranch() string { return s.String() } +func (m Model) viewNoPull() string { + var s strings.Builder + s.WriteString(titleStyle.Render("Git Repository") + "\n\n") + s.WriteString("Pull latest changes from repository?\n\n") + + options := []string{"Pull latest (recommended)", "Skip git pull (use existing source)"} + for i, opt := range options { + if i == m.cursor { + s.WriteString(cursorStyle.Render("→ ") + focusedStyle.Render(opt) + "\n") + } else { + s.WriteString(" " + blurredStyle.Render(opt) + "\n") + } + } + + s.WriteString("\n") + s.WriteString(helpStyle.Render("↑/↓ to select • Enter to confirm • Esc to go back")) + return s.String() +} + func (m Model) viewConfirm() string { var s strings.Builder 
s.WriteString(titleStyle.Render("Confirm Installation") + "\n\n") + noPullStr := "Pull latest" + if m.config.NoPull { + noPullStr = "Skip git pull" + } + config := fmt.Sprintf( " VPS IP: %s\n"+ " Domain: %s\n"+ " Branch: %s\n"+ + " Git Pull: %s\n"+ " Node Type: %s\n", m.config.VpsIP, m.config.Domain, m.config.Branch, + noPullStr, map[bool]string{true: "First node (new cluster)", false: "Join existing cluster"}[m.config.IsFirstNode], ) if !m.config.IsFirstNode { - if m.config.JoinAddress != "" { - config += fmt.Sprintf(" Join Addr: %s\n", m.config.JoinAddress) + config += fmt.Sprintf(" Peer Node: %s\n", m.config.PeerDomain) + config += fmt.Sprintf(" Join Addr: %s\n", m.config.JoinAddress) + if len(m.config.Peers) > 0 { + config += fmt.Sprintf(" Bootstrap: %s...\n", m.config.Peers[0][:40]) + } + if len(m.config.ClusterSecret) >= 8 { + config += fmt.Sprintf(" Secret: %s...\n", m.config.ClusterSecret[:8]) } - config += fmt.Sprintf(" Secret: %s...\n", m.config.ClusterSecret[:8]) } s.WriteString(boxStyle.Render(config)) @@ -521,15 +617,45 @@ func validateDomain(domain string) error { return nil } -func validateJoinAddress(addr string) error { - if addr == "" { - return nil // Optional - } - _, _, err := net.SplitHostPort(addr) +// discoverPeerFromDomain queries an existing node to get its peer ID +// Tries HTTPS first, then falls back to HTTP +// Respects DEBROS_TRUSTED_TLS_DOMAINS and DEBROS_CA_CERT_PATH environment variables for certificate verification +func discoverPeerFromDomain(domain string) (string, error) { + // Use centralized TLS configuration that respects CA certificates and trusted domains + client := tlsutil.NewHTTPClientForDomain(10*time.Second, domain) + + // Try HTTPS first + url := fmt.Sprintf("https://%s/v1/network/status", domain) + resp, err := client.Get(url) + + // If HTTPS fails, try HTTP if err != nil { - return fmt.Errorf("invalid address format (expected IP:port)") + // Finally try plain HTTP + url = fmt.Sprintf("http://%s/v1/network/status", 
domain) + resp, err = client.Get(url) + if err != nil { + return "", fmt.Errorf("could not connect to %s (tried HTTPS and HTTP): %w", domain, err) + } } - return nil + defer resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + return "", fmt.Errorf("unexpected status from %s: %s", domain, resp.Status) + } + + // Parse response + var status struct { + NodeID string `json:"node_id"` + } + if err := json.NewDecoder(resp.Body).Decode(&status); err != nil { + return "", fmt.Errorf("failed to parse response from %s: %w", domain, err) + } + + if status.NodeID == "" { + return "", fmt.Errorf("no node_id in response from %s", domain) + } + + return status.NodeID, nil } func validateClusterSecret(secret string) error { @@ -543,6 +669,47 @@ func validateClusterSecret(secret string) error { return nil } +// ensureCertificatesForDomain generates self-signed certificates for the domain +func ensureCertificatesForDomain(domain string) error { + // Get home directory + home, err := os.UserHomeDir() + if err != nil { + return fmt.Errorf("failed to get home directory: %w", err) + } + + // Create cert directory + certDir := filepath.Join(home, ".orama", "certs") + if err := os.MkdirAll(certDir, 0700); err != nil { + return fmt.Errorf("failed to create cert directory: %w", err) + } + + // Create certificate manager + cm := certutil.NewCertificateManager(certDir) + + // Ensure CA certificate exists + caCertPEM, caKeyPEM, err := cm.EnsureCACertificate() + if err != nil { + return fmt.Errorf("failed to ensure CA certificate: %w", err) + } + + // Ensure node certificate exists for the domain + _, _, err = cm.EnsureNodeCertificate(domain, caCertPEM, caKeyPEM) + if err != nil { + return fmt.Errorf("failed to ensure node certificate: %w", err) + } + + // Also create wildcard certificate if domain is not already wildcard + if !strings.HasPrefix(domain, "*.") { + wildcardDomain := "*." 
+ domain + _, _, err = cm.EnsureNodeCertificate(wildcardDomain, caCertPEM, caKeyPEM) + if err != nil { + return fmt.Errorf("failed to ensure wildcard certificate: %w", err) + } + } + + return nil +} + func detectPublicIP() string { // Try to detect public IP from common interfaces addrs, err := net.InterfaceAddrs() @@ -566,13 +733,14 @@ func Run() (*InstallerConfig, error) { return nil, fmt.Errorf("installer must be run as root (use sudo)") } - p := tea.NewProgram(NewModel(), tea.WithAltScreen()) + model := NewModel() + p := tea.NewProgram(&model, tea.WithAltScreen()) finalModel, err := p.Run() if err != nil { return nil, err } - m := finalModel.(Model) + m := finalModel.(*Model) if m.step == StepInstalling || m.step == StepDone { config := m.GetConfig() return &config, nil diff --git a/pkg/ipfs/cluster.go b/pkg/ipfs/cluster.go index c711192..662e6d2 100644 --- a/pkg/ipfs/cluster.go +++ b/pkg/ipfs/cluster.go @@ -19,6 +19,7 @@ import ( "go.uber.org/zap" "github.com/DeBrosOfficial/network/pkg/config" + "github.com/DeBrosOfficial/network/pkg/tlsutil" "github.com/libp2p/go-libp2p/core/host" "github.com/multiformats/go-multiaddr" ) @@ -328,7 +329,7 @@ func (cm *ClusterConfigManager) UpdateAllClusterPeers() (bool, error) { } // Query local cluster API to get all peers - client := &standardHTTPClient{} + client := newStandardHTTPClient() peersURL := fmt.Sprintf("%s/peers", cm.cfg.Database.IPFS.ClusterAPIURL) resp, err := client.Get(peersURL) if err != nil { @@ -914,7 +915,7 @@ func parseIPFSPort(apiURL string) (int, error) { // getPeerID queries the cluster API to get the peer ID func getPeerID(apiURL string) (string, error) { // Simple HTTP client to query /peers endpoint - client := &standardHTTPClient{} + client := newStandardHTTPClient() resp, err := client.Get(fmt.Sprintf("%s/peers", apiURL)) if err != nil { return "", err @@ -966,11 +967,19 @@ func generateRandomSecret(length int) string { return hex.EncodeToString(bytes) } -// standardHTTPClient implements HTTP 
client using net/http -type standardHTTPClient struct{} +// standardHTTPClient implements HTTP client using net/http with centralized TLS configuration +type standardHTTPClient struct { + client *http.Client +} + +func newStandardHTTPClient() *standardHTTPClient { + return &standardHTTPClient{ + client: tlsutil.NewHTTPClient(30 * time.Second), + } +} func (c *standardHTTPClient) Get(url string) ([]byte, error) { - resp, err := http.Get(url) + resp, err := c.client.Get(url) if err != nil { return nil, err } diff --git a/pkg/logging/logger.go b/pkg/logging/logger.go index ca0b3c6..0dee825 100644 --- a/pkg/logging/logger.go +++ b/pkg/logging/logger.go @@ -54,6 +54,7 @@ const ( ComponentClient Component = "CLIENT" ComponentGeneral Component = "GENERAL" ComponentAnyone Component = "ANYONE" + ComponentGateway Component = "GATEWAY" ) // getComponentColor returns the color for a specific component @@ -75,6 +76,8 @@ func getComponentColor(component Component) string { return Yellow case ComponentAnyone: return Cyan + case ComponentGateway: + return BrightGreen default: return White } diff --git a/pkg/node/node.go b/pkg/node/node.go index 7d87c31..5e3f5e1 100644 --- a/pkg/node/node.go +++ b/pkg/node/node.go @@ -2,6 +2,9 @@ package node import ( "context" + "crypto/tls" + "crypto/x509" + "encoding/pem" "fmt" mathrand "math/rand" "net" @@ -20,6 +23,8 @@ import ( noise "github.com/libp2p/go-libp2p/p2p/security/noise" "github.com/multiformats/go-multiaddr" "go.uber.org/zap" + "golang.org/x/crypto/acme" + "golang.org/x/crypto/acme/autocert" "github.com/DeBrosOfficial/network/pkg/config" "github.com/DeBrosOfficial/network/pkg/discovery" @@ -56,6 +61,12 @@ type Node struct { // Full gateway (for API, auth, pubsub, and internal service routing) apiGateway *gateway.Gateway apiGatewayServer *http.Server + + // SNI gateway (for TCP routing of raft, ipfs, olric, etc.) 
+ sniGateway *gateway.TCPSNIGateway + + // Shared certificate manager for HTTPS and SNI + certManager *autocert.Manager } // NewNode creates a new network node @@ -646,6 +657,13 @@ func (n *Node) Stop() error { n.apiGateway.Close() } + // Stop SNI Gateway + if n.sniGateway != nil { + if err := n.sniGateway.Stop(); err != nil { + n.logger.ComponentWarn(logging.ComponentNode, "SNI Gateway stop error", zap.Error(err)) + } + } + // Stop cluster discovery if n.clusterDiscovery != nil { n.clusterDiscovery.Stop() @@ -684,7 +702,7 @@ func (n *Node) startHTTPGateway(ctx context.Context) error { } // Create separate logger for gateway - logFile := filepath.Join(os.ExpandEnv(n.config.Node.DataDir), "..", "logs", fmt.Sprintf("gateway-%s.log", n.config.HTTPGateway.NodeName)) + logFile := filepath.Join(os.ExpandEnv(n.config.Node.DataDir), "..", "logs", "gateway.log") // Ensure logs directory exists logsDir := filepath.Dir(logFile) @@ -709,6 +727,10 @@ func (n *Node) startHTTPGateway(ctx context.Context) error { IPFSClusterAPIURL: n.config.HTTPGateway.IPFSClusterAPIURL, IPFSAPIURL: n.config.HTTPGateway.IPFSAPIURL, IPFSTimeout: n.config.HTTPGateway.IPFSTimeout, + // HTTPS/TLS configuration + EnableHTTPS: n.config.HTTPGateway.HTTPS.Enabled, + DomainName: n.config.HTTPGateway.HTTPS.Domain, + TLSCacheDir: n.config.HTTPGateway.HTTPS.CacheDir, } apiGateway, err := gateway.New(gatewayLogger, gwCfg) @@ -718,34 +740,329 @@ func (n *Node) startHTTPGateway(ctx context.Context) error { n.apiGateway = apiGateway + // Check if HTTPS is enabled and set up certManager BEFORE starting goroutine + // This ensures n.certManager is set before SNI gateway initialization checks it + var certManager *autocert.Manager + var tlsCacheDir string + if gwCfg.EnableHTTPS && gwCfg.DomainName != "" { + tlsCacheDir = gwCfg.TLSCacheDir + if tlsCacheDir == "" { + tlsCacheDir = "/home/debros/.orama/tls-cache" + } + + // Create TLS configuration with Let's Encrypt autocert + // Using STAGING environment to avoid 
rate limits during development/testing + // TODO: Switch to production when ready (remove Client field) + certManager = &autocert.Manager{ + Prompt: autocert.AcceptTOS, + HostPolicy: autocert.HostWhitelist(gwCfg.DomainName), + Cache: autocert.DirCache(tlsCacheDir), + Email: fmt.Sprintf("admin@%s", gwCfg.DomainName), + Client: &acme.Client{ + DirectoryURL: "https://acme-staging-v02.api.letsencrypt.org/directory", + }, + } + + // Store certificate manager for use by SNI gateway + n.certManager = certManager + } + + // Channel to signal when HTTP server is ready for ACME challenges + httpReady := make(chan struct{}) + // Start API Gateway in a goroutine go func() { - n.logger.ComponentInfo(logging.ComponentNode, "Starting full API gateway", + gatewayLogger.ComponentInfo(logging.ComponentGateway, "Starting full API gateway", zap.String("listen_addr", gwCfg.ListenAddr), ) - server := &http.Server{ - Addr: gwCfg.ListenAddr, - Handler: apiGateway.Routes(), + // Check if HTTPS is enabled + if gwCfg.EnableHTTPS && gwCfg.DomainName != "" && certManager != nil { + // Start HTTPS server with automatic certificate provisioning + gatewayLogger.ComponentInfo(logging.ComponentGateway, "HTTPS enabled, starting secure gateway", + zap.String("domain", gwCfg.DomainName), + ) + + // Determine HTTPS and HTTP ports + httpsPort := 443 + httpPort := 80 + + // Start HTTP server for ACME challenges and redirects + // certManager.HTTPHandler() must be the main handler, with a fallback for other requests + httpServer := &http.Server{ + Addr: fmt.Sprintf(":%d", httpPort), + Handler: certManager.HTTPHandler(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + // Fallback for non-ACME requests: redirect to HTTPS + target := fmt.Sprintf("https://%s%s", r.Host, r.URL.RequestURI()) + http.Redirect(w, r, target, http.StatusMovedPermanently) + })), + } + + // Create HTTP listener first to ensure port 80 is bound before signaling ready + httpListener, err := net.Listen("tcp", 
fmt.Sprintf(":%d", httpPort)) + if err != nil { + gatewayLogger.ComponentError(logging.ComponentGateway, "failed to bind HTTP listener for ACME", zap.Error(err)) + close(httpReady) // Signal even on failure so SNI goroutine doesn't hang + return + } + gatewayLogger.ComponentInfo(logging.ComponentGateway, "HTTP server ready for ACME challenges", zap.Int("port", httpPort)) + + // Start HTTP server in background for ACME challenges + go func() { + if err := httpServer.Serve(httpListener); err != nil && err != http.ErrServerClosed { + gatewayLogger.ComponentError(logging.ComponentGateway, "HTTP server error", zap.Error(err)) + } + }() + + // Pre-provision the certificate BEFORE starting HTTPS server + // This ensures we don't accept HTTPS connections without a valid certificate + gatewayLogger.ComponentInfo(logging.ComponentGateway, "Pre-provisioning TLS certificate...", + zap.String("domain", gwCfg.DomainName), + ) + + // Use a timeout context for certificate provisioning + // If Let's Encrypt is rate-limited or unreachable, don't block forever + certCtx, certCancel := context.WithTimeout(context.Background(), 30*time.Second) + defer certCancel() + + certReq := &tls.ClientHelloInfo{ + ServerName: gwCfg.DomainName, } - n.apiGatewayServer = server + // Try to get certificate with timeout + certProvisionChan := make(chan error, 1) + go func() { + _, err := certManager.GetCertificate(certReq) + certProvisionChan <- err + }() - // Try to bind listener - ln, err := net.Listen("tcp", gwCfg.ListenAddr) - if err != nil { - n.logger.ComponentError(logging.ComponentNode, "failed to bind API gateway listener", zap.Error(err)) + var certErr error + select { + case err := <-certProvisionChan: + certErr = err + case <-certCtx.Done(): + certErr = fmt.Errorf("certificate provisioning timeout (Let's Encrypt may be rate-limited or unreachable)") + } + + if certErr != nil { + gatewayLogger.ComponentError(logging.ComponentGateway, "Failed to provision TLS certificate - HTTPS disabled", + 
zap.String("domain", gwCfg.DomainName), + zap.Error(certErr), + ) + // Signal ready for SNI goroutine (even though we're failing) + close(httpReady) + + // HTTP server on port 80 is already running, but it's configured to redirect to HTTPS + // Replace its handler to serve the gateway directly instead of redirecting + httpServer.Handler = apiGateway.Routes() + + gatewayLogger.ComponentInfo(logging.ComponentGateway, "HTTP gateway available on port 80 only", + zap.String("port", "80"), + ) return } - n.logger.ComponentInfo(logging.ComponentNode, "API gateway listener bound", zap.String("listen_addr", ln.Addr().String())) + gatewayLogger.ComponentInfo(logging.ComponentGateway, "TLS certificate provisioned successfully", + zap.String("domain", gwCfg.DomainName), + ) - // Serve HTTP - if err := server.Serve(ln); err != nil && err != http.ErrServerClosed { - n.logger.ComponentError(logging.ComponentNode, "API Gateway error", zap.Error(err)) + // Signal that HTTP server is ready for ACME challenges + close(httpReady) + + tlsConfig := &tls.Config{ + MinVersion: tls.VersionTLS12, + GetCertificate: certManager.GetCertificate, + } + + // Start HTTPS server + httpsServer := &http.Server{ + Addr: fmt.Sprintf(":%d", httpsPort), + TLSConfig: tlsConfig, + Handler: apiGateway.Routes(), + } + + n.apiGatewayServer = httpsServer + + listener, err := tls.Listen("tcp", fmt.Sprintf(":%d", httpsPort), tlsConfig) + if err != nil { + gatewayLogger.ComponentError(logging.ComponentGateway, "failed to create TLS listener", zap.Error(err)) + return + } + + gatewayLogger.ComponentInfo(logging.ComponentGateway, "HTTPS gateway listener bound", + zap.String("domain", gwCfg.DomainName), + zap.Int("port", httpsPort), + ) + + // Serve HTTPS + if err := httpsServer.Serve(listener); err != nil && err != http.ErrServerClosed { + gatewayLogger.ComponentError(logging.ComponentGateway, "HTTPS Gateway error", zap.Error(err)) + } + } else { + // No HTTPS - signal ready immediately (no ACME needed) + 
close(httpReady) + + // Start plain HTTP server + server := &http.Server{ + Addr: gwCfg.ListenAddr, + Handler: apiGateway.Routes(), + } + + n.apiGatewayServer = server + + // Try to bind listener + ln, err := net.Listen("tcp", gwCfg.ListenAddr) + if err != nil { + gatewayLogger.ComponentError(logging.ComponentGateway, "failed to bind API gateway listener", zap.Error(err)) + return + } + + gatewayLogger.ComponentInfo(logging.ComponentGateway, "API gateway listener bound", zap.String("listen_addr", ln.Addr().String())) + + // Serve HTTP + if err := server.Serve(ln); err != nil && err != http.ErrServerClosed { + gatewayLogger.ComponentError(logging.ComponentGateway, "API Gateway error", zap.Error(err)) + } } }() + // Initialize and start SNI gateway if HTTPS is enabled and SNI is configured + // This runs in a separate goroutine that waits for HTTP server to be ready + if n.config.HTTPGateway.SNI.Enabled && n.certManager != nil { + go func() { + // Wait for HTTP server to be ready for ACME challenges + gatewayLogger.ComponentInfo(logging.ComponentGateway, "Waiting for HTTP server before SNI initialization...") + <-httpReady + + gatewayLogger.ComponentInfo(logging.ComponentGateway, "Initializing SNI gateway", + zap.String("listen_addr", n.config.HTTPGateway.SNI.ListenAddr), + ) + + // Provision the certificate from Let's Encrypt cache + // This ensures the certificate file is downloaded and cached + domain := n.config.HTTPGateway.HTTPS.Domain + if domain != "" { + gatewayLogger.ComponentInfo(logging.ComponentGateway, "Provisioning certificate for SNI", + zap.String("domain", domain)) + + certReq := &tls.ClientHelloInfo{ + ServerName: domain, + } + if tlsCert, err := n.certManager.GetCertificate(certReq); err != nil { + gatewayLogger.ComponentError(logging.ComponentGateway, "Failed to provision certificate for SNI", + zap.String("domain", domain), zap.Error(err)) + return // Can't start SNI without certificate + } else { + 
// extractPEMFromTLSCert writes the certificate chain and private key of a
// tls.Certificate to standard PEM files (certPath, keyPath) so components
// that expect plain PEM material (e.g. the SNI gateway) can load them.
// The certificate file ends up world-readable (0644); the key file is
// owner-only (0600) from the moment it is created.
//
// Fixes over the previous version: the private-key type switch had a
// nonsensical `case *x509.Certificate:` arm (a certificate type is not a
// private key, and both arms performed the identical PKCS#8 marshal), the
// key file was briefly created with default permissions before being
// chmod'd, and Chmod/Close errors were silently ignored.
func extractPEMFromTLSCert(tlsCert *tls.Certificate, certPath, keyPath string) error {
	if tlsCert == nil || len(tlsCert.Certificate) == 0 {
		return fmt.Errorf("invalid tls certificate")
	}
	if tlsCert.PrivateKey == nil {
		return fmt.Errorf("private key is nil")
	}

	// Marshal the key first so no files are left behind if it fails.
	// PKCS#8 covers all common key types (RSA, ECDSA, Ed25519).
	keyBytes, err := x509.MarshalPKCS8PrivateKey(tlsCert.PrivateKey)
	if err != nil {
		return fmt.Errorf("failed to marshal private key: %w", err)
	}

	// Write the full certificate chain.
	certFile, err := os.OpenFile(certPath, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0644)
	if err != nil {
		return fmt.Errorf("failed to create cert file: %w", err)
	}
	for _, der := range tlsCert.Certificate {
		if err := pem.Encode(certFile, &pem.Block{Type: "CERTIFICATE", Bytes: der}); err != nil {
			certFile.Close()
			return fmt.Errorf("failed to encode certificate: %w", err)
		}
	}
	if err := certFile.Close(); err != nil {
		return fmt.Errorf("failed to close cert file: %w", err)
	}
	// Guarantee 0644 regardless of the process umask.
	if err := os.Chmod(certPath, 0644); err != nil {
		return fmt.Errorf("failed to set cert file permissions: %w", err)
	}

	// Create the key file with restrictive permissions from the start so the
	// private key is never momentarily world-readable.
	keyFile, err := os.OpenFile(keyPath, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0600)
	if err != nil {
		return fmt.Errorf("failed to create key file: %w", err)
	}
	if err := pem.Encode(keyFile, &pem.Block{Type: "PRIVATE KEY", Bytes: keyBytes}); err != nil {
		keyFile.Close()
		return fmt.Errorf("failed to encode private key: %w", err)
	}
	if err := keyFile.Close(); err != nil {
		return fmt.Errorf("failed to close key file: %w", err)
	}
	if err := os.Chmod(keyPath, 0600); err != nil {
		return fmt.Errorf("failed to set key file permissions: %w", err)
	}

	return nil
}
var (
	// trustedDomains holds domains (from DEBROS_TRUSTED_TLS_DOMAINS,
	// comma-separated) whose self-signed certificates are accepted.
	trustedDomains []string
	// caCertPool, when non-nil, contains the self-signed CA loaded from
	// DEBROS_CA_CERT_PATH (default /etc/debros/ca.crt) and is used as the
	// root set for TLS verification.
	caCertPool *x509.CertPool
)

// init loads the trusted-domain list and the CA certificate from the
// environment. Both are optional; absence leaves the defaults (system
// roots, no trusted domains) in effect.
func init() {
	if domains := os.Getenv("DEBROS_TRUSTED_TLS_DOMAINS"); domains != "" {
		for _, d := range strings.Split(domains, ",") {
			if d = strings.TrimSpace(d); d != "" {
				trustedDomains = append(trustedDomains, d)
			}
		}
	}

	caCertPath := os.Getenv("DEBROS_CA_CERT_PATH")
	if caCertPath == "" {
		caCertPath = "/etc/debros/ca.crt"
	}

	// Only install the pool if the PEM actually parsed. Previously the pool
	// was assigned before checking AppendCertsFromPEM, so an unparseable CA
	// file left an EMPTY pool in place — silently replacing the system roots
	// and breaking verification for every connection.
	if caCertData, err := os.ReadFile(caCertPath); err == nil {
		pool := x509.NewCertPool()
		if pool.AppendCertsFromPEM(caCertData) {
			caCertPool = pool
		}
	}
}

// GetTrustedDomains returns the list of domains to skip TLS verification for.
func GetTrustedDomains() []string {
	return trustedDomains
}

// ShouldSkipTLSVerify reports whether certificate verification should be
// skipped for domain. Entries like "*.example.com" match both subdomains
// and the bare apex ("example.com").
func ShouldSkipTLSVerify(domain string) bool {
	for _, trusted := range trustedDomains {
		if strings.HasPrefix(trusted, "*.") {
			// Wildcard entry: compare against the ".suffix" and the apex.
			suffix := strings.TrimPrefix(trusted, "*")
			if strings.HasSuffix(domain, suffix) || domain == strings.TrimPrefix(suffix, ".") {
				return true
			}
		} else if domain == trusted {
			return true
		}
	}
	return false
}

// GetTLSConfig returns a fresh TLS config (TLS 1.2 minimum). When a CA pool
// was loaded it becomes the root set; otherwise, if trusted domains are
// configured, verification is skipped.
//
// NOTE(security): the no-CA fallback disables verification for ALL hosts,
// not just the trusted ones — installing the CA certificate is strongly
// preferred over relying on DEBROS_TRUSTED_TLS_DOMAINS alone.
func GetTLSConfig() *tls.Config {
	cfg := &tls.Config{MinVersion: tls.VersionTLS12}
	switch {
	case caCertPool != nil:
		cfg.RootCAs = caCertPool
	case len(trustedDomains) > 0:
		cfg.InsecureSkipVerify = true
	}
	return cfg
}

// NewHTTPClient creates an HTTP client with the centralized TLS settings and
// the given overall request timeout.
func NewHTTPClient(timeout time.Duration) *http.Client {
	return &http.Client{
		Timeout:   timeout,
		Transport: &http.Transport{TLSClientConfig: GetTLSConfig()},
	}
}

// NewHTTPClientForDomain creates an HTTP client for talking to hostname.
// If no CA pool is loaded and hostname is in the trusted list, certificate
// verification is disabled for this client only.
func NewHTTPClientForDomain(timeout time.Duration, hostname string) *http.Client {
	tlsConfig := GetTLSConfig()
	if caCertPool == nil && ShouldSkipTLSVerify(hostname) {
		tlsConfig.InsecureSkipVerify = true
	}
	return &http.Client{
		Timeout:   timeout,
		Transport: &http.Transport{TLSClientConfig: tlsConfig},
	}
}