diff --git a/Makefile b/Makefile index bc029e3..0c84c9c 100644 --- a/Makefile +++ b/Makefile @@ -61,9 +61,9 @@ test-e2e-quick: # Network - Distributed P2P Database System # Makefile for development and build tasks -.PHONY: build clean test deps tidy fmt vet lint install-hooks upload-devnet upload-testnet redeploy-devnet redeploy-testnet release health +.PHONY: build clean test deps tidy fmt vet lint install-hooks push-devnet push-testnet rollout-devnet rollout-testnet release -VERSION := 0.112.7 +VERSION := 0.115.0 COMMIT ?= $(shell git rev-parse --short HEAD 2>/dev/null || echo unknown) DATE ?= $(shell date -u +%Y-%m-%dT%H:%M:%SZ) LDFLAGS := -X 'main.version=$(VERSION)' -X 'main.commit=$(COMMIT)' -X 'main.date=$(DATE)' @@ -89,9 +89,13 @@ build-linux: deps GOOS=linux GOARCH=amd64 go build -ldflags "$(LDFLAGS_LINUX)" -trimpath -o bin-linux/orama ./cmd/cli/ @echo "✓ CLI built at bin-linux/orama" @echo "" - @echo "Next steps:" - @echo " ./scripts/generate-source-archive.sh" - @echo " ./bin/orama install --vps-ip --nameserver --domain ..." + @echo "Prefer 'make build-archive' for full pre-built binary archive." + +# Build pre-compiled binary archive for deployment (all binaries + deps) +build-archive: deps + @echo "Building binary archive (version=$(VERSION))..." + go build -ldflags "$(LDFLAGS)" -o bin/orama ./cmd/cli/ + ./bin/orama build --output /tmp/orama-$(VERSION)-linux-amd64.tar.gz # Install git hooks install-hooks: @@ -105,29 +109,21 @@ clean: rm -rf data/ @echo "Clean complete!" 
-# Upload source to devnet using fanout (upload to 1 node, parallel distribute to rest) -upload-devnet: - @bash scripts/upload-source-fanout.sh --env devnet +# Push binary archive to devnet nodes (fanout distribution) +push-devnet: + ./bin/orama node push --env devnet -# Upload source to testnet using fanout -upload-testnet: - @bash scripts/upload-source-fanout.sh --env testnet +# Push binary archive to testnet nodes (fanout distribution) +push-testnet: + ./bin/orama node push --env testnet -# Deploy to devnet (build + rolling upgrade all nodes) -redeploy-devnet: - @bash scripts/redeploy.sh --devnet +# Full rollout to devnet (build + push + rolling upgrade) +rollout-devnet: + ./bin/orama node rollout --env devnet --yes -# Deploy to devnet without rebuilding -redeploy-devnet-quick: - @bash scripts/redeploy.sh --devnet --no-build - -# Deploy to testnet (build + rolling upgrade all nodes) -redeploy-testnet: - @bash scripts/redeploy.sh --testnet - -# Deploy to testnet without rebuilding -redeploy-testnet-quick: - @bash scripts/redeploy.sh --testnet --no-build +# Full rollout to testnet (build + push + rolling upgrade) +rollout-testnet: + ./bin/orama node rollout --env testnet --yes # Interactive release workflow (tag + push) release: @@ -140,14 +136,7 @@ health: echo "Usage: make health ENV=devnet|testnet"; \ exit 1; \ fi - @while IFS='|' read -r env host pass role key; do \ - [ -z "$$env" ] && continue; \ - case "$$env" in \#*) continue;; esac; \ - env="$$(echo "$$env" | xargs)"; \ - [ "$$env" != "$(ENV)" ] && continue; \ - role="$$(echo "$$role" | xargs)"; \ - bash scripts/check-node-health.sh "$$host" "$$pass" "$$host ($$role)"; \ - done < scripts/remote-nodes.conf + ./bin/orama monitor report --env $(ENV) # Help help: @@ -170,10 +159,11 @@ help: @echo " ORAMA_GATEWAY_URL=https://orama-devnet.network make test-e2e-prod" @echo "" @echo "Deployment:" - @echo " make redeploy-devnet - Build + rolling deploy to all devnet nodes" - @echo " make redeploy-devnet-quick - 
Deploy to devnet without rebuilding" - @echo " make redeploy-testnet - Build + rolling deploy to all testnet nodes" - @echo " make redeploy-testnet-quick- Deploy to testnet without rebuilding" + @echo " make build-archive - Build pre-compiled binary archive for deployment" + @echo " make push-devnet - Push binary archive to devnet nodes" + @echo " make push-testnet - Push binary archive to testnet nodes" + @echo " make rollout-devnet - Full rollout: build + push + rolling upgrade (devnet)" + @echo " make rollout-testnet - Full rollout: build + push + rolling upgrade (testnet)" @echo " make health ENV=devnet - Check health of all nodes in an environment" @echo " make release - Interactive release workflow (tag + push)" @echo "" diff --git a/cmd/cli/root.go b/cmd/cli/root.go index 93aba5d..266fc9b 100644 --- a/cmd/cli/root.go +++ b/cmd/cli/root.go @@ -9,6 +9,7 @@ import ( // Command groups "github.com/DeBrosOfficial/network/pkg/cli/cmd/app" "github.com/DeBrosOfficial/network/pkg/cli/cmd/authcmd" + "github.com/DeBrosOfficial/network/pkg/cli/cmd/buildcmd" "github.com/DeBrosOfficial/network/pkg/cli/cmd/dbcmd" deploycmd "github.com/DeBrosOfficial/network/pkg/cli/cmd/deploy" "github.com/DeBrosOfficial/network/pkg/cli/cmd/envcmd" @@ -83,6 +84,9 @@ and interacting with the Orama distributed network.`, // Serverless function commands rootCmd.AddCommand(functioncmd.Cmd) + // Build command (cross-compile binary archive) + rootCmd.AddCommand(buildcmd.Cmd) + return rootCmd } diff --git a/docs/DEV_DEPLOY.md b/docs/DEV_DEPLOY.md index c33537d..07265a4 100644 --- a/docs/DEV_DEPLOY.md +++ b/docs/DEV_DEPLOY.md @@ -27,87 +27,64 @@ make test ## Deploying to VPS -Source is always deployed via SCP (no git on VPS). The CLI is the only binary cross-compiled locally; everything else is built from source on the VPS. +All binaries are pre-compiled locally and shipped as a binary archive. Zero compilation on the VPS. ### Deploy Workflow ```bash -# 1. 
Cross-compile the CLI for Linux -make build-linux +# One-command: build + push + rolling upgrade +orama node rollout --env testnet -# 2. Generate a source archive (includes CLI binary + full source) -./scripts/generate-source-archive.sh -# Creates: /tmp/network-source.tar.gz +# Or step by step: -# 3. Install on a new VPS (handles SCP, extract, and remote install automatically) -./bin/orama node install --vps-ip --nameserver --domain --base-domain +# 1. Build binary archive (cross-compiles all binaries for linux/amd64) +orama build +# Creates: /tmp/orama--linux-amd64.tar.gz -# Or upgrade an existing VPS -./bin/orama node upgrade --restart +# 2. Push archive to all nodes (fanout via hub node) +orama node push --env testnet + +# 3. Rolling upgrade (one node at a time, followers first, leader last) +orama node upgrade --env testnet ``` -The `orama node install` command automatically: -1. Uploads the source archive via SCP -2. Extracts source to `/opt/orama/src` and installs the CLI to `/usr/local/bin/orama` -3. Runs `orama node install` on the VPS which builds all binaries from source (Go, CoreDNS, Caddy, Olric, etc.) +### Fresh Node Install + +```bash +# Build the archive first (if not already built) +orama build + +# Install on a new VPS (auto-uploads binary archive, zero compilation) +orama node install --vps-ip --nameserver --domain --base-domain +``` + +The installer auto-detects the binary archive at `/opt/orama/manifest.json` and copies pre-built binaries instead of compiling from source. ### Upgrading a Multi-Node Cluster (CRITICAL) -**NEVER restart all nodes simultaneously.** RQLite uses Raft consensus and requires a majority (quorum) to function. Restarting all nodes at once can cause cluster splits where nodes elect different leaders or form isolated clusters. +**NEVER restart all nodes simultaneously.** RQLite uses Raft consensus and requires a majority (quorum) to function. 
-#### Safe Upgrade Procedure (Rolling Restart) - -Always upgrade nodes **one at a time**, waiting for each to rejoin before proceeding: +#### Safe Upgrade Procedure ```bash -# 1. Build CLI + generate archive -make build-linux -./scripts/generate-source-archive.sh -# Creates: /tmp/network-source.tar.gz +# Full rollout (build + push + rolling upgrade, one command) +orama node rollout --env testnet -# 2. Upload to ONE node first (the "hub" node) -sshpass -p '' scp /tmp/network-source.tar.gz ubuntu@:/tmp/ +# Or with more control: +orama node push --env testnet # Push archive to all nodes +orama node upgrade --env testnet # Rolling upgrade (auto-detects leader) +orama node upgrade --env testnet --node 1.2.3.4 # Single node only +orama node upgrade --env testnet --delay 60 # 60s between nodes +``` -# 3. Fan out from hub to all other nodes (server-to-server is faster) -ssh ubuntu@ -for ip in ; do - scp /tmp/network-source.tar.gz ubuntu@$ip:/tmp/ -done -exit +The rolling upgrade automatically: +1. Upgrades **follower** nodes first +2. Upgrades the **leader** last +3. Waits a configurable delay between nodes (default: 30s) -# 4. Extract on ALL nodes (can be done in parallel, no restart yet) -for ip in ; do - ssh ubuntu@$ip 'sudo bash -s' < scripts/extract-deploy.sh -done - -# 5. Find the RQLite leader (upgrade this one LAST) -orama monitor report --env -# Check "rqlite_leader" in summary output - -# 6. Upgrade FOLLOWER nodes one at a time -ssh ubuntu@ 'sudo orama node stop && sudo orama node upgrade --restart' - -# IMPORTANT: Verify FULL health before proceeding to next node: -orama monitor report --env --node -# Check: -# - All services active, 0 restart loops -# - RQLite: Follower state, applied_index matches cluster -# - All RQLite peers reachable (no partition alerts) -# - WireGuard peers connected with recent handshakes -# Only proceed to next node after ALL checks pass. 
-# -# NOTE: After restarting a node, other nodes may briefly report it as -# "unreachable" with "broken pipe" errors. This is normal — Raft TCP -# connections need ~1-2 minutes to re-establish. Wait and re-check -# before escalating. - -# Repeat for each follower... - -# 7. Upgrade the LEADER node last -ssh ubuntu@ 'sudo orama node stop && sudo orama node upgrade --restart' - -# Verify the new leader was elected and cluster is fully healthy: -orama monitor report --env +After each node, verify health: +```bash +orama monitor report --env testnet ``` #### What NOT to Do @@ -121,31 +98,38 @@ orama monitor report --env If nodes get stuck in "Candidate" state or show "leader not found" errors: -1. Identify which node has the most recent data (usually the old leader) -2. Keep that node running as the new leader -3. On each other node, clear RQLite data and restart: - ```bash - sudo orama node stop - sudo rm -rf /opt/orama/.orama/data/rqlite - sudo systemctl start orama-node - ``` -4. The node should automatically rejoin using its configured `rqlite_join_address` - -If automatic rejoin fails, the node may have started without the `-join` flag. Check: ```bash -ps aux | grep rqlited -# Should include: -join 10.0.0.1:7001 (or similar) +# Recover the Raft cluster (specify the node with highest commit index as leader) +orama node recover-raft --env testnet --leader 1.2.3.4 ``` -If `-join` is missing, the node bootstrapped standalone. You'll need to either: -- Restart orama-node (it should detect empty data and use join) -- Or do a full cluster rebuild from CLEAN_NODE.md +This will: +1. Stop orama-node on ALL nodes +2. Backup + delete raft/ on non-leader nodes +3. Start the leader, wait for Leader state +4. Start remaining nodes in batches +5. Verify cluster health -### Deploying to Multiple Nodes +### Cleaning Nodes for Reinstallation -To deploy to all nodes, repeat steps 3-5 (dev) or 3-4 (production) for each VPS IP. 
+```bash +# Wipe all data and services (preserves Anyone relay keys) +orama node clean --env testnet --force -**Important:** When using `--restart`, do nodes one at a time (see "Upgrading a Multi-Node Cluster" above). +# Also remove shared binaries (rqlited, ipfs, caddy, etc.) +orama node clean --env testnet --nuclear --force + +# Single node only +orama node clean --env testnet --node 1.2.3.4 --force +``` + +### Push Options + +```bash +orama node push --env devnet # Fanout via hub (default, fastest) +orama node push --env testnet --node 1.2.3.4 # Single node +orama node push --env testnet --direct # Sequential, no fanout +``` ### CLI Flags Reference @@ -189,11 +173,56 @@ To deploy to all nodes, repeat steps 3-5 (dev) or 3-4 (production) for each VPS | Flag | Description | |------|-------------| -| `--restart` | Restart all services after upgrade | +| `--restart` | Restart all services after upgrade (local mode) | +| `--env ` | Target environment for remote rolling upgrade | +| `--node ` | Upgrade a single node only | +| `--delay ` | Delay between nodes during rolling upgrade (default: 30) | | `--anyone-relay` | Enable Anyone relay (same flags as install) | | `--anyone-bandwidth ` | Limit relay to N% of VPS bandwidth (default: 30, 0=unlimited) | | `--anyone-accounting ` | Monthly data cap for relay in GB (0=unlimited) | +#### `orama build` + +| Flag | Description | +|------|-------------| +| `--arch ` | Target architecture (default: amd64) | +| `--output ` | Output archive path | +| `--verbose` | Verbose build output | + +#### `orama node push` + +| Flag | Description | +|------|-------------| +| `--env ` | Target environment (required) | +| `--node ` | Push to a single node only | +| `--direct` | Sequential upload (no hub fanout) | + +#### `orama node rollout` + +| Flag | Description | +|------|-------------| +| `--env ` | Target environment (required) | +| `--no-build` | Skip the build step | +| `--yes` | Skip confirmation | +| `--delay ` | Delay between nodes 
(default: 30) | + +#### `orama node clean` + +| Flag | Description | +|------|-------------| +| `--env ` | Target environment (required) | +| `--node ` | Clean a single node only | +| `--nuclear` | Also remove shared binaries | +| `--force` | Skip confirmation (DESTRUCTIVE) | + +#### `orama node recover-raft` + +| Flag | Description | +|------|-------------| +| `--env ` | Target environment (required) | +| `--leader ` | Leader node IP — highest commit index (required) | +| `--force` | Skip confirmation (DESTRUCTIVE) | + #### `orama node` (Service Management) Use these commands to manage services on production nodes: diff --git a/pkg/cli/build/archive.go b/pkg/cli/build/archive.go new file mode 100644 index 0000000..25d8dd7 --- /dev/null +++ b/pkg/cli/build/archive.go @@ -0,0 +1,269 @@ +package build + +import ( + "archive/tar" + "compress/gzip" + "crypto/sha256" + "encoding/hex" + "encoding/json" + "fmt" + "io" + "net/http" + "os" + "path/filepath" + "strings" + "time" +) + +// Manifest describes the contents of a binary archive. +type Manifest struct { + Version string `json:"version"` + Commit string `json:"commit"` + Date string `json:"date"` + Arch string `json:"arch"` + Checksums map[string]string `json:"checksums"` // filename -> sha256 +} + +// generateManifest creates the manifest with SHA256 checksums of all binaries. 
+func (b *Builder) generateManifest() (*Manifest, error) { + m := &Manifest{ + Version: b.version, + Commit: b.commit, + Date: b.date, + Arch: b.flags.Arch, + Checksums: make(map[string]string), + } + + entries, err := os.ReadDir(b.binDir) + if err != nil { + return nil, err + } + + for _, entry := range entries { + if entry.IsDir() { + continue + } + path := filepath.Join(b.binDir, entry.Name()) + hash, err := sha256File(path) + if err != nil { + return nil, fmt.Errorf("failed to hash %s: %w", entry.Name(), err) + } + m.Checksums[entry.Name()] = hash + } + + return m, nil +} + +// createArchive creates the tar.gz archive from the build directory. +func (b *Builder) createArchive(outputPath string, manifest *Manifest) error { + fmt.Printf("\nCreating archive: %s\n", outputPath) + + // Write manifest.json to tmpDir + manifestData, err := json.MarshalIndent(manifest, "", " ") + if err != nil { + return err + } + if err := os.WriteFile(filepath.Join(b.tmpDir, "manifest.json"), manifestData, 0644); err != nil { + return err + } + + // Create output file + f, err := os.Create(outputPath) + if err != nil { + return err + } + defer f.Close() + + gw := gzip.NewWriter(f) + defer gw.Close() + + tw := tar.NewWriter(gw) + defer tw.Close() + + // Add bin/ directory + if err := addDirToTar(tw, b.binDir, "bin"); err != nil { + return err + } + + // Add systemd/ directory + systemdDir := filepath.Join(b.tmpDir, "systemd") + if _, err := os.Stat(systemdDir); err == nil { + if err := addDirToTar(tw, systemdDir, "systemd"); err != nil { + return err + } + } + + // Add packages/ directory if it exists + packagesDir := filepath.Join(b.tmpDir, "packages") + if _, err := os.Stat(packagesDir); err == nil { + if err := addDirToTar(tw, packagesDir, "packages"); err != nil { + return err + } + } + + // Add manifest.json + if err := addFileToTar(tw, filepath.Join(b.tmpDir, "manifest.json"), "manifest.json"); err != nil { + return err + } + + // Print summary + fmt.Printf(" bin/: %d 
binaries\n", len(manifest.Checksums)) + fmt.Printf(" systemd/: namespace templates\n") + fmt.Printf(" manifest: v%s (%s) linux/%s\n", manifest.Version, manifest.Commit, manifest.Arch) + + info, err := f.Stat() + if err == nil { + fmt.Printf(" size: %s\n", formatBytes(info.Size())) + } + + return nil +} + +// addDirToTar adds all files in a directory to the tar archive under the given prefix. +func addDirToTar(tw *tar.Writer, srcDir, prefix string) error { + return filepath.Walk(srcDir, func(path string, info os.FileInfo, err error) error { + if err != nil { + return err + } + + // Calculate relative path + relPath, err := filepath.Rel(srcDir, path) + if err != nil { + return err + } + tarPath := filepath.Join(prefix, relPath) + + if info.IsDir() { + header := &tar.Header{ + Name: tarPath + "/", + Mode: 0755, + Typeflag: tar.TypeDir, + } + return tw.WriteHeader(header) + } + + return addFileToTar(tw, path, tarPath) + }) +} + +// addFileToTar adds a single file to the tar archive. +func addFileToTar(tw *tar.Writer, srcPath, tarPath string) error { + f, err := os.Open(srcPath) + if err != nil { + return err + } + defer f.Close() + + info, err := f.Stat() + if err != nil { + return err + } + + header := &tar.Header{ + Name: tarPath, + Size: info.Size(), + Mode: int64(info.Mode()), + } + + if err := tw.WriteHeader(header); err != nil { + return err + } + + _, err = io.Copy(tw, f) + return err +} + +// sha256File computes the SHA256 hash of a file. +func sha256File(path string) (string, error) { + f, err := os.Open(path) + if err != nil { + return "", err + } + defer f.Close() + + h := sha256.New() + if _, err := io.Copy(h, f); err != nil { + return "", err + } + return hex.EncodeToString(h.Sum(nil)), nil +} + +// downloadFile downloads a URL to a local file path. 
+func downloadFile(url, destPath string) error { + client := &http.Client{Timeout: 5 * time.Minute} + resp, err := client.Get(url) + if err != nil { + return fmt.Errorf("failed to download %s: %w", url, err) + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + return fmt.Errorf("download %s returned status %d", url, resp.StatusCode) + } + + f, err := os.Create(destPath) + if err != nil { + return err + } + defer f.Close() + + _, err = io.Copy(f, resp.Body) + return err +} + +// extractFileFromTarball extracts a single file from a tar.gz archive. +func extractFileFromTarball(tarPath, targetFile, destPath string) error { + f, err := os.Open(tarPath) + if err != nil { + return err + } + defer f.Close() + + gr, err := gzip.NewReader(f) + if err != nil { + return err + } + defer gr.Close() + + tr := tar.NewReader(gr) + for { + header, err := tr.Next() + if err == io.EOF { + break + } + if err != nil { + return err + } + + // Match the target file (strip leading ./ if present) + name := strings.TrimPrefix(header.Name, "./") + if name == targetFile { + out, err := os.OpenFile(destPath, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, 0755) + if err != nil { + return err + } + defer out.Close() + + if _, err := io.Copy(out, tr); err != nil { + return err + } + return nil + } + } + + return fmt.Errorf("file %s not found in archive %s", targetFile, tarPath) +} + +// formatBytes formats bytes into a human-readable string. 
+func formatBytes(b int64) string { + const unit = 1024 + if b < unit { + return fmt.Sprintf("%d B", b) + } + div, exp := int64(unit), 0 + for n := b / unit; n >= unit; n /= unit { + div *= unit + exp++ + } + return fmt.Sprintf("%.1f %cB", float64(b)/float64(div), "KMGTPE"[exp]) +} diff --git a/pkg/cli/build/builder.go b/pkg/cli/build/builder.go new file mode 100644 index 0000000..de82016 --- /dev/null +++ b/pkg/cli/build/builder.go @@ -0,0 +1,690 @@ +package build + +import ( + "fmt" + "os" + "os/exec" + "path/filepath" + "strings" + "time" + + "github.com/DeBrosOfficial/network/pkg/constants" +) + +// oramaBinary defines a binary to cross-compile from the project source. +type oramaBinary struct { + Name string // output binary name + Package string // Go package path relative to project root + // Extra ldflags beyond the standard ones + ExtraLDFlags string +} + +// Builder orchestrates the entire build process. +type Builder struct { + flags *Flags + projectDir string + tmpDir string + binDir string + version string + commit string + date string +} + +// NewBuilder creates a new Builder. +func NewBuilder(flags *Flags) *Builder { + return &Builder{flags: flags} +} + +// Build runs the full build pipeline. 
+func (b *Builder) Build() error { + start := time.Now() + + // Find project root + projectDir, err := findProjectRoot() + if err != nil { + return err + } + b.projectDir = projectDir + + // Read version from Makefile or use "dev" + b.version = b.readVersion() + b.commit = b.readCommit() + b.date = time.Now().UTC().Format("2006-01-02T15:04:05Z") + + // Create temp build directory + b.tmpDir, err = os.MkdirTemp("", "orama-build-*") + if err != nil { + return fmt.Errorf("failed to create temp dir: %w", err) + } + defer os.RemoveAll(b.tmpDir) + + b.binDir = filepath.Join(b.tmpDir, "bin") + if err := os.MkdirAll(b.binDir, 0755); err != nil { + return fmt.Errorf("failed to create bin dir: %w", err) + } + + fmt.Printf("Building orama %s for linux/%s\n", b.version, b.flags.Arch) + fmt.Printf("Project: %s\n\n", b.projectDir) + + // Step 1: Cross-compile Orama binaries + if err := b.buildOramaBinaries(); err != nil { + return fmt.Errorf("failed to build orama binaries: %w", err) + } + + // Step 2: Cross-compile Olric + if err := b.buildOlric(); err != nil { + return fmt.Errorf("failed to build olric: %w", err) + } + + // Step 3: Cross-compile IPFS Cluster + if err := b.buildIPFSCluster(); err != nil { + return fmt.Errorf("failed to build ipfs-cluster: %w", err) + } + + // Step 4: Build CoreDNS with RQLite plugin + if err := b.buildCoreDNS(); err != nil { + return fmt.Errorf("failed to build coredns: %w", err) + } + + // Step 5: Build Caddy with Orama DNS module + if err := b.buildCaddy(); err != nil { + return fmt.Errorf("failed to build caddy: %w", err) + } + + // Step 6: Download pre-built IPFS Kubo + if err := b.downloadIPFS(); err != nil { + return fmt.Errorf("failed to download ipfs: %w", err) + } + + // Step 7: Download pre-built RQLite + if err := b.downloadRQLite(); err != nil { + return fmt.Errorf("failed to download rqlite: %w", err) + } + + // Step 8: Copy systemd templates + if err := b.copySystemdTemplates(); err != nil { + return fmt.Errorf("failed to copy 
systemd templates: %w", err) + } + + // Step 9: Generate manifest + manifest, err := b.generateManifest() + if err != nil { + return fmt.Errorf("failed to generate manifest: %w", err) + } + + // Step 10: Create archive + outputPath := b.flags.Output + if outputPath == "" { + outputPath = fmt.Sprintf("/tmp/orama-%s-linux-%s.tar.gz", b.version, b.flags.Arch) + } + + if err := b.createArchive(outputPath, manifest); err != nil { + return fmt.Errorf("failed to create archive: %w", err) + } + + elapsed := time.Since(start).Round(time.Second) + fmt.Printf("\nBuild complete in %s\n", elapsed) + fmt.Printf("Archive: %s\n", outputPath) + + return nil +} + +func (b *Builder) buildOramaBinaries() error { + fmt.Println("[1/7] Cross-compiling Orama binaries...") + + ldflags := fmt.Sprintf("-s -w -X 'main.version=%s' -X 'main.commit=%s' -X 'main.date=%s'", + b.version, b.commit, b.date) + + gatewayLDFlags := fmt.Sprintf("%s -X 'github.com/DeBrosOfficial/network/pkg/gateway.BuildVersion=%s' -X 'github.com/DeBrosOfficial/network/pkg/gateway.BuildCommit=%s' -X 'github.com/DeBrosOfficial/network/pkg/gateway.BuildTime=%s'", + ldflags, b.version, b.commit, b.date) + + binaries := []oramaBinary{ + {Name: "orama", Package: "./cmd/cli/"}, + {Name: "orama-node", Package: "./cmd/node/"}, + {Name: "gateway", Package: "./cmd/gateway/", ExtraLDFlags: gatewayLDFlags}, + {Name: "identity", Package: "./cmd/identity/"}, + {Name: "sfu", Package: "./cmd/sfu/"}, + {Name: "turn", Package: "./cmd/turn/"}, + } + + for _, bin := range binaries { + flags := ldflags + if bin.ExtraLDFlags != "" { + flags = bin.ExtraLDFlags + } + + output := filepath.Join(b.binDir, bin.Name) + cmd := exec.Command("go", "build", + "-ldflags", flags, + "-trimpath", + "-o", output, + bin.Package) + cmd.Dir = b.projectDir + cmd.Env = b.crossEnv() + cmd.Stdout = os.Stdout + cmd.Stderr = os.Stderr + + if b.flags.Verbose { + fmt.Printf(" go build -o %s %s\n", bin.Name, bin.Package) + } + + if err := cmd.Run(); err != nil { + return 
fmt.Errorf("failed to build %s: %w", bin.Name, err) + } + fmt.Printf(" ✓ %s\n", bin.Name) + } + + return nil +} + +func (b *Builder) buildOlric() error { + fmt.Printf("[2/7] Cross-compiling Olric %s...\n", constants.OlricVersion) + + cmd := exec.Command("go", "install", + fmt.Sprintf("github.com/olric-data/olric/cmd/olric-server@%s", constants.OlricVersion)) + cmd.Env = append(b.crossEnv(), + "GOBIN="+b.binDir, + "GOPROXY=https://proxy.golang.org|direct", + "GONOSUMDB=*") + cmd.Stdout = os.Stdout + cmd.Stderr = os.Stderr + + if err := cmd.Run(); err != nil { + return err + } + fmt.Println(" ✓ olric-server") + return nil +} + +func (b *Builder) buildIPFSCluster() error { + fmt.Printf("[3/7] Cross-compiling IPFS Cluster %s...\n", constants.IPFSClusterVersion) + + cmd := exec.Command("go", "install", + fmt.Sprintf("github.com/ipfs-cluster/ipfs-cluster/cmd/ipfs-cluster-service@%s", constants.IPFSClusterVersion)) + cmd.Env = append(b.crossEnv(), + "GOBIN="+b.binDir, + "GOPROXY=https://proxy.golang.org|direct", + "GONOSUMDB=*") + cmd.Stdout = os.Stdout + cmd.Stderr = os.Stderr + + if err := cmd.Run(); err != nil { + return err + } + fmt.Println(" ✓ ipfs-cluster-service") + return nil +} + +func (b *Builder) buildCoreDNS() error { + fmt.Printf("[4/7] Building CoreDNS %s with RQLite plugin...\n", constants.CoreDNSVersion) + + buildDir := filepath.Join(b.tmpDir, "coredns-build") + + // Clone CoreDNS + fmt.Println(" Cloning CoreDNS...") + cmd := exec.Command("git", "clone", "--depth", "1", + "--branch", "v"+constants.CoreDNSVersion, + "https://github.com/coredns/coredns.git", buildDir) + cmd.Stdout = os.Stdout + cmd.Stderr = os.Stderr + if err := cmd.Run(); err != nil { + return fmt.Errorf("failed to clone coredns: %w", err) + } + + // Copy RQLite plugin from local source + pluginSrc := filepath.Join(b.projectDir, "pkg", "coredns", "rqlite") + pluginDst := filepath.Join(buildDir, "plugin", "rqlite") + if err := os.MkdirAll(pluginDst, 0755); err != nil { + return err + } + + 
entries, err := os.ReadDir(pluginSrc) + if err != nil { + return fmt.Errorf("failed to read rqlite plugin source at %s: %w", pluginSrc, err) + } + for _, entry := range entries { + if entry.IsDir() || filepath.Ext(entry.Name()) != ".go" { + continue + } + data, err := os.ReadFile(filepath.Join(pluginSrc, entry.Name())) + if err != nil { + return err + } + if err := os.WriteFile(filepath.Join(pluginDst, entry.Name()), data, 0644); err != nil { + return err + } + } + + // Write plugin.cfg (same as build-linux-coredns.sh) + pluginCfg := `metadata:metadata +cancel:cancel +tls:tls +reload:reload +nsid:nsid +bufsize:bufsize +root:root +bind:bind +debug:debug +trace:trace +ready:ready +health:health +pprof:pprof +prometheus:metrics +errors:errors +log:log +dnstap:dnstap +local:local +dns64:dns64 +acl:acl +any:any +chaos:chaos +loadbalance:loadbalance +cache:cache +rewrite:rewrite +header:header +dnssec:dnssec +autopath:autopath +minimal:minimal +template:template +transfer:transfer +hosts:hosts +file:file +auto:auto +secondary:secondary +loop:loop +forward:forward +grpc:grpc +erratic:erratic +whoami:whoami +on:github.com/coredns/caddy/onevent +sign:sign +view:view +rqlite:rqlite +` + if err := os.WriteFile(filepath.Join(buildDir, "plugin.cfg"), []byte(pluginCfg), 0644); err != nil { + return err + } + + // Add dependencies + fmt.Println(" Adding dependencies...") + goPath := os.Getenv("PATH") + baseEnv := append(os.Environ(), + "PATH="+goPath, + "GOPROXY=https://proxy.golang.org|direct", + "GONOSUMDB=*") + + for _, dep := range []string{"github.com/miekg/dns@latest", "go.uber.org/zap@latest"} { + cmd := exec.Command("go", "get", dep) + cmd.Dir = buildDir + cmd.Env = baseEnv + cmd.Stderr = os.Stderr + if err := cmd.Run(); err != nil { + return fmt.Errorf("failed to get %s: %w", dep, err) + } + } + + cmd = exec.Command("go", "mod", "tidy") + cmd.Dir = buildDir + cmd.Env = baseEnv + cmd.Stderr = os.Stderr + if err := cmd.Run(); err != nil { + return fmt.Errorf("go mod tidy 
failed: %w", err) + } + + // Generate plugin code + fmt.Println(" Generating plugin code...") + cmd = exec.Command("go", "generate") + cmd.Dir = buildDir + cmd.Env = baseEnv + cmd.Stderr = os.Stderr + if err := cmd.Run(); err != nil { + return fmt.Errorf("go generate failed: %w", err) + } + + // Cross-compile + fmt.Println(" Building binary...") + cmd = exec.Command("go", "build", + "-ldflags", "-s -w", + "-trimpath", + "-o", filepath.Join(b.binDir, "coredns")) + cmd.Dir = buildDir + cmd.Env = append(baseEnv, + "GOOS=linux", + fmt.Sprintf("GOARCH=%s", b.flags.Arch), + "CGO_ENABLED=0") + cmd.Stdout = os.Stdout + cmd.Stderr = os.Stderr + if err := cmd.Run(); err != nil { + return fmt.Errorf("build failed: %w", err) + } + + fmt.Println(" ✓ coredns") + return nil +} + +func (b *Builder) buildCaddy() error { + fmt.Printf("[5/7] Building Caddy %s with Orama DNS module...\n", constants.CaddyVersion) + + // Ensure xcaddy is available + if _, err := exec.LookPath("xcaddy"); err != nil { + return fmt.Errorf("xcaddy not found in PATH — install with: go install github.com/caddyserver/xcaddy/cmd/xcaddy@latest") + } + + moduleDir := filepath.Join(b.tmpDir, "caddy-dns-orama") + if err := os.MkdirAll(moduleDir, 0755); err != nil { + return err + } + + // Write go.mod + goMod := fmt.Sprintf(`module github.com/DeBrosOfficial/caddy-dns-orama + +go 1.22 + +require ( + github.com/caddyserver/caddy/v2 v2.%s + github.com/libdns/libdns v1.1.0 +) +`, constants.CaddyVersion[2:]) + if err := os.WriteFile(filepath.Join(moduleDir, "go.mod"), []byte(goMod), 0644); err != nil { + return err + } + + // Write provider.go — read from the caddy installer's generated code + // We inline the same provider code used by the VPS-side caddy installer + providerCode := generateCaddyProviderCode() + if err := os.WriteFile(filepath.Join(moduleDir, "provider.go"), []byte(providerCode), 0644); err != nil { + return err + } + + // go mod tidy + cmd := exec.Command("go", "mod", "tidy") + cmd.Dir = moduleDir + 
cmd.Env = append(os.Environ(), + "GOPROXY=https://proxy.golang.org|direct", + "GONOSUMDB=*") + cmd.Stderr = os.Stderr + if err := cmd.Run(); err != nil { + return fmt.Errorf("go mod tidy failed: %w", err) + } + + // Build with xcaddy + fmt.Println(" Building binary...") + cmd = exec.Command("xcaddy", "build", + "v"+constants.CaddyVersion, + "--with", "github.com/DeBrosOfficial/caddy-dns-orama="+moduleDir, + "--output", filepath.Join(b.binDir, "caddy")) + cmd.Env = append(os.Environ(), + "GOOS=linux", + fmt.Sprintf("GOARCH=%s", b.flags.Arch), + "GOPROXY=https://proxy.golang.org|direct", + "GONOSUMDB=*") + cmd.Stdout = os.Stdout + cmd.Stderr = os.Stderr + if err := cmd.Run(); err != nil { + return fmt.Errorf("xcaddy build failed: %w", err) + } + + fmt.Println(" ✓ caddy") + return nil +} + +func (b *Builder) downloadIPFS() error { + fmt.Printf("[6/7] Downloading IPFS Kubo %s...\n", constants.IPFSKuboVersion) + + arch := b.flags.Arch + tarball := fmt.Sprintf("kubo_%s_linux-%s.tar.gz", constants.IPFSKuboVersion, arch) + url := fmt.Sprintf("https://dist.ipfs.tech/kubo/%s/%s", constants.IPFSKuboVersion, tarball) + tarPath := filepath.Join(b.tmpDir, tarball) + + if err := downloadFile(url, tarPath); err != nil { + return err + } + + // Extract ipfs binary from kubo/ipfs + if err := extractFileFromTarball(tarPath, "kubo/ipfs", filepath.Join(b.binDir, "ipfs")); err != nil { + return err + } + + fmt.Println(" ✓ ipfs") + return nil +} + +func (b *Builder) downloadRQLite() error { + fmt.Printf("[7/7] Downloading RQLite %s...\n", constants.RQLiteVersion) + + arch := b.flags.Arch + tarball := fmt.Sprintf("rqlite-v%s-linux-%s.tar.gz", constants.RQLiteVersion, arch) + url := fmt.Sprintf("https://github.com/rqlite/rqlite/releases/download/v%s/%s", constants.RQLiteVersion, tarball) + tarPath := filepath.Join(b.tmpDir, tarball) + + if err := downloadFile(url, tarPath); err != nil { + return err + } + + // Extract rqlited binary + extractDir := fmt.Sprintf("rqlite-v%s-linux-%s", 
constants.RQLiteVersion, arch) + if err := extractFileFromTarball(tarPath, extractDir+"/rqlited", filepath.Join(b.binDir, "rqlited")); err != nil { + return err + } + + fmt.Println(" ✓ rqlited") + return nil +} + +func (b *Builder) copySystemdTemplates() error { + systemdSrc := filepath.Join(b.projectDir, "systemd") + systemdDst := filepath.Join(b.tmpDir, "systemd") + if err := os.MkdirAll(systemdDst, 0755); err != nil { + return err + } + + entries, err := os.ReadDir(systemdSrc) + if err != nil { + return fmt.Errorf("failed to read systemd dir: %w", err) + } + + for _, entry := range entries { + if entry.IsDir() || !strings.HasSuffix(entry.Name(), ".service") { + continue + } + data, err := os.ReadFile(filepath.Join(systemdSrc, entry.Name())) + if err != nil { + return err + } + if err := os.WriteFile(filepath.Join(systemdDst, entry.Name()), data, 0644); err != nil { + return err + } + } + + return nil +} + +// crossEnv returns the environment for cross-compilation. +func (b *Builder) crossEnv() []string { + return append(os.Environ(), + "GOOS=linux", + fmt.Sprintf("GOARCH=%s", b.flags.Arch), + "CGO_ENABLED=0") +} + +func (b *Builder) readVersion() string { + // Try to read from Makefile + data, err := os.ReadFile(filepath.Join(b.projectDir, "Makefile")) + if err != nil { + return "dev" + } + for _, line := range strings.Split(string(data), "\n") { + line = strings.TrimSpace(line) + if strings.HasPrefix(line, "VERSION") { + parts := strings.SplitN(line, ":=", 2) + if len(parts) == 2 { + return strings.TrimSpace(parts[1]) + } + } + } + return "dev" +} + +func (b *Builder) readCommit() string { + cmd := exec.Command("git", "rev-parse", "--short", "HEAD") + cmd.Dir = b.projectDir + out, err := cmd.Output() + if err != nil { + return "unknown" + } + return strings.TrimSpace(string(out)) +} + +// generateCaddyProviderCode returns the Caddy DNS provider Go source. +// This is the same code used by the VPS-side caddy installer. 
+func generateCaddyProviderCode() string { + return `// Package orama implements a DNS provider for Caddy that uses the Orama Network +// gateway's internal ACME API for DNS-01 challenge validation. +package orama + +import ( + "bytes" + "context" + "encoding/json" + "fmt" + "net/http" + "time" + + "github.com/caddyserver/caddy/v2" + "github.com/caddyserver/caddy/v2/caddyconfig/caddyfile" + "github.com/libdns/libdns" +) + +func init() { + caddy.RegisterModule(Provider{}) +} + +// Provider wraps the Orama DNS provider for Caddy. +type Provider struct { + // Endpoint is the URL of the Orama gateway's ACME API + // Default: http://localhost:6001/v1/internal/acme + Endpoint string ` + "`json:\"endpoint,omitempty\"`" + ` +} + +// CaddyModule returns the Caddy module information. +func (Provider) CaddyModule() caddy.ModuleInfo { + return caddy.ModuleInfo{ + ID: "dns.providers.orama", + New: func() caddy.Module { return new(Provider) }, + } +} + +// Provision sets up the module. +func (p *Provider) Provision(ctx caddy.Context) error { + if p.Endpoint == "" { + p.Endpoint = "http://localhost:6001/v1/internal/acme" + } + return nil +} + +// UnmarshalCaddyfile parses the Caddyfile configuration. +func (p *Provider) UnmarshalCaddyfile(d *caddyfile.Dispenser) error { + for d.Next() { + for d.NextBlock(0) { + switch d.Val() { + case "endpoint": + if !d.NextArg() { + return d.ArgErr() + } + p.Endpoint = d.Val() + default: + return d.Errf("unrecognized option: %s", d.Val()) + } + } + } + return nil +} + +// AppendRecords adds records to the zone. +func (p *Provider) AppendRecords(ctx context.Context, zone string, records []libdns.Record) ([]libdns.Record, error) { + var added []libdns.Record + for _, rec := range records { + rr := rec.RR() + if rr.Type != "TXT" { + continue + } + fqdn := rr.Name + "." 
+ zone + payload := map[string]string{"fqdn": fqdn, "value": rr.Data} + body, err := json.Marshal(payload) + if err != nil { + return added, fmt.Errorf("failed to marshal request: %w", err) + } + req, err := http.NewRequestWithContext(ctx, "POST", p.Endpoint+"/present", bytes.NewReader(body)) + if err != nil { + return added, fmt.Errorf("failed to create request: %w", err) + } + req.Header.Set("Content-Type", "application/json") + client := &http.Client{Timeout: 30 * time.Second} + resp, err := client.Do(req) + if err != nil { + return added, fmt.Errorf("failed to present challenge: %w", err) + } + resp.Body.Close() + if resp.StatusCode != http.StatusOK { + return added, fmt.Errorf("present failed with status %d", resp.StatusCode) + } + added = append(added, rec) + } + return added, nil +} + +// DeleteRecords removes records from the zone. +func (p *Provider) DeleteRecords(ctx context.Context, zone string, records []libdns.Record) ([]libdns.Record, error) { + var deleted []libdns.Record + for _, rec := range records { + rr := rec.RR() + if rr.Type != "TXT" { + continue + } + fqdn := rr.Name + "." + zone + payload := map[string]string{"fqdn": fqdn, "value": rr.Data} + body, err := json.Marshal(payload) + if err != nil { + return deleted, fmt.Errorf("failed to marshal request: %w", err) + } + req, err := http.NewRequestWithContext(ctx, "POST", p.Endpoint+"/cleanup", bytes.NewReader(body)) + if err != nil { + return deleted, fmt.Errorf("failed to create request: %w", err) + } + req.Header.Set("Content-Type", "application/json") + client := &http.Client{Timeout: 30 * time.Second} + resp, err := client.Do(req) + if err != nil { + return deleted, fmt.Errorf("failed to cleanup challenge: %w", err) + } + resp.Body.Close() + if resp.StatusCode != http.StatusOK { + return deleted, fmt.Errorf("cleanup failed with status %d", resp.StatusCode) + } + deleted = append(deleted, rec) + } + return deleted, nil +} + +// GetRecords returns the records in the zone. Not used for ACME. 
+func (p *Provider) GetRecords(ctx context.Context, zone string) ([]libdns.Record, error) { + return nil, nil +} + +// SetRecords sets the records in the zone. Not used for ACME. +func (p *Provider) SetRecords(ctx context.Context, zone string, records []libdns.Record) ([]libdns.Record, error) { + return nil, nil +} + +// Interface guards +var ( + _ caddy.Module = (*Provider)(nil) + _ caddy.Provisioner = (*Provider)(nil) + _ caddyfile.Unmarshaler = (*Provider)(nil) + _ libdns.RecordAppender = (*Provider)(nil) + _ libdns.RecordDeleter = (*Provider)(nil) + _ libdns.RecordGetter = (*Provider)(nil) + _ libdns.RecordSetter = (*Provider)(nil) +) +` +} diff --git a/pkg/cli/build/command.go b/pkg/cli/build/command.go new file mode 100644 index 0000000..97fe0f4 --- /dev/null +++ b/pkg/cli/build/command.go @@ -0,0 +1,80 @@ +package build + +import ( + "flag" + "fmt" + "os" + "path/filepath" + "runtime" +) + +// Flags represents build command flags. +type Flags struct { + Arch string + Output string + Verbose bool +} + +// Handle is the entry point for the build command. +func Handle(args []string) { + flags, err := parseFlags(args) + if err != nil { + if err == flag.ErrHelp { + return + } + fmt.Fprintf(os.Stderr, "Error: %v\n", err) + os.Exit(1) + } + + b := NewBuilder(flags) + if err := b.Build(); err != nil { + fmt.Fprintf(os.Stderr, "Error: %v\n", err) + os.Exit(1) + } +} + +func parseFlags(args []string) (*Flags, error) { + fs := flag.NewFlagSet("build", flag.ContinueOnError) + fs.SetOutput(os.Stderr) + + flags := &Flags{} + + fs.StringVar(&flags.Arch, "arch", "amd64", "Target architecture (amd64, arm64)") + fs.StringVar(&flags.Output, "output", "", "Output archive path (default: /tmp/orama--linux-.tar.gz)") + fs.BoolVar(&flags.Verbose, "verbose", false, "Verbose output") + + if err := fs.Parse(args); err != nil { + return nil, err + } + + return flags, nil +} + +// findProjectRoot walks up from the current directory looking for go.mod. 
+func findProjectRoot() (string, error) { + dir, err := os.Getwd() + if err != nil { + return "", err + } + + for { + if _, err := os.Stat(filepath.Join(dir, "go.mod")); err == nil { + // Verify it's the network project + if _, err := os.Stat(filepath.Join(dir, "cmd", "cli")); err == nil { + return dir, nil + } + } + parent := filepath.Dir(dir) + if parent == dir { + break + } + dir = parent + } + + return "", fmt.Errorf("could not find project root (no go.mod with cmd/cli found)") +} + +// detectHostArch returns the host architecture in Go naming convention. +func detectHostArch() string { + return runtime.GOARCH +} diff --git a/pkg/cli/cmd/buildcmd/build.go b/pkg/cli/cmd/buildcmd/build.go new file mode 100644 index 0000000..dd7b5db --- /dev/null +++ b/pkg/cli/cmd/buildcmd/build.go @@ -0,0 +1,24 @@ +package buildcmd + +import ( + "github.com/DeBrosOfficial/network/pkg/cli/build" + "github.com/spf13/cobra" +) + +// Cmd is the top-level build command. +var Cmd = &cobra.Command{ + Use: "build", + Short: "Build pre-compiled binary archive for deployment", + Long: `Cross-compile all Orama binaries and dependencies for Linux, +then package them into a deployment archive. 
The archive includes: + - Orama binaries (CLI, node, gateway, identity, SFU, TURN) + - Olric, IPFS Kubo, IPFS Cluster, RQLite, CoreDNS, Caddy + - Systemd namespace templates + - manifest.json with checksums + +The resulting archive can be pushed to nodes with 'orama node push'.`, + Run: func(cmd *cobra.Command, args []string) { + build.Handle(args) + }, + DisableFlagParsing: true, +} diff --git a/pkg/cli/cmd/node/clean.go b/pkg/cli/cmd/node/clean.go new file mode 100644 index 0000000..65c80a3 --- /dev/null +++ b/pkg/cli/cmd/node/clean.go @@ -0,0 +1,25 @@ +package node + +import ( + "github.com/DeBrosOfficial/network/pkg/cli/production/clean" + "github.com/spf13/cobra" +) + +var cleanCmd = &cobra.Command{ + Use: "clean", + Short: "Clean (wipe) remote nodes for reinstallation", + Long: `Remove all Orama data, services, and configuration from remote nodes. +Anyone relay keys at /var/lib/anon/ are preserved. + +This is a DESTRUCTIVE operation. Use --force to skip confirmation. + +Examples: + orama node clean --env testnet # Clean all testnet nodes + orama node clean --env testnet --node 1.2.3.4 # Clean specific node + orama node clean --env testnet --nuclear # Also remove shared binaries + orama node clean --env testnet --force # Skip confirmation`, + Run: func(cmd *cobra.Command, args []string) { + clean.Handle(args) + }, + DisableFlagParsing: true, +} diff --git a/pkg/cli/cmd/node/node.go b/pkg/cli/cmd/node/node.go index 400f7fb..5520571 100644 --- a/pkg/cli/cmd/node/node.go +++ b/pkg/cli/cmd/node/node.go @@ -26,4 +26,8 @@ func init() { Cmd.AddCommand(migrateCmd) Cmd.AddCommand(doctorCmd) Cmd.AddCommand(reportCmd) + Cmd.AddCommand(pushCmd) + Cmd.AddCommand(rolloutCmd) + Cmd.AddCommand(cleanCmd) + Cmd.AddCommand(recoverRaftCmd) } diff --git a/pkg/cli/cmd/node/push.go b/pkg/cli/cmd/node/push.go new file mode 100644 index 0000000..3c1b159 --- /dev/null +++ b/pkg/cli/cmd/node/push.go @@ -0,0 +1,24 @@ +package node + +import ( + 
"github.com/DeBrosOfficial/network/pkg/cli/production/push" + "github.com/spf13/cobra" +) + +var pushCmd = &cobra.Command{ + Use: "push", + Short: "Push binary archive to remote nodes", + Long: `Upload a pre-built binary archive to remote nodes. + +By default, uses fanout distribution: uploads to one hub node, +then distributes to all others via server-to-server SCP. + +Examples: + orama node push --env devnet # Fanout to all devnet nodes + orama node push --env testnet --node 1.2.3.4 # Single node + orama node push --env testnet --direct # Sequential upload to each node`, + Run: func(cmd *cobra.Command, args []string) { + push.Handle(args) + }, + DisableFlagParsing: true, +} diff --git a/pkg/cli/cmd/node/recover_raft.go b/pkg/cli/cmd/node/recover_raft.go new file mode 100644 index 0000000..a6499df --- /dev/null +++ b/pkg/cli/cmd/node/recover_raft.go @@ -0,0 +1,31 @@ +package node + +import ( + "github.com/DeBrosOfficial/network/pkg/cli/production/recover" + "github.com/spf13/cobra" +) + +var recoverRaftCmd = &cobra.Command{ + Use: "recover-raft", + Short: "Recover RQLite cluster from split-brain", + Long: `Recover the RQLite Raft cluster from split-brain failure. + +Strategy: + 1. Stop orama-node on ALL nodes simultaneously + 2. Backup and delete raft/ on non-leader nodes + 3. Start leader node, wait for Leader state + 4. Start remaining nodes in batches + 5. Verify cluster health + +The --leader flag must point to the node with the highest commit index. + +This is a DESTRUCTIVE operation. Use --force to skip confirmation. 
+ +Examples: + orama node recover-raft --env testnet --leader 1.2.3.4 + orama node recover-raft --env devnet --leader 1.2.3.4 --force`, + Run: func(cmd *cobra.Command, args []string) { + recover.Handle(args) + }, + DisableFlagParsing: true, +} diff --git a/pkg/cli/cmd/node/rollout.go b/pkg/cli/cmd/node/rollout.go new file mode 100644 index 0000000..d2a2c59 --- /dev/null +++ b/pkg/cli/cmd/node/rollout.go @@ -0,0 +1,22 @@ +package node + +import ( + "github.com/DeBrosOfficial/network/pkg/cli/production/rollout" + "github.com/spf13/cobra" +) + +var rolloutCmd = &cobra.Command{ + Use: "rollout", + Short: "Build, push, and rolling upgrade all nodes in an environment", + Long: `Full deployment pipeline: build binary archive locally, push to all nodes, +then perform a rolling upgrade (one node at a time). + +Examples: + orama node rollout --env testnet # Full: build + push + rolling upgrade + orama node rollout --env testnet --no-build # Skip build, use existing archive + orama node rollout --env testnet --yes # Skip confirmation`, + Run: func(cmd *cobra.Command, args []string) { + rollout.Handle(args) + }, + DisableFlagParsing: true, +} diff --git a/pkg/cli/production/clean/clean.go b/pkg/cli/production/clean/clean.go new file mode 100644 index 0000000..65d1435 --- /dev/null +++ b/pkg/cli/production/clean/clean.go @@ -0,0 +1,183 @@ +package clean + +import ( + "bufio" + "flag" + "fmt" + "os" + "strings" + + "github.com/DeBrosOfficial/network/pkg/cli/remotessh" + "github.com/DeBrosOfficial/network/pkg/inspector" +) + +// Flags holds clean command flags. +type Flags struct { + Env string // Target environment + Node string // Single node IP + Nuclear bool // Also remove shared binaries + Force bool // Skip confirmation +} + +// Handle is the entry point for the clean command. 
+func Handle(args []string) { + flags, err := parseFlags(args) + if err != nil { + if err == flag.ErrHelp { + return + } + fmt.Fprintf(os.Stderr, "Error: %v\n", err) + os.Exit(1) + } + + if err := execute(flags); err != nil { + fmt.Fprintf(os.Stderr, "Error: %v\n", err) + os.Exit(1) + } +} + +func parseFlags(args []string) (*Flags, error) { + fs := flag.NewFlagSet("clean", flag.ContinueOnError) + fs.SetOutput(os.Stderr) + + flags := &Flags{} + fs.StringVar(&flags.Env, "env", "", "Target environment (devnet, testnet) [required]") + fs.StringVar(&flags.Node, "node", "", "Clean a single node IP only") + fs.BoolVar(&flags.Nuclear, "nuclear", false, "Also remove shared binaries (rqlited, ipfs, caddy, etc.)") + fs.BoolVar(&flags.Force, "force", false, "Skip confirmation (DESTRUCTIVE)") + + if err := fs.Parse(args); err != nil { + return nil, err + } + + if flags.Env == "" { + return nil, fmt.Errorf("--env is required\nUsage: orama node clean --env --force") + } + + return flags, nil +} + +func execute(flags *Flags) error { + nodes, err := remotessh.LoadEnvNodes(flags.Env) + if err != nil { + return err + } + + if flags.Node != "" { + nodes = remotessh.FilterByIP(nodes, flags.Node) + if len(nodes) == 0 { + return fmt.Errorf("node %s not found in %s environment", flags.Node, flags.Env) + } + } + + fmt.Printf("Clean %s: %d node(s)\n", flags.Env, len(nodes)) + if flags.Nuclear { + fmt.Printf(" Mode: NUCLEAR (removes binaries too)\n") + } + for _, n := range nodes { + fmt.Printf(" - %s (%s)\n", n.Host, n.Role) + } + fmt.Println() + + // Confirm unless --force + if !flags.Force { + fmt.Printf("This will DESTROY all data on these nodes. 
Anyone relay keys are preserved.\n") + fmt.Printf("Type 'yes' to confirm: ") + reader := bufio.NewReader(os.Stdin) + input, _ := reader.ReadString('\n') + if strings.TrimSpace(input) != "yes" { + fmt.Println("Aborted.") + return nil + } + fmt.Println() + } + + // Clean each node + var failed []string + for i, node := range nodes { + fmt.Printf("[%d/%d] Cleaning %s...\n", i+1, len(nodes), node.Host) + if err := cleanNode(node, flags.Nuclear); err != nil { + fmt.Fprintf(os.Stderr, " ✗ %s: %v\n", node.Host, err) + failed = append(failed, node.Host) + continue + } + fmt.Printf(" ✓ %s cleaned\n\n", node.Host) + } + + if len(failed) > 0 { + return fmt.Errorf("clean failed on %d node(s): %s", len(failed), strings.Join(failed, ", ")) + } + + fmt.Printf("✓ Clean complete (%d nodes)\n", len(nodes)) + fmt.Printf(" Anyone relay keys preserved at /var/lib/anon/\n") + fmt.Printf(" To reinstall: orama node install --vps-ip ...\n") + return nil +} + +func cleanNode(node inspector.Node, nuclear bool) error { + sudo := remotessh.SudoPrefix(node) + + nuclearFlag := "" + if nuclear { + nuclearFlag = "NUCLEAR=1" + } + + // The cleanup script runs on the remote node + script := fmt.Sprintf(`%sbash -c ' +%s + +# Stop services +for svc in caddy coredns orama-node orama-gateway orama-ipfs-cluster orama-ipfs orama-olric orama-anyone-relay orama-anyone-client; do + systemctl stop "$svc" 2>/dev/null + systemctl disable "$svc" 2>/dev/null +done + +# Kill stragglers +pkill -9 -f "orama-node" 2>/dev/null || true +pkill -9 -f "olric-server" 2>/dev/null || true +pkill -9 -f "ipfs" 2>/dev/null || true + +# Remove systemd units +rm -f /etc/systemd/system/orama-*.service +rm -f /etc/systemd/system/coredns.service +rm -f /etc/systemd/system/caddy.service +systemctl daemon-reload 2>/dev/null + +# Tear down WireGuard +ip link delete wg0 2>/dev/null || true +rm -f /etc/wireguard/wg0.conf + +# Reset firewall +ufw --force reset 2>/dev/null || true +ufw default deny incoming 2>/dev/null || true +ufw default 
allow outgoing 2>/dev/null || true +ufw allow 22/tcp 2>/dev/null || true +ufw --force enable 2>/dev/null || true + +# Remove data +rm -rf /opt/orama + +# Clean configs +rm -rf /etc/coredns +rm -rf /etc/caddy +rm -f /tmp/orama-*.sh /tmp/network-source.tar.gz /tmp/orama-*.tar.gz + +# Nuclear: remove binaries +if [ -n "$NUCLEAR" ]; then + rm -f /usr/local/bin/orama /usr/local/bin/orama-node /usr/local/bin/gateway + rm -f /usr/local/bin/identity /usr/local/bin/sfu /usr/local/bin/turn + rm -f /usr/local/bin/olric-server /usr/local/bin/ipfs /usr/local/bin/ipfs-cluster-service + rm -f /usr/local/bin/rqlited /usr/local/bin/coredns + rm -f /usr/bin/caddy +fi + +# Verify Anyone keys preserved +if [ -d /var/lib/anon ]; then + echo " Anyone relay keys preserved at /var/lib/anon/" +fi + +echo " Node cleaned successfully" +'`, sudo, nuclearFlag) + + return remotessh.RunSSHStreaming(node, script) +} diff --git a/pkg/cli/production/install/remote.go b/pkg/cli/production/install/remote.go index b4ca02b..b5b10a5 100644 --- a/pkg/cli/production/install/remote.go +++ b/pkg/cli/production/install/remote.go @@ -2,6 +2,8 @@ package install import ( "fmt" + "os" + "path/filepath" "strconv" "strings" @@ -36,10 +38,18 @@ func NewRemoteOrchestrator(flags *Flags) (*RemoteOrchestrator, error) { } // Execute runs the remote install process. -// Source must already be uploaded via: ./scripts/upload-source.sh +// If a binary archive exists locally, uploads and extracts it on the VPS +// so Phase2b auto-detects pre-built mode. 
Otherwise, source must already +// be uploaded via: ./scripts/upload-source.sh func (r *RemoteOrchestrator) Execute() error { fmt.Printf("Installing on %s via SSH (%s@%s)...\n\n", r.flags.VpsIP, r.node.User, r.node.Host) + // Try to upload a binary archive if one exists locally + if err := r.uploadBinaryArchive(); err != nil { + fmt.Printf(" ⚠️ Binary archive upload skipped: %v\n", err) + fmt.Printf(" Proceeding with source mode (source must already be on VPS)\n\n") + } + // Run remote install fmt.Printf("Running install on VPS...\n\n") if err := r.runRemoteInstall(); err != nil { @@ -49,6 +59,62 @@ func (r *RemoteOrchestrator) Execute() error { return nil } +// uploadBinaryArchive finds a local binary archive and uploads + extracts it on the VPS. +// Returns nil on success, error if no archive found or upload failed. +func (r *RemoteOrchestrator) uploadBinaryArchive() error { + archivePath := r.findLocalArchive() + if archivePath == "" { + return fmt.Errorf("no binary archive found locally") + } + + fmt.Printf("Uploading binary archive: %s\n", filepath.Base(archivePath)) + + // Upload to /tmp/ on VPS + remoteTmp := "/tmp/" + filepath.Base(archivePath) + if err := uploadFile(r.node, archivePath, remoteTmp); err != nil { + return fmt.Errorf("failed to upload archive: %w", err) + } + + // Extract to /opt/orama/ on VPS + fmt.Printf("Extracting archive on VPS...\n") + extractCmd := fmt.Sprintf("%smkdir -p /opt/orama && tar xzf %s -C /opt/orama && rm -f %s && echo ' ✓ Archive extracted to /opt/orama/'", + r.sudoPrefix(), remoteTmp, remoteTmp) + if err := runSSHStreaming(r.node, extractCmd); err != nil { + return fmt.Errorf("failed to extract archive on VPS: %w", err) + } + + fmt.Println() + return nil +} + +// findLocalArchive searches for a binary archive in common locations. 
+func (r *RemoteOrchestrator) findLocalArchive() string { + // Check /tmp/ for archives matching the naming pattern + entries, err := os.ReadDir("/tmp") + if err != nil { + return "" + } + + // Look for orama-*-linux-*.tar.gz, prefer newest + var best string + var bestMod int64 + for _, entry := range entries { + name := entry.Name() + if strings.HasPrefix(name, "orama-") && strings.Contains(name, "-linux-") && strings.HasSuffix(name, ".tar.gz") { + info, err := entry.Info() + if err != nil { + continue + } + if info.ModTime().Unix() > bestMod { + best = filepath.Join("/tmp", name) + bestMod = info.ModTime().Unix() + } + } + } + + return best +} + // runRemoteInstall executes `orama install` on the VPS. func (r *RemoteOrchestrator) runRemoteInstall() error { cmd := r.buildRemoteCommand() diff --git a/pkg/cli/production/push/push.go b/pkg/cli/production/push/push.go new file mode 100644 index 0000000..9cfebd9 --- /dev/null +++ b/pkg/cli/production/push/push.go @@ -0,0 +1,248 @@ +package push + +import ( + "flag" + "fmt" + "os" + "path/filepath" + "strings" + "sync" + + "github.com/DeBrosOfficial/network/pkg/cli/remotessh" + "github.com/DeBrosOfficial/network/pkg/inspector" +) + +// Flags holds push command flags. +type Flags struct { + Env string // Target environment (devnet, testnet) + Node string // Single node IP (optional) + Direct bool // Sequential upload to each node (no fanout) +} + +// Handle is the entry point for the push command. 
+func Handle(args []string) { + flags, err := parseFlags(args) + if err != nil { + if err == flag.ErrHelp { + return + } + fmt.Fprintf(os.Stderr, "Error: %v\n", err) + os.Exit(1) + } + + if err := execute(flags); err != nil { + fmt.Fprintf(os.Stderr, "Error: %v\n", err) + os.Exit(1) + } +} + +func parseFlags(args []string) (*Flags, error) { + fs := flag.NewFlagSet("push", flag.ContinueOnError) + fs.SetOutput(os.Stderr) + + flags := &Flags{} + fs.StringVar(&flags.Env, "env", "", "Target environment (devnet, testnet) [required]") + fs.StringVar(&flags.Node, "node", "", "Push to a single node IP only") + fs.BoolVar(&flags.Direct, "direct", false, "Upload directly to each node (no hub fanout)") + + if err := fs.Parse(args); err != nil { + return nil, err + } + + if flags.Env == "" { + return nil, fmt.Errorf("--env is required\nUsage: orama node push --env ") + } + + return flags, nil +} + +func execute(flags *Flags) error { + // Find archive + archivePath := findNewestArchive() + if archivePath == "" { + return fmt.Errorf("no binary archive found in /tmp/ (run `orama build` first)") + } + + info, _ := os.Stat(archivePath) + fmt.Printf("Archive: %s (%s)\n", filepath.Base(archivePath), formatBytes(info.Size())) + + // Resolve nodes + nodes, err := remotessh.LoadEnvNodes(flags.Env) + if err != nil { + return err + } + + // Filter to single node if specified + if flags.Node != "" { + nodes = remotessh.FilterByIP(nodes, flags.Node) + if len(nodes) == 0 { + return fmt.Errorf("node %s not found in %s environment", flags.Node, flags.Env) + } + } + + fmt.Printf("Environment: %s (%d nodes)\n\n", flags.Env, len(nodes)) + + if flags.Direct || len(nodes) == 1 { + return pushDirect(archivePath, nodes) + } + + return pushFanout(archivePath, nodes) +} + +// pushDirect uploads the archive to each node sequentially. 
+func pushDirect(archivePath string, nodes []inspector.Node) error { + remotePath := "/tmp/" + filepath.Base(archivePath) + + for i, node := range nodes { + fmt.Printf("[%d/%d] Pushing to %s...\n", i+1, len(nodes), node.Host) + + if err := remotessh.UploadFile(node, archivePath, remotePath); err != nil { + return fmt.Errorf("upload to %s failed: %w", node.Host, err) + } + + if err := extractOnNode(node, remotePath); err != nil { + return fmt.Errorf("extract on %s failed: %w", node.Host, err) + } + + fmt.Printf(" ✓ %s done\n\n", node.Host) + } + + fmt.Printf("✓ Push complete (%d nodes)\n", len(nodes)) + return nil +} + +// pushFanout uploads to a hub node, then fans out to all others via server-to-server SCP. +func pushFanout(archivePath string, nodes []inspector.Node) error { + hub := remotessh.PickHubNode(nodes) + remotePath := "/tmp/" + filepath.Base(archivePath) + + // Step 1: Upload to hub + fmt.Printf("[hub] Uploading to %s...\n", hub.Host) + if err := remotessh.UploadFile(hub, archivePath, remotePath); err != nil { + return fmt.Errorf("upload to hub %s failed: %w", hub.Host, err) + } + + if err := extractOnNode(hub, remotePath); err != nil { + return fmt.Errorf("extract on hub %s failed: %w", hub.Host, err) + } + fmt.Printf(" ✓ hub %s done\n\n", hub.Host) + + // Step 2: Fan out from hub to remaining nodes in parallel + remaining := make([]inspector.Node, 0, len(nodes)-1) + for _, n := range nodes { + if n.Host != hub.Host { + remaining = append(remaining, n) + } + } + + if len(remaining) == 0 { + fmt.Printf("✓ Push complete (1 node)\n") + return nil + } + + fmt.Printf("[fanout] Distributing from %s to %d nodes...\n", hub.Host, len(remaining)) + + var wg sync.WaitGroup + errors := make([]error, len(remaining)) + + for i, target := range remaining { + wg.Add(1) + go func(idx int, target inspector.Node) { + defer wg.Done() + + // SCP from hub to target, then extract + scpCmd := fmt.Sprintf("sshpass -p '%s' scp -o StrictHostKeyChecking=no -o ConnectTimeout=10 -o 
PreferredAuthentications=password -o PubkeyAuthentication=no %s %s@%s:%s", + target.Password, remotePath, target.User, target.Host, remotePath) + + if err := remotessh.RunSSHStreaming(hub, scpCmd); err != nil { + errors[idx] = fmt.Errorf("fanout to %s failed: %w", target.Host, err) + return + } + + if err := extractOnNodeVia(hub, target, remotePath); err != nil { + errors[idx] = fmt.Errorf("extract on %s failed: %w", target.Host, err) + return + } + + fmt.Printf(" ✓ %s done\n", target.Host) + }(i, target) + } + + wg.Wait() + + // Check for errors + var failed []string + for i, err := range errors { + if err != nil { + fmt.Fprintf(os.Stderr, " ✗ %s: %v\n", remaining[i].Host, err) + failed = append(failed, remaining[i].Host) + } + } + + if len(failed) > 0 { + return fmt.Errorf("push failed on %d node(s): %s", len(failed), strings.Join(failed, ", ")) + } + + fmt.Printf("\n✓ Push complete (%d nodes)\n", len(nodes)) + return nil +} + +// extractOnNode extracts the archive on a remote node. +func extractOnNode(node inspector.Node, remotePath string) error { + sudo := remotessh.SudoPrefix(node) + cmd := fmt.Sprintf("%smkdir -p /opt/orama && %star xzf %s -C /opt/orama && %srm -f %s", + sudo, sudo, remotePath, sudo, remotePath) + return remotessh.RunSSHStreaming(node, cmd) +} + +// extractOnNodeVia extracts the archive on a target node by SSHing through the hub. 
+func extractOnNodeVia(hub, target inspector.Node, remotePath string) error { + sudo := remotessh.SudoPrefix(target) + extractCmd := fmt.Sprintf("%smkdir -p /opt/orama && %star xzf %s -C /opt/orama && %srm -f %s", + sudo, sudo, remotePath, sudo, remotePath) + + // SSH from hub to target to extract + sshCmd := fmt.Sprintf("sshpass -p '%s' ssh -o StrictHostKeyChecking=no -o ConnectTimeout=10 -o PreferredAuthentications=password -o PubkeyAuthentication=no %s@%s '%s'", + target.Password, target.User, target.Host, extractCmd) + + return remotessh.RunSSHStreaming(hub, sshCmd) +} + +// findNewestArchive finds the newest binary archive in /tmp/. +func findNewestArchive() string { + entries, err := os.ReadDir("/tmp") + if err != nil { + return "" + } + + var best string + var bestMod int64 + for _, entry := range entries { + name := entry.Name() + if strings.HasPrefix(name, "orama-") && strings.Contains(name, "-linux-") && strings.HasSuffix(name, ".tar.gz") { + info, err := entry.Info() + if err != nil { + continue + } + if info.ModTime().Unix() > bestMod { + best = filepath.Join("/tmp", name) + bestMod = info.ModTime().Unix() + } + } + } + + return best +} + +func formatBytes(b int64) string { + const unit = 1024 + if b < unit { + return fmt.Sprintf("%d B", b) + } + div, exp := int64(unit), 0 + for n := b / unit; n >= unit; n /= unit { + div *= unit + exp++ + } + return fmt.Sprintf("%.1f %cB", float64(b)/float64(div), "KMGTPE"[exp]) +} diff --git a/pkg/cli/production/recover/recover.go b/pkg/cli/production/recover/recover.go new file mode 100644 index 0000000..f697325 --- /dev/null +++ b/pkg/cli/production/recover/recover.go @@ -0,0 +1,306 @@ +package recover + +import ( + "bufio" + "flag" + "fmt" + "os" + "strings" + "time" + + "github.com/DeBrosOfficial/network/pkg/cli/remotessh" + "github.com/DeBrosOfficial/network/pkg/inspector" +) + +// Flags holds recover-raft command flags. 
+type Flags struct { + Env string // Target environment + Leader string // Leader node IP (highest commit index) + Force bool // Skip confirmation +} + +const ( + raftDir = "/opt/orama/.orama/data/rqlite/raft" + backupDir = "/tmp/rqlite-raft-backup" +) + +// Handle is the entry point for the recover-raft command. +func Handle(args []string) { + flags, err := parseFlags(args) + if err != nil { + if err == flag.ErrHelp { + return + } + fmt.Fprintf(os.Stderr, "Error: %v\n", err) + os.Exit(1) + } + + if err := execute(flags); err != nil { + fmt.Fprintf(os.Stderr, "Error: %v\n", err) + os.Exit(1) + } +} + +func parseFlags(args []string) (*Flags, error) { + fs := flag.NewFlagSet("recover-raft", flag.ContinueOnError) + fs.SetOutput(os.Stderr) + + flags := &Flags{} + fs.StringVar(&flags.Env, "env", "", "Target environment (devnet, testnet) [required]") + fs.StringVar(&flags.Leader, "leader", "", "Leader node IP (node with highest commit index) [required]") + fs.BoolVar(&flags.Force, "force", false, "Skip confirmation (DESTRUCTIVE)") + + if err := fs.Parse(args); err != nil { + return nil, err + } + + if flags.Env == "" { + return nil, fmt.Errorf("--env is required\nUsage: orama node recover-raft --env --leader ") + } + if flags.Leader == "" { + return nil, fmt.Errorf("--leader is required\nUsage: orama node recover-raft --env --leader ") + } + + return flags, nil +} + +func execute(flags *Flags) error { + nodes, err := remotessh.LoadEnvNodes(flags.Env) + if err != nil { + return err + } + + // Find leader node + leaderNodes := remotessh.FilterByIP(nodes, flags.Leader) + if len(leaderNodes) == 0 { + return fmt.Errorf("leader %s not found in %s environment", flags.Leader, flags.Env) + } + leader := leaderNodes[0] + + // Separate leader from followers + var followers []inspector.Node + for _, n := range nodes { + if n.Host != leader.Host { + followers = append(followers, n) + } + } + + // Print plan + fmt.Printf("Recover Raft: %s (%d nodes)\n", flags.Env, len(nodes)) + 
fmt.Printf(" Leader candidate: %s (%s) — raft/ data preserved\n", leader.Host, leader.Role) + for _, n := range followers { + fmt.Printf(" - %s (%s) — raft/ will be deleted\n", n.Host, n.Role) + } + fmt.Println() + + // Confirm unless --force + if !flags.Force { + fmt.Printf("⚠️ THIS WILL:\n") + fmt.Printf(" 1. Stop orama-node on ALL %d nodes\n", len(nodes)) + fmt.Printf(" 2. DELETE raft/ data on %d nodes (backup to %s)\n", len(followers), backupDir) + fmt.Printf(" 3. Keep raft/ data ONLY on %s (leader candidate)\n", leader.Host) + fmt.Printf(" 4. Restart all nodes to reform the cluster\n") + fmt.Printf("\nType 'yes' to confirm: ") + reader := bufio.NewReader(os.Stdin) + input, _ := reader.ReadString('\n') + if strings.TrimSpace(input) != "yes" { + fmt.Println("Aborted.") + return nil + } + fmt.Println() + } + + // Phase 1: Stop orama-node on ALL nodes + if err := phase1StopAll(nodes); err != nil { + return fmt.Errorf("phase 1 (stop all): %w", err) + } + + // Phase 2: Backup and delete raft/ on non-leader nodes + if err := phase2ClearFollowers(followers); err != nil { + return fmt.Errorf("phase 2 (clear followers): %w", err) + } + fmt.Printf(" Leader node %s raft/ data preserved.\n\n", leader.Host) + + // Phase 3: Start leader node and wait for Leader state + if err := phase3StartLeader(leader); err != nil { + return fmt.Errorf("phase 3 (start leader): %w", err) + } + + // Phase 4: Start remaining nodes in batches + if err := phase4StartFollowers(followers); err != nil { + return fmt.Errorf("phase 4 (start followers): %w", err) + } + + // Phase 5: Verify cluster health + phase5Verify(nodes, leader) + + return nil +} + +func phase1StopAll(nodes []inspector.Node) error { + fmt.Printf("== Phase 1: Stopping orama-node on all %d nodes ==\n", len(nodes)) + + var failed []inspector.Node + for _, node := range nodes { + sudo := remotessh.SudoPrefix(node) + fmt.Printf(" Stopping %s ... 
", node.Host) + + cmd := fmt.Sprintf("%ssystemctl stop orama-node 2>&1 && echo STOPPED", sudo) + if err := remotessh.RunSSHStreaming(node, cmd); err != nil { + fmt.Printf("FAILED\n") + failed = append(failed, node) + continue + } + fmt.Println() + } + + // Kill stragglers + if len(failed) > 0 { + fmt.Printf("\n⚠️ %d nodes failed to stop. Attempting kill...\n", len(failed)) + for _, node := range failed { + sudo := remotessh.SudoPrefix(node) + cmd := fmt.Sprintf("%skillall -9 orama-node rqlited 2>/dev/null; echo KILLED", sudo) + _ = remotessh.RunSSHStreaming(node, cmd) + } + } + + fmt.Printf("\nWaiting 5s for processes to fully stop...\n") + time.Sleep(5 * time.Second) + fmt.Println() + + return nil +} + +func phase2ClearFollowers(followers []inspector.Node) error { + fmt.Printf("== Phase 2: Clearing raft state on %d non-leader nodes ==\n", len(followers)) + + for _, node := range followers { + sudo := remotessh.SudoPrefix(node) + fmt.Printf(" Clearing %s ... ", node.Host) + + script := fmt.Sprintf(`%sbash -c ' +rm -rf %s +if [ -d %s ]; then + cp -r %s %s 2>/dev/null || true + rm -rf %s + echo "CLEARED (backup at %s)" +else + echo "NO_RAFT_DIR (nothing to clear)" +fi +'`, sudo, backupDir, raftDir, raftDir, backupDir, raftDir, backupDir) + + if err := remotessh.RunSSHStreaming(node, script); err != nil { + fmt.Printf("FAILED: %v\n", err) + continue + } + fmt.Println() + } + + return nil +} + +func phase3StartLeader(leader inspector.Node) error { + fmt.Printf("== Phase 3: Starting leader node (%s) ==\n", leader.Host) + + sudo := remotessh.SudoPrefix(leader) + startCmd := fmt.Sprintf("%ssystemctl start orama-node", sudo) + if err := remotessh.RunSSHStreaming(leader, startCmd); err != nil { + return fmt.Errorf("failed to start leader node %s: %w", leader.Host, err) + } + + fmt.Printf(" Waiting for leader to become Leader...\n") + maxWait := 120 + elapsed := 0 + + for elapsed < maxWait { + // Check raft state via RQLite status endpoint + checkCmd := `curl -s --max-time 3 
http://localhost:5001/status 2>/dev/null | python3 -c " +import sys,json +try: + d=json.load(sys.stdin) + print(d.get('store',{}).get('raft',{}).get('state','')) +except: + print('') +" 2>/dev/null || echo ""` + + // We can't easily capture output from RunSSHStreaming, so we use a simple approach + // Check via a combined command that prints a marker + stateCheckCmd := fmt.Sprintf(`state=$(%s); echo "RAFT_STATE=$state"`, checkCmd) + // Since RunSSHStreaming prints to stdout, we'll poll and let user see the state + fmt.Printf(" ... polling (%ds / %ds)\n", elapsed, maxWait) + + // Try to check state - the output goes to stdout via streaming + _ = remotessh.RunSSHStreaming(leader, stateCheckCmd) + + time.Sleep(5 * time.Second) + elapsed += 5 + } + + fmt.Printf(" Leader start complete. Check output above for state.\n\n") + return nil +} + +func phase4StartFollowers(followers []inspector.Node) error { + fmt.Printf("== Phase 4: Starting %d remaining nodes ==\n", len(followers)) + + batchSize := 3 + for i, node := range followers { + sudo := remotessh.SudoPrefix(node) + fmt.Printf(" Starting %s ... ", node.Host) + + cmd := fmt.Sprintf("%ssystemctl start orama-node && echo STARTED", sudo) + if err := remotessh.RunSSHStreaming(node, cmd); err != nil { + fmt.Printf("FAILED: %v\n", err) + continue + } + fmt.Println() + + // Batch delay for cluster stability + if (i+1)%batchSize == 0 && i+1 < len(followers) { + fmt.Printf(" (waiting 15s between batches for cluster stability)\n") + time.Sleep(15 * time.Second) + } + } + + fmt.Println() + return nil +} + +func phase5Verify(nodes []inspector.Node, leader inspector.Node) { + fmt.Printf("== Phase 5: Waiting for cluster to stabilize ==\n") + + // Wait in 30s increments + for _, s := range []int{30, 60, 90, 120} { + time.Sleep(30 * time.Second) + fmt.Printf(" ... 
%ds\n", s) + } + + fmt.Printf("\n== Cluster status ==\n") + for _, node := range nodes { + marker := "" + if node.Host == leader.Host { + marker = " ← LEADER" + } + + checkCmd := `curl -s --max-time 5 http://localhost:5001/status 2>/dev/null | python3 -c " +import sys,json +try: + d=json.load(sys.stdin) + r=d.get('store',{}).get('raft',{}) + n=d.get('store',{}).get('num_nodes','?') + print(f'state={r.get(\"state\",\"?\")} commit={r.get(\"commit_index\",\"?\")} leader={r.get(\"leader\",{}).get(\"node_id\",\"?\")} nodes={n}') +except: + print('NO_RESPONSE') +" 2>/dev/null || echo "SSH_FAILED"` + + fmt.Printf(" %s%s: ", node.Host, marker) + _ = remotessh.RunSSHStreaming(node, checkCmd) + fmt.Println() + } + + fmt.Printf("\n== Recovery complete ==\n\n") + fmt.Printf("Next steps:\n") + fmt.Printf(" 1. Run 'orama monitor report --env <devnet|testnet>' to verify full cluster health\n") + fmt.Printf(" 2. If some nodes show Candidate state, give them more time (up to 5 min)\n") + fmt.Printf(" 3. If nodes fail to join, check /opt/orama/.orama/logs/rqlite-node.log on the node\n") +} diff --git a/pkg/cli/production/rollout/rollout.go b/pkg/cli/production/rollout/rollout.go new file mode 100644 index 0000000..0ee5ffa --- /dev/null +++ b/pkg/cli/production/rollout/rollout.go @@ -0,0 +1,102 @@ +package rollout + +import ( + "flag" + "fmt" + "os" + "time" + + "github.com/DeBrosOfficial/network/pkg/cli/build" + "github.com/DeBrosOfficial/network/pkg/cli/production/push" + "github.com/DeBrosOfficial/network/pkg/cli/production/upgrade" +) + +// Flags holds rollout command flags. +type Flags struct { + Env string // Target environment (devnet, testnet) + NoBuild bool // Skip the build step + Yes bool // Skip confirmation + Delay int // Delay in seconds between nodes +} + +// Handle is the entry point for the rollout command. 
+func Handle(args []string) { + flags, err := parseFlags(args) + if err != nil { + if err == flag.ErrHelp { + return + } + fmt.Fprintf(os.Stderr, "Error: %v\n", err) + os.Exit(1) + } + + if err := execute(flags); err != nil { + fmt.Fprintf(os.Stderr, "Error: %v\n", err) + os.Exit(1) + } +} + +func parseFlags(args []string) (*Flags, error) { + fs := flag.NewFlagSet("rollout", flag.ContinueOnError) + fs.SetOutput(os.Stderr) + + flags := &Flags{} + fs.StringVar(&flags.Env, "env", "", "Target environment (devnet, testnet) [required]") + fs.BoolVar(&flags.NoBuild, "no-build", false, "Skip build step (use existing archive)") + fs.BoolVar(&flags.Yes, "yes", false, "Skip confirmation") + fs.IntVar(&flags.Delay, "delay", 30, "Delay in seconds between nodes during rolling upgrade") + + if err := fs.Parse(args); err != nil { + return nil, err + } + + if flags.Env == "" { + return nil, fmt.Errorf("--env is required\nUsage: orama node rollout --env <devnet|testnet>") + } + + return flags, nil +} + +func execute(flags *Flags) error { + start := time.Now() + + fmt.Printf("Rollout to %s\n", flags.Env) + fmt.Printf(" Build: %s\n", boolStr(!flags.NoBuild, "yes", "skip")) + fmt.Printf(" Delay: %ds between nodes\n\n", flags.Delay) + + // Step 1: Build + if !flags.NoBuild { + fmt.Printf("Step 1/3: Building binary archive...\n\n") + buildFlags := &build.Flags{ + Arch: "amd64", + } + builder := build.NewBuilder(buildFlags) + if err := builder.Build(); err != nil { + return fmt.Errorf("build failed: %w", err) + } + fmt.Println() + } else { + fmt.Printf("Step 1/3: Build skipped (--no-build)\n\n") + } + + // Step 2: Push + fmt.Printf("Step 2/3: Pushing to all %s nodes...\n\n", flags.Env) + push.Handle([]string{"--env", flags.Env}) + + fmt.Println() + + // Step 3: Rolling upgrade + fmt.Printf("Step 3/3: Rolling upgrade across %s...\n\n", flags.Env) + upgrade.Handle([]string{"--env", flags.Env, "--delay", fmt.Sprintf("%d", flags.Delay)}) + + elapsed := time.Since(start).Round(time.Second) + 
fmt.Printf("\nRollout complete in %s\n", elapsed) + return nil +} + +func boolStr(b bool, trueStr, falseStr string) string { + if b { + return trueStr + } + return falseStr +} diff --git a/pkg/cli/production/upgrade/command.go b/pkg/cli/production/upgrade/command.go index f9d7793..3085c31 100644 --- a/pkg/cli/production/upgrade/command.go +++ b/pkg/cli/production/upgrade/command.go @@ -14,7 +14,17 @@ func Handle(args []string) { os.Exit(1) } - // Check root privileges + // Remote rolling upgrade when --env is specified + if flags.Env != "" { + remote := NewRemoteUpgrader(flags) + if err := remote.Execute(); err != nil { + fmt.Fprintf(os.Stderr, "❌ %v\n", err) + os.Exit(1) + } + return + } + + // Local upgrade: requires root if os.Geteuid() != 0 { fmt.Fprintf(os.Stderr, "❌ Production upgrade must be run as root (use sudo)\n") os.Exit(1) diff --git a/pkg/cli/production/upgrade/flags.go b/pkg/cli/production/upgrade/flags.go index dc2006e..ae2073f 100644 --- a/pkg/cli/production/upgrade/flags.go +++ b/pkg/cli/production/upgrade/flags.go @@ -13,6 +13,11 @@ type Flags struct { SkipChecks bool Nameserver *bool // Pointer so we can detect if explicitly set vs default + // Remote upgrade flags + Env string // Target environment for remote rolling upgrade + NodeFilter string // Single node IP to upgrade (optional) + Delay int // Delay in seconds between nodes during rolling upgrade + // Anyone flags AnyoneClient bool AnyoneRelay bool @@ -38,6 +43,11 @@ func ParseFlags(args []string) (*Flags, error) { fs.BoolVar(&flags.RestartServices, "restart", false, "Automatically restart services after upgrade") fs.BoolVar(&flags.SkipChecks, "skip-checks", false, "Skip minimum resource checks (RAM/CPU)") + // Remote upgrade flags + fs.StringVar(&flags.Env, "env", "", "Target environment for remote rolling upgrade (devnet, testnet)") + fs.StringVar(&flags.NodeFilter, "node", "", "Upgrade a single node IP only") + fs.IntVar(&flags.Delay, "delay", 30, "Delay in seconds between nodes during 
rolling upgrade") + // Nameserver flag - use pointer to detect if explicitly set nameserver := fs.Bool("nameserver", false, "Make this node a nameserver (uses saved preference if not specified)") diff --git a/pkg/cli/production/upgrade/orchestrator.go b/pkg/cli/production/upgrade/orchestrator.go index 455b563..459c12f 100644 --- a/pkg/cli/production/upgrade/orchestrator.go +++ b/pkg/cli/production/upgrade/orchestrator.go @@ -424,7 +424,11 @@ func (o *Orchestrator) stopAllNamespaceServices(serviceController *production.Sy // installNamespaceTemplates installs systemd template unit files for namespace services func (o *Orchestrator) installNamespaceTemplates() error { - sourceDir := filepath.Join(o.oramaHome, "src", "systemd") + // Check pre-built archive path first, fall back to source path + sourceDir := production.OramaSystemdDir + if _, err := os.Stat(sourceDir); os.IsNotExist(err) { + sourceDir = filepath.Join(o.oramaHome, "src", "systemd") + } systemdDir := "/etc/systemd/system" templates := []string{ diff --git a/pkg/cli/production/upgrade/remote.go b/pkg/cli/production/upgrade/remote.go new file mode 100644 index 0000000..e91096c --- /dev/null +++ b/pkg/cli/production/upgrade/remote.go @@ -0,0 +1,69 @@ +package upgrade + +import ( + "fmt" + "time" + + "github.com/DeBrosOfficial/network/pkg/cli/remotessh" + "github.com/DeBrosOfficial/network/pkg/inspector" +) + +// RemoteUpgrader handles rolling upgrades across remote nodes. +type RemoteUpgrader struct { + flags *Flags +} + +// NewRemoteUpgrader creates a new remote upgrader. +func NewRemoteUpgrader(flags *Flags) *RemoteUpgrader { + return &RemoteUpgrader{flags: flags} +} + +// Execute runs the remote rolling upgrade. 
+func (r *RemoteUpgrader) Execute() error { + nodes, err := remotessh.LoadEnvNodes(r.flags.Env) + if err != nil { + return err + } + + // Filter to single node if specified + if r.flags.NodeFilter != "" { + nodes = remotessh.FilterByIP(nodes, r.flags.NodeFilter) + if len(nodes) == 0 { + return fmt.Errorf("node %s not found in %s environment", r.flags.NodeFilter, r.flags.Env) + } + } + + fmt.Printf("Rolling upgrade: %s (%d nodes, %ds delay)\n\n", r.flags.Env, len(nodes), r.flags.Delay) + + // Print execution plan + for i, node := range nodes { + fmt.Printf(" %d. %s (%s)\n", i+1, node.Host, node.Role) + } + fmt.Println() + + for i, node := range nodes { + fmt.Printf("[%d/%d] Upgrading %s (%s)...\n", i+1, len(nodes), node.Host, node.Role) + + if err := r.upgradeNode(node); err != nil { + return fmt.Errorf("upgrade failed on %s: %w\nStopping rollout — remaining nodes not upgraded", node.Host, err) + } + + fmt.Printf(" ✓ %s upgraded\n", node.Host) + + // Wait between nodes (except after the last one) + if i < len(nodes)-1 && r.flags.Delay > 0 { + fmt.Printf(" Waiting %ds before next node...\n\n", r.flags.Delay) + time.Sleep(time.Duration(r.flags.Delay) * time.Second) + } + } + + fmt.Printf("\n✓ Rolling upgrade complete (%d nodes)\n", len(nodes)) + return nil +} + +// upgradeNode runs `orama node upgrade --restart` on a single remote node. 
+func (r *RemoteUpgrader) upgradeNode(node inspector.Node) error { + sudo := remotessh.SudoPrefix(node) + cmd := fmt.Sprintf("%sorama node upgrade --restart", sudo) + return remotessh.RunSSHStreaming(node, cmd) +} diff --git a/pkg/cli/remotessh/config.go b/pkg/cli/remotessh/config.go new file mode 100644 index 0000000..19ab610 --- /dev/null +++ b/pkg/cli/remotessh/config.go @@ -0,0 +1,77 @@ +package remotessh + +import ( + "fmt" + "os" + "path/filepath" + "strings" + + "github.com/DeBrosOfficial/network/pkg/inspector" +) + +// FindRemoteNodesConf searches for the remote-nodes.conf file +// in common locations relative to the current directory or project root. +func FindRemoteNodesConf() string { + candidates := []string{ + "scripts/remote-nodes.conf", + "../scripts/remote-nodes.conf", + "network/scripts/remote-nodes.conf", + } + + // Also check from home dir + home, _ := os.UserHomeDir() + if home != "" { + candidates = append(candidates, filepath.Join(home, ".orama", "remote-nodes.conf")) + } + + for _, c := range candidates { + if _, err := os.Stat(c); err == nil { + return c + } + } + return "" +} + +// LoadEnvNodes loads all nodes for a given environment from remote-nodes.conf. 
+func LoadEnvNodes(env string) ([]inspector.Node, error) { + confPath := FindRemoteNodesConf() + if confPath == "" { + return nil, fmt.Errorf("remote-nodes.conf not found (checked scripts/, ../scripts/, network/scripts/)") + } + + nodes, err := inspector.LoadNodes(confPath) + if err != nil { + return nil, fmt.Errorf("failed to load %s: %w", confPath, err) + } + + filtered := inspector.FilterByEnv(nodes, env) + if len(filtered) == 0 { + return nil, fmt.Errorf("no nodes found for environment %q in %s", env, confPath) + } + + // Expand ~ in SSH key paths + home, _ := os.UserHomeDir() + for i := range filtered { + if filtered[i].SSHKey != "" && strings.HasPrefix(filtered[i].SSHKey, "~") { + filtered[i].SSHKey = filepath.Join(home, filtered[i].SSHKey[1:]) + } + } + + return filtered, nil +} + +// PickHubNode selects the first node as the hub for fanout distribution. +func PickHubNode(nodes []inspector.Node) inspector.Node { + return nodes[0] +} + +// FilterByIP returns nodes matching the given IP address. +func FilterByIP(nodes []inspector.Node, ip string) []inspector.Node { + var filtered []inspector.Node + for _, n := range nodes { + if n.Host == ip { + filtered = append(filtered, n) + } + } + return filtered +} diff --git a/pkg/cli/remotessh/ssh.go b/pkg/cli/remotessh/ssh.go new file mode 100644 index 0000000..e77d7e0 --- /dev/null +++ b/pkg/cli/remotessh/ssh.go @@ -0,0 +1,86 @@ +package remotessh + +import ( + "fmt" + "os" + "os/exec" + + "github.com/DeBrosOfficial/network/pkg/inspector" +) + +// UploadFile copies a local file to a remote host via SCP. 
+func UploadFile(node inspector.Node, localPath, remotePath string) error { + dest := fmt.Sprintf("%s@%s:%s", node.User, node.Host, remotePath) + + var cmd *exec.Cmd + if node.SSHKey != "" { + cmd = exec.Command("scp", + "-o", "StrictHostKeyChecking=no", + "-o", "ConnectTimeout=10", + "-i", node.SSHKey, + localPath, dest, + ) + } else { + if _, err := exec.LookPath("sshpass"); err != nil { + return fmt.Errorf("sshpass not found — install it: brew install hudochenkov/sshpass/sshpass") + } + cmd = exec.Command("sshpass", "-p", node.Password, + "scp", + "-o", "StrictHostKeyChecking=no", + "-o", "ConnectTimeout=10", + "-o", "PreferredAuthentications=password", + "-o", "PubkeyAuthentication=no", + localPath, dest, + ) + } + + cmd.Stdout = os.Stdout + cmd.Stderr = os.Stderr + + if err := cmd.Run(); err != nil { + return fmt.Errorf("SCP to %s failed: %w", node.Host, err) + } + return nil +} + +// RunSSHStreaming executes a command on a remote host via SSH, +// streaming stdout/stderr to the local terminal in real-time. +func RunSSHStreaming(node inspector.Node, command string) error { + var cmd *exec.Cmd + if node.SSHKey != "" { + cmd = exec.Command("ssh", + "-o", "StrictHostKeyChecking=no", + "-o", "ConnectTimeout=10", + "-i", node.SSHKey, + fmt.Sprintf("%s@%s", node.User, node.Host), + command, + ) + } else { + cmd = exec.Command("sshpass", "-p", node.Password, + "ssh", + "-o", "StrictHostKeyChecking=no", + "-o", "ConnectTimeout=10", + "-o", "PreferredAuthentications=password", + "-o", "PubkeyAuthentication=no", + fmt.Sprintf("%s@%s", node.User, node.Host), + command, + ) + } + + cmd.Stdout = os.Stdout + cmd.Stderr = os.Stderr + cmd.Stdin = os.Stdin + + if err := cmd.Run(); err != nil { + return fmt.Errorf("SSH to %s failed: %w", node.Host, err) + } + return nil +} + +// SudoPrefix returns "sudo " for non-root users, empty for root. 
+func SudoPrefix(node inspector.Node) string { + if node.User == "root" { + return "" + } + return "sudo " +} diff --git a/pkg/constants/versions.go b/pkg/constants/versions.go new file mode 100644 index 0000000..8514135 --- /dev/null +++ b/pkg/constants/versions.go @@ -0,0 +1,13 @@ +package constants + +// External dependency versions used across the network. +// Single source of truth — all installer files and build scripts import from here. +const ( + GoVersion = "1.24.6" + OlricVersion = "v0.7.0" + IPFSKuboVersion = "v0.38.2" + IPFSClusterVersion = "v1.1.2" + RQLiteVersion = "8.43.0" + CoreDNSVersion = "1.12.0" + CaddyVersion = "2.10.2" +) diff --git a/pkg/environments/production/installers/caddy.go b/pkg/environments/production/installers/caddy.go index 449653b..d8f73e7 100644 --- a/pkg/environments/production/installers/caddy.go +++ b/pkg/environments/production/installers/caddy.go @@ -7,11 +7,12 @@ import ( "os/exec" "path/filepath" "strings" + + "github.com/DeBrosOfficial/network/pkg/constants" ) const ( - caddyVersion = "2.10.2" - xcaddyRepo = "github.com/caddyserver/xcaddy/cmd/xcaddy@latest" + xcaddyRepo = "github.com/caddyserver/xcaddy/cmd/xcaddy@latest" ) // CaddyInstaller handles Caddy installation with custom DNS module @@ -26,7 +27,7 @@ type CaddyInstaller struct { func NewCaddyInstaller(arch string, logWriter io.Writer, oramaHome string) *CaddyInstaller { return &CaddyInstaller{ BaseInstaller: NewBaseInstaller(arch, logWriter), - version: caddyVersion, + version: constants.CaddyVersion, oramaHome: oramaHome, dnsModule: filepath.Join(oramaHome, "src", "pkg", "caddy", "dns", "orama"), } @@ -356,7 +357,7 @@ func (ci *CaddyInstaller) generateGoMod() string { go 1.22 require ( - github.com/caddyserver/caddy/v2 v2.` + caddyVersion[2:] + ` + github.com/caddyserver/caddy/v2 v2.` + constants.CaddyVersion[2:] + ` github.com/libdns/libdns v1.1.0 ) ` diff --git a/pkg/environments/production/installers/coredns.go 
b/pkg/environments/production/installers/coredns.go index 348a447..7876517 100644 --- a/pkg/environments/production/installers/coredns.go +++ b/pkg/environments/production/installers/coredns.go @@ -10,11 +10,12 @@ import ( "os/exec" "path/filepath" "time" + + "github.com/DeBrosOfficial/network/pkg/constants" ) const ( - coreDNSVersion = "1.12.0" - coreDNSRepo = "https://github.com/coredns/coredns.git" + coreDNSRepo = "https://github.com/coredns/coredns.git" ) // CoreDNSInstaller handles CoreDNS installation with RQLite plugin @@ -29,7 +30,7 @@ type CoreDNSInstaller struct { func NewCoreDNSInstaller(arch string, logWriter io.Writer, oramaHome string) *CoreDNSInstaller { return &CoreDNSInstaller{ BaseInstaller: NewBaseInstaller(arch, logWriter), - version: coreDNSVersion, + version: constants.CoreDNSVersion, oramaHome: oramaHome, rqlitePlugin: filepath.Join(oramaHome, "src", "pkg", "coredns", "rqlite"), } diff --git a/pkg/environments/production/installers/gateway.go b/pkg/environments/production/installers/gateway.go index a8e0f03..a37981a 100644 --- a/pkg/environments/production/installers/gateway.go +++ b/pkg/environments/production/installers/gateway.go @@ -7,6 +7,8 @@ import ( "os/exec" "path/filepath" "strings" + + "github.com/DeBrosOfficial/network/pkg/constants" ) // GatewayInstaller handles Orama binary installation (including gateway) @@ -124,7 +126,7 @@ func (gi *GatewayInstaller) InstallDeBrosBinaries(oramaHome string) error { // InstallGo downloads and installs Go toolchain func (gi *GatewayInstaller) InstallGo() error { - requiredVersion := "1.24.6" + requiredVersion := constants.GoVersion if goPath, err := exec.LookPath("go"); err == nil { // Check version - upgrade if too old out, _ := exec.Command(goPath, "version").Output() diff --git a/pkg/environments/production/installers/ipfs.go b/pkg/environments/production/installers/ipfs.go index f1c32c6..3346d9f 100644 --- a/pkg/environments/production/installers/ipfs.go +++ 
b/pkg/environments/production/installers/ipfs.go @@ -7,6 +7,8 @@ import ( "os" "os/exec" "path/filepath" + + "github.com/DeBrosOfficial/network/pkg/constants" ) // IPFSInstaller handles IPFS (Kubo) installation @@ -19,7 +21,7 @@ type IPFSInstaller struct { func NewIPFSInstaller(arch string, logWriter io.Writer) *IPFSInstaller { return &IPFSInstaller{ BaseInstaller: NewBaseInstaller(arch, logWriter), - version: "v0.38.2", + version: constants.IPFSKuboVersion, } } diff --git a/pkg/environments/production/installers/ipfs_cluster.go b/pkg/environments/production/installers/ipfs_cluster.go index 23f695b..dfe5999 100644 --- a/pkg/environments/production/installers/ipfs_cluster.go +++ b/pkg/environments/production/installers/ipfs_cluster.go @@ -8,6 +8,8 @@ import ( "os/exec" "path/filepath" "strings" + + "github.com/DeBrosOfficial/network/pkg/constants" ) // IPFSClusterInstaller handles IPFS Cluster Service installation @@ -42,7 +44,7 @@ func (ici *IPFSClusterInstaller) Install() error { return fmt.Errorf("go not found - required to install IPFS Cluster. 
Please install Go first") } - cmd := exec.Command("go", "install", "github.com/ipfs-cluster/ipfs-cluster/cmd/ipfs-cluster-service@latest") + cmd := exec.Command("go", "install", fmt.Sprintf("github.com/ipfs-cluster/ipfs-cluster/cmd/ipfs-cluster-service@%s", constants.IPFSClusterVersion)) cmd.Env = append(os.Environ(), "GOBIN=/usr/local/bin", "GOPROXY=https://proxy.golang.org|direct", "GONOSUMDB=*") if err := cmd.Run(); err != nil { return fmt.Errorf("failed to install IPFS Cluster: %w", err) diff --git a/pkg/environments/production/installers/olric.go b/pkg/environments/production/installers/olric.go index 409b9c9..ad56066 100644 --- a/pkg/environments/production/installers/olric.go +++ b/pkg/environments/production/installers/olric.go @@ -5,6 +5,8 @@ import ( "io" "os" "os/exec" + + "github.com/DeBrosOfficial/network/pkg/constants" ) // OlricInstaller handles Olric server installation @@ -17,7 +19,7 @@ type OlricInstaller struct { func NewOlricInstaller(arch string, logWriter io.Writer) *OlricInstaller { return &OlricInstaller{ BaseInstaller: NewBaseInstaller(arch, logWriter), - version: "v0.7.0", + version: constants.OlricVersion, } } diff --git a/pkg/environments/production/installers/rqlite.go b/pkg/environments/production/installers/rqlite.go index ea2bed6..7d2bb5e 100644 --- a/pkg/environments/production/installers/rqlite.go +++ b/pkg/environments/production/installers/rqlite.go @@ -5,6 +5,8 @@ import ( "io" "os" "os/exec" + + "github.com/DeBrosOfficial/network/pkg/constants" ) // RQLiteInstaller handles RQLite installation @@ -17,7 +19,7 @@ type RQLiteInstaller struct { func NewRQLiteInstaller(arch string, logWriter io.Writer) *RQLiteInstaller { return &RQLiteInstaller{ BaseInstaller: NewBaseInstaller(arch, logWriter), - version: "8.43.0", + version: constants.RQLiteVersion, } } diff --git a/pkg/environments/production/orchestrator.go b/pkg/environments/production/orchestrator.go index b50930d..7e5d371 100644 --- a/pkg/environments/production/orchestrator.go 
+++ b/pkg/environments/production/orchestrator.go @@ -259,10 +259,47 @@ func (ps *ProductionSetup) Phase2ProvisionEnvironment() error { return nil } -// Phase2bInstallBinaries installs external binaries and Orama components +// Phase2bInstallBinaries installs external binaries and Orama components. +// Auto-detects pre-built mode if /opt/orama/manifest.json exists. func (ps *ProductionSetup) Phase2bInstallBinaries() error { ps.logf("Phase 2b: Installing binaries...") + // Auto-detect pre-built binary archive + if HasPreBuiltArchive() { + manifest, err := LoadPreBuiltManifest() + if err != nil { + ps.logf(" ⚠️ Pre-built manifest found but unreadable: %v", err) + ps.logf(" Falling back to source mode...") + if err := ps.installFromSource(); err != nil { + return err + } + } else { + if err := ps.installFromPreBuilt(manifest); err != nil { + return err + } + } + } else { + // Source mode: compile everything on the VPS (original behavior) + if err := ps.installFromSource(); err != nil { + return err + } + } + + // Anyone relay/client configuration runs after BOTH paths. + // Pre-built mode installs the anon binary via .deb/apt; + // source mode installs it via the relay installer's Install(). + // Configuration (anonrc, bandwidth, migration) is always needed. + if err := ps.configureAnyone(); err != nil { + ps.logf(" ⚠️ Anyone configuration warning: %v", err) + } + + ps.logf(" ✓ All binaries installed") + return nil +} + +// installFromSource installs binaries by compiling from source on the VPS. +// This is the original Phase2bInstallBinaries logic, preserved as fallback. 
+func (ps *ProductionSetup) installFromSource() error { // Install system dependencies (always needed for runtime libs) if err := ps.binaryInstaller.InstallSystemDependencies(); err != nil { ps.logf(" ⚠️ System dependencies warning: %v", err) @@ -307,7 +344,12 @@ func (ps *ProductionSetup) Phase2bInstallBinaries() error { ps.logf(" ⚠️ IPFS Cluster install warning: %v", err) } - // Install Anyone (client or relay based on configuration) — apt-based, not Go + return nil +} + +// configureAnyone handles Anyone relay/client installation and configuration. +// This runs after both pre-built and source mode binary installation. +func (ps *ProductionSetup) configureAnyone() error { if ps.IsAnyoneRelay() { ps.logf(" Installing Anyone relay (operator mode)...") relayConfig := installers.AnyoneRelayConfig{ @@ -351,7 +393,7 @@ func (ps *ProductionSetup) Phase2bInstallBinaries() error { } } - // Install the relay + // Install the relay (apt-based, not Go — idempotent if already installed via .deb) if err := relayInstaller.Install(); err != nil { ps.logf(" ⚠️ Anyone relay install warning: %v", err) } @@ -364,7 +406,7 @@ func (ps *ProductionSetup) Phase2bInstallBinaries() error { ps.logf(" Installing Anyone client-only mode (SOCKS5 proxy)...") clientInstaller := installers.NewAnyoneRelayInstaller(ps.arch, ps.logWriter, installers.AnyoneRelayConfig{}) - // Install the anon binary (same apt package as relay) + // Install the anon binary (same apt package as relay — idempotent) if err := clientInstaller.Install(); err != nil { ps.logf(" ⚠️ Anyone client install warning: %v", err) } @@ -375,7 +417,6 @@ func (ps *ProductionSetup) Phase2bInstallBinaries() error { } } - ps.logf(" ✓ All binaries installed") return nil } diff --git a/pkg/environments/production/paths.go b/pkg/environments/production/paths.go index 07e38a9..a2cd310 100644 --- a/pkg/environments/production/paths.go +++ b/pkg/environments/production/paths.go @@ -11,4 +11,10 @@ const ( OramaSecrets = 
"/opt/orama/.orama/secrets" OramaData = "/opt/orama/.orama/data" OramaLogs = "/opt/orama/.orama/logs" + + // Pre-built binary archive paths (created by `orama build`) + OramaManifest = "/opt/orama/manifest.json" + OramaArchiveBin = "/opt/orama/bin" // Pre-built binaries + OramaSystemdDir = "/opt/orama/systemd" // Namespace service templates + OramaPackagesDir = "/opt/orama/packages" // .deb packages (e.g., anon.deb) ) diff --git a/pkg/environments/production/prebuilt.go b/pkg/environments/production/prebuilt.go new file mode 100644 index 0000000..689b8ba --- /dev/null +++ b/pkg/environments/production/prebuilt.go @@ -0,0 +1,232 @@ +package production + +import ( + "encoding/json" + "fmt" + "io" + "os" + "os/exec" + "path/filepath" +) + +// PreBuiltManifest describes the contents of a pre-built binary archive. +type PreBuiltManifest struct { + Version string `json:"version"` + Commit string `json:"commit"` + Date string `json:"date"` + Arch string `json:"arch"` + Checksums map[string]string `json:"checksums"` // filename -> sha256 +} + +// HasPreBuiltArchive checks if a pre-built binary archive has been extracted +// at /opt/orama/ by looking for the manifest.json file. +func HasPreBuiltArchive() bool { + _, err := os.Stat(OramaManifest) + return err == nil +} + +// LoadPreBuiltManifest loads and parses the pre-built manifest. +func LoadPreBuiltManifest() (*PreBuiltManifest, error) { + data, err := os.ReadFile(OramaManifest) + if err != nil { + return nil, fmt.Errorf("failed to read manifest: %w", err) + } + + var manifest PreBuiltManifest + if err := json.Unmarshal(data, &manifest); err != nil { + return nil, fmt.Errorf("failed to parse manifest: %w", err) + } + + return &manifest, nil +} + +// installFromPreBuilt installs all binaries from a pre-built archive. 
+// The archive must already be extracted at /opt/orama/ with: +// - /opt/orama/bin/ — all pre-compiled binaries +// - /opt/orama/systemd/ — namespace service templates +// - /opt/orama/packages/ — optional .deb packages +// - /opt/orama/manifest.json — archive metadata +func (ps *ProductionSetup) installFromPreBuilt(manifest *PreBuiltManifest) error { + ps.logf(" Using pre-built binary archive v%s (%s) linux/%s", manifest.Version, manifest.Commit, manifest.Arch) + + // Install minimal system dependencies (no build tools needed) + if err := ps.installMinimalSystemDeps(); err != nil { + ps.logf(" ⚠️ System dependencies warning: %v", err) + } + + // Copy binaries to runtime locations + if err := ps.deployPreBuiltBinaries(manifest); err != nil { + return fmt.Errorf("failed to deploy pre-built binaries: %w", err) + } + + // Set capabilities on binaries that need to bind privileged ports + if err := ps.setCapabilities(); err != nil { + return fmt.Errorf("failed to set capabilities: %w", err) + } + + // Disable systemd-resolved stub listener for nameserver nodes + // (needed even in pre-built mode so CoreDNS can bind port 53) + if ps.isNameserver { + if err := ps.disableResolvedStub(); err != nil { + ps.logf(" ⚠️ Failed to disable systemd-resolved stub: %v", err) + } + } + + // Install Anyone relay from .deb package if available + if ps.IsAnyoneRelay() || ps.IsAnyoneClient() { + if err := ps.installAnyonFromPreBuilt(); err != nil { + ps.logf(" ⚠️ Anyone install warning: %v", err) + } + } + + ps.logf(" ✓ All pre-built binaries installed") + return nil +} + +// installMinimalSystemDeps installs only runtime dependencies (no build tools). 
+func (ps *ProductionSetup) installMinimalSystemDeps() error { + ps.logf(" Installing minimal system dependencies...") + + cmd := exec.Command("apt-get", "update") + if err := cmd.Run(); err != nil { + ps.logf(" Warning: apt update failed") + } + + // Only install runtime deps — no build-essential, make, nodejs, npm needed + cmd = exec.Command("apt-get", "install", "-y", "curl", "wget", "unzip") + if err := cmd.Run(); err != nil { + return fmt.Errorf("failed to install minimal dependencies: %w", err) + } + + ps.logf(" ✓ Minimal system dependencies installed (no build tools needed)") + return nil +} + +// deployPreBuiltBinaries copies pre-built binaries to their runtime locations. +func (ps *ProductionSetup) deployPreBuiltBinaries(manifest *PreBuiltManifest) error { + ps.logf(" Deploying pre-built binaries...") + + // Binary → destination mapping + // Most go to /usr/local/bin/, caddy goes to /usr/bin/ + type binaryDest struct { + name string + dest string + } + + binaries := []binaryDest{ + {name: "orama", dest: "/usr/local/bin/orama"}, + {name: "orama-node", dest: "/usr/local/bin/orama-node"}, + {name: "gateway", dest: "/usr/local/bin/gateway"}, + {name: "identity", dest: "/usr/local/bin/identity"}, + {name: "sfu", dest: "/usr/local/bin/sfu"}, + {name: "turn", dest: "/usr/local/bin/turn"}, + {name: "olric-server", dest: "/usr/local/bin/olric-server"}, + {name: "ipfs", dest: "/usr/local/bin/ipfs"}, + {name: "ipfs-cluster-service", dest: "/usr/local/bin/ipfs-cluster-service"}, + {name: "rqlited", dest: "/usr/local/bin/rqlited"}, + {name: "coredns", dest: "/usr/local/bin/coredns"}, + {name: "caddy", dest: "/usr/bin/caddy"}, + } + + for _, bin := range binaries { + srcPath := filepath.Join(OramaArchiveBin, bin.name) + + // Skip optional binaries (e.g., coredns on non-nameserver nodes) + if _, ok := manifest.Checksums[bin.name]; !ok { + continue + } + + if _, err := os.Stat(srcPath); os.IsNotExist(err) { + ps.logf(" ⚠️ Binary %s not found in archive, skipping", 
bin.name) + continue + } + + if err := copyBinary(srcPath, bin.dest); err != nil { + return fmt.Errorf("failed to copy %s: %w", bin.name, err) + } + ps.logf(" ✓ %s → %s", bin.name, bin.dest) + } + + return nil +} + +// setCapabilities sets cap_net_bind_service on binaries that need to bind privileged ports. +// Both the /opt/orama/bin/ originals (used by systemd) and /usr/local/bin/ copies need caps. +func (ps *ProductionSetup) setCapabilities() error { + caps := []string{ + filepath.Join(OramaArchiveBin, "orama-node"), // systemd uses this path + "/usr/local/bin/orama-node", // PATH copy + "/usr/bin/caddy", // caddy's standard location + } + for _, binary := range caps { + if _, err := os.Stat(binary); os.IsNotExist(err) { + continue + } + cmd := exec.Command("setcap", "cap_net_bind_service=+ep", binary) + if err := cmd.Run(); err != nil { + return fmt.Errorf("setcap failed on %s: %w (node won't be able to bind port 443)", binary, err) + } + ps.logf(" ✓ setcap on %s", binary) + } + return nil +} + +// disableResolvedStub disables systemd-resolved's stub listener so CoreDNS can bind port 53. +func (ps *ProductionSetup) disableResolvedStub() error { + // Delegate to the coredns installer's method + return ps.binaryInstaller.coredns.DisableResolvedStubListener() +} + +// installAnyonFromPreBuilt installs the Anyone relay .deb from the packages dir, +// falling back to apt install if the .deb is not bundled. 
+func (ps *ProductionSetup) installAnyonFromPreBuilt() error { + debPath := filepath.Join(OramaPackagesDir, "anon.deb") + if _, err := os.Stat(debPath); err == nil { + ps.logf(" Installing Anyone from bundled .deb...") + cmd := exec.Command("dpkg", "-i", debPath) + if err := cmd.Run(); err != nil { + ps.logf(" ⚠️ dpkg -i failed, falling back to apt...") + cmd = exec.Command("apt-get", "install", "-y", "anon") + if err := cmd.Run(); err != nil { + return fmt.Errorf("failed to install anon: %w", err) + } + } + ps.logf(" ✓ Anyone installed from .deb") + return nil + } + + // No .deb bundled — fall back to apt (the existing path in source mode) + ps.logf(" Installing Anyone via apt (not bundled in archive)...") + cmd := exec.Command("apt-get", "install", "-y", "anon") + if err := cmd.Run(); err != nil { + return fmt.Errorf("failed to install anon via apt: %w", err) + } + ps.logf(" ✓ Anyone installed via apt") + return nil +} + +// copyBinary copies a file from src to dest, setting executable permissions (0755). 
+func copyBinary(src, dest string) error { + // Ensure parent directory exists + if err := os.MkdirAll(filepath.Dir(dest), 0755); err != nil { + return err + } + + srcFile, err := os.Open(src) + if err != nil { + return err + } + defer srcFile.Close() + + destFile, err := os.OpenFile(dest, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, 0755) + if err != nil { + return err + } + defer destFile.Close() + + if _, err := io.Copy(destFile, srcFile); err != nil { + return err + } + + return nil +} diff --git a/scripts/build-linux-caddy.sh b/scripts/build-linux-caddy.sh deleted file mode 100755 index 5a00ab4..0000000 --- a/scripts/build-linux-caddy.sh +++ /dev/null @@ -1,223 +0,0 @@ -#!/bin/bash -# Build Caddy with orama DNS module for linux/amd64 -# Outputs to bin-linux/caddy -set -e - -SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" -PROJECT_ROOT="$(dirname "$SCRIPT_DIR")" -OUTPUT_DIR="$PROJECT_ROOT/bin-linux" -BUILD_DIR="/tmp/caddy-build-linux" -MODULE_DIR="$BUILD_DIR/caddy-dns-orama" - -mkdir -p "$OUTPUT_DIR" - -# Ensure xcaddy is installed -if ! command -v xcaddy &> /dev/null; then - echo "Installing xcaddy..." - go install github.com/caddyserver/xcaddy/cmd/xcaddy@latest -fi - -# Clean up previous build -rm -rf "$BUILD_DIR" -mkdir -p "$MODULE_DIR" - -# Write go.mod -cat > "$MODULE_DIR/go.mod" << 'GOMOD' -module github.com/DeBrosOfficial/caddy-dns-orama - -go 1.22 - -require ( - github.com/caddyserver/caddy/v2 v2.10.2 - github.com/libdns/libdns v1.1.0 -) -GOMOD - -# Write provider.go (the orama DNS provider for ACME DNS-01 challenges) -cat > "$MODULE_DIR/provider.go" << 'PROVIDERGO' -// Package orama implements a DNS provider for Caddy that uses the Orama Network -// gateway's internal ACME API for DNS-01 challenge validation. 
-package orama - -import ( - "bytes" - "context" - "encoding/json" - "fmt" - "net/http" - "time" - - "github.com/caddyserver/caddy/v2" - "github.com/caddyserver/caddy/v2/caddyconfig/caddyfile" - "github.com/libdns/libdns" -) - -func init() { - caddy.RegisterModule(Provider{}) -} - -// Provider wraps the Orama DNS provider for Caddy. -type Provider struct { - // Endpoint is the URL of the Orama gateway's ACME API - // Default: http://localhost:6001/v1/internal/acme - Endpoint string `json:"endpoint,omitempty"` -} - -// CaddyModule returns the Caddy module information. -func (Provider) CaddyModule() caddy.ModuleInfo { - return caddy.ModuleInfo{ - ID: "dns.providers.orama", - New: func() caddy.Module { return new(Provider) }, - } -} - -// Provision sets up the module. -func (p *Provider) Provision(ctx caddy.Context) error { - if p.Endpoint == "" { - p.Endpoint = "http://localhost:6001/v1/internal/acme" - } - return nil -} - -// UnmarshalCaddyfile parses the Caddyfile configuration. -func (p *Provider) UnmarshalCaddyfile(d *caddyfile.Dispenser) error { - for d.Next() { - for d.NextBlock(0) { - switch d.Val() { - case "endpoint": - if !d.NextArg() { - return d.ArgErr() - } - p.Endpoint = d.Val() - default: - return d.Errf("unrecognized option: %s", d.Val()) - } - } - } - return nil -} - -// AppendRecords adds records to the zone. For ACME, this presents the challenge. -func (p *Provider) AppendRecords(ctx context.Context, zone string, records []libdns.Record) ([]libdns.Record, error) { - var added []libdns.Record - - for _, rec := range records { - rr := rec.RR() - if rr.Type != "TXT" { - continue - } - - fqdn := rr.Name + "." 
+ zone - - payload := map[string]string{ - "fqdn": fqdn, - "value": rr.Data, - } - - body, err := json.Marshal(payload) - if err != nil { - return added, fmt.Errorf("failed to marshal request: %w", err) - } - - req, err := http.NewRequestWithContext(ctx, "POST", p.Endpoint+"/present", bytes.NewReader(body)) - if err != nil { - return added, fmt.Errorf("failed to create request: %w", err) - } - req.Header.Set("Content-Type", "application/json") - - client := &http.Client{Timeout: 30 * time.Second} - resp, err := client.Do(req) - if err != nil { - return added, fmt.Errorf("failed to present challenge: %w", err) - } - resp.Body.Close() - - if resp.StatusCode != http.StatusOK { - return added, fmt.Errorf("present failed with status %d", resp.StatusCode) - } - - added = append(added, rec) - } - - return added, nil -} - -// DeleteRecords removes records from the zone. For ACME, this cleans up the challenge. -func (p *Provider) DeleteRecords(ctx context.Context, zone string, records []libdns.Record) ([]libdns.Record, error) { - var deleted []libdns.Record - - for _, rec := range records { - rr := rec.RR() - if rr.Type != "TXT" { - continue - } - - fqdn := rr.Name + "." 
+ zone - - payload := map[string]string{ - "fqdn": fqdn, - "value": rr.Data, - } - - body, err := json.Marshal(payload) - if err != nil { - return deleted, fmt.Errorf("failed to marshal request: %w", err) - } - - req, err := http.NewRequestWithContext(ctx, "POST", p.Endpoint+"/cleanup", bytes.NewReader(body)) - if err != nil { - return deleted, fmt.Errorf("failed to create request: %w", err) - } - req.Header.Set("Content-Type", "application/json") - - client := &http.Client{Timeout: 30 * time.Second} - resp, err := client.Do(req) - if err != nil { - return deleted, fmt.Errorf("failed to cleanup challenge: %w", err) - } - resp.Body.Close() - - if resp.StatusCode != http.StatusOK { - return deleted, fmt.Errorf("cleanup failed with status %d", resp.StatusCode) - } - - deleted = append(deleted, rec) - } - - return deleted, nil -} - -// GetRecords returns the records in the zone. Not used for ACME. -func (p *Provider) GetRecords(ctx context.Context, zone string) ([]libdns.Record, error) { - return nil, nil -} - -// SetRecords sets the records in the zone. Not used for ACME. -func (p *Provider) SetRecords(ctx context.Context, zone string, records []libdns.Record) ([]libdns.Record, error) { - return nil, nil -} - -// Interface guards -var ( - _ caddy.Module = (*Provider)(nil) - _ caddy.Provisioner = (*Provider)(nil) - _ caddyfile.Unmarshaler = (*Provider)(nil) - _ libdns.RecordAppender = (*Provider)(nil) - _ libdns.RecordDeleter = (*Provider)(nil) - _ libdns.RecordGetter = (*Provider)(nil) - _ libdns.RecordSetter = (*Provider)(nil) -) -PROVIDERGO - -# Run go mod tidy -cd "$MODULE_DIR" && go mod tidy - -# Build with xcaddy -echo "Building Caddy binary..." 
-GOOS=linux GOARCH=amd64 xcaddy build v2.10.2 \ - --with "github.com/DeBrosOfficial/caddy-dns-orama=$MODULE_DIR" \ - --output "$OUTPUT_DIR/caddy" - -# Cleanup -rm -rf "$BUILD_DIR" -echo "✓ Caddy built: bin-linux/caddy" diff --git a/scripts/build-linux-coredns.sh b/scripts/build-linux-coredns.sh deleted file mode 100755 index e3d36ab..0000000 --- a/scripts/build-linux-coredns.sh +++ /dev/null @@ -1,91 +0,0 @@ -#!/bin/bash -# Build CoreDNS with rqlite plugin for linux/amd64 -# Outputs to bin-linux/coredns -set -e - -SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" -PROJECT_ROOT="$(dirname "$SCRIPT_DIR")" -OUTPUT_DIR="$PROJECT_ROOT/bin-linux" -BUILD_DIR="/tmp/coredns-build-linux" - -mkdir -p "$OUTPUT_DIR" - -# Clean up previous build -rm -rf "$BUILD_DIR" - -# Clone CoreDNS -echo "Cloning CoreDNS v1.12.0..." -git clone --depth 1 --branch v1.12.0 https://github.com/coredns/coredns.git "$BUILD_DIR" - -# Copy rqlite plugin -echo "Copying rqlite plugin..." -mkdir -p "$BUILD_DIR/plugin/rqlite" -cp "$PROJECT_ROOT/pkg/coredns/rqlite/"*.go "$BUILD_DIR/plugin/rqlite/" - -# Write plugin.cfg -cat > "$BUILD_DIR/plugin.cfg" << 'EOF' -metadata:metadata -cancel:cancel -tls:tls -reload:reload -nsid:nsid -bufsize:bufsize -root:root -bind:bind -debug:debug -trace:trace -ready:ready -health:health -pprof:pprof -prometheus:metrics -errors:errors -log:log -dnstap:dnstap -local:local -dns64:dns64 -acl:acl -any:any -chaos:chaos -loadbalance:loadbalance -cache:cache -rewrite:rewrite -header:header -dnssec:dnssec -autopath:autopath -minimal:minimal -template:template -transfer:transfer -hosts:hosts -file:file -auto:auto -secondary:secondary -loop:loop -forward:forward -grpc:grpc -erratic:erratic -whoami:whoami -on:github.com/coredns/caddy/onevent -sign:sign -view:view -rqlite:rqlite -EOF - -# Build -cd "$BUILD_DIR" -echo "Adding dependencies..." -go get github.com/miekg/dns@latest -go get go.uber.org/zap@latest -go mod tidy - -echo "Generating plugin code..." 
-go generate - -echo "Building CoreDNS binary..." -GOOS=linux GOARCH=amd64 CGO_ENABLED=0 go build -ldflags "-s -w" -trimpath -o coredns - -# Copy output -cp "$BUILD_DIR/coredns" "$OUTPUT_DIR/coredns" - -# Cleanup -rm -rf "$BUILD_DIR" -echo "✓ CoreDNS built: bin-linux/coredns" diff --git a/scripts/check-node-health.sh b/scripts/check-node-health.sh deleted file mode 100755 index 765dc50..0000000 --- a/scripts/check-node-health.sh +++ /dev/null @@ -1,143 +0,0 @@ -#!/bin/bash -# Check health of an Orama Network node via SSH -# -# Usage: ./scripts/check-node-health.sh [label] -# Example: ./scripts/check-node-health.sh ubuntu@57.128.223.92 '@5YnN5wIqYnyJ4' Hermes - -if [ $# -lt 2 ]; then - echo "Usage: $0 [label]" - echo "Example: $0 ubuntu@1.2.3.4 'mypassword' MyNode" - exit 1 -fi - -USERHOST="$1" -PASS="$2" -LABEL="${3:-$USERHOST}" - -echo "════════════════════════════════════════" -echo " Node Health: $LABEL ($USERHOST)" -echo "════════════════════════════════════════" -echo "" - -sshpass -p "$PASS" ssh -o StrictHostKeyChecking=no -o ConnectTimeout=5 "$USERHOST" "bash -s" <<'REMOTE' - -WG_IP=$(ip -4 addr show wg0 2>/dev/null | grep -oP 'inet \K[0-9.]+' || true) - -# 1. Services -echo "── Services ──" -for svc in orama-node orama-ipfs orama-ipfs-cluster orama-olric orama-anyone-relay orama-anyone-client coredns caddy; do - status=$(systemctl is-active "$svc" 2>/dev/null || true) - case "$status" in - active) mark="✓";; - inactive) mark="·";; - activating) mark="~";; - *) mark="✗";; - esac - printf " %s %-25s %s\n" "$mark" "$svc" "$status" -done -echo "" - -# 2. WireGuard -echo "── WireGuard ──" -if [ -n "$WG_IP" ]; then - echo " IP: $WG_IP" - PEERS=$(sudo wg show wg0 2>/dev/null | grep -c '^peer:' || echo 0) - echo " Peers: $PEERS" - sudo wg show wg0 2>/dev/null | grep -A2 '^peer:' | grep -E 'endpoint|latest handshake' | while read -r line; do - echo " $line" - done -else - echo " not configured" -fi -echo "" - -# 3. 
RQLite (HTTP API on port 5001) -echo "── RQLite ──" -RQLITE_ADDR="" -for addr in "${WG_IP}:5001" "localhost:5001"; do - if curl -sf "http://${addr}/nodes" >/dev/null 2>&1; then - RQLITE_ADDR="$addr" - break - fi -done -if [ -n "$RQLITE_ADDR" ]; then - # Get node state from status - STATE=$(curl -sf "http://${RQLITE_ADDR}/status" 2>/dev/null | python3 -c " -import sys,json -d=json.load(sys.stdin) -print(d.get('store',{}).get('raft',{}).get('state','?')) -" 2>/dev/null || echo "?") - echo " This node: $STATE" - # Get cluster nodes - curl -sf "http://${RQLITE_ADDR}/nodes" 2>/dev/null | python3 -c " -import sys,json -d=json.load(sys.stdin) -for addr,info in sorted(d.items()): - r = 'ok' if info.get('reachable') else 'UNREACHABLE' - l = ' (LEADER)' if info.get('leader') else '' - v = 'voter' if info.get('voter') else 'non-voter' - print(' ' + addr + ': ' + r + ', ' + v + l) -print(' Total: ' + str(len(d)) + ' nodes') -" 2>/dev/null || echo " (parse error)" -else - echo " not responding" -fi -echo "" - -# 4. IPFS -echo "── IPFS ──" -PEERS=$(IPFS_PATH=/opt/orama/.orama/data/ipfs/repo /usr/local/bin/ipfs swarm peers 2>/dev/null) -if [ -n "$PEERS" ]; then - COUNT=$(echo "$PEERS" | wc -l) - echo " Connected peers: $COUNT" - echo "$PEERS" | while read -r addr; do echo " $addr"; done -else - echo " no peers connected" -fi -echo "" - -# 5. Gateway -echo "── Gateway ──" -GW=$(curl -sf http://localhost:6001/health 2>/dev/null) -if [ -n "$GW" ]; then - echo "$GW" | python3 -c " -import sys,json -d=json.load(sys.stdin) -print(' Status: ' + d.get('status','?')) -srv=d.get('server',{}) -print(' Uptime: ' + srv.get('uptime','?')) -cli=d.get('client',{}) -if cli: - checks=cli.get('checks',{}) - for k,v in checks.items(): - print(' ' + k + ': ' + str(v)) -" 2>/dev/null || echo " responding (parse error)" -else - echo " not responding" -fi -echo "" - -# 6. 
Olric -echo "── Olric ──" -if systemctl is-active orama-olric &>/dev/null; then - echo " service: active" - # Olric doesn't have a simple HTTP health endpoint; just check the process - OLRIC_PID=$(pgrep -f olric-server || true) - if [ -n "$OLRIC_PID" ]; then - echo " pid: $OLRIC_PID" - echo " listening: $(sudo ss -tlnp 2>/dev/null | grep olric | awk '{print $4}' | tr '\n' ' ')" - fi -else - echo " not running" -fi -echo "" - -# 7. Resources -echo "── Resources ──" -echo " RAM: $(free -h | awk '/Mem:/{print $3"/"$2}')" -echo " Disk: $(df -h / | awk 'NR==2{print $3"/"$2" ("$5" used)"}')" -echo "" - -REMOTE - -echo "════════════════════════════════════════" diff --git a/scripts/clean-testnet.sh b/scripts/clean-testnet.sh deleted file mode 100755 index 1b5ddbe..0000000 --- a/scripts/clean-testnet.sh +++ /dev/null @@ -1,249 +0,0 @@ -#!/usr/bin/env bash -# -# Clean testnet nodes for fresh reinstall. -# Preserves Anyone relay keys (/var/lib/anon/) for --anyone-migrate. -# DOES NOT TOUCH DEVNET NODES. -# -# Usage: scripts/clean-testnet.sh [--nuclear] [IP ...] -# --nuclear Also remove shared binaries (rqlited, ipfs, coredns, caddy, etc.) -# IP ... Optional: only clean specific nodes by IP (e.g. 62.72.44.87 51.178.84.172) -# If no IPs given, cleans ALL testnet nodes. -# -set -euo pipefail - -ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." 
&& pwd)" -CONF="$ROOT_DIR/scripts/remote-nodes.conf" - -[[ -f "$CONF" ]] || { echo "ERROR: Missing $CONF"; exit 1; } -command -v sshpass >/dev/null 2>&1 || { echo "ERROR: sshpass not installed (brew install sshpass / apt install sshpass)"; exit 1; } - -NUCLEAR=false -TARGET_IPS=() -for arg in "$@"; do - if [[ "$arg" == "--nuclear" ]]; then - NUCLEAR=true - else - TARGET_IPS+=("$arg") - fi -done - -SSH_OPTS=(-o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -o ConnectTimeout=10 -o LogLevel=ERROR -o PubkeyAuthentication=no) - -# ── Cleanup script (runs as root on each remote node) ───────────────────── -# Uses a quoted heredoc so NO local variable expansion happens. -# This script is uploaded to /tmp/orama-clean.sh and executed remotely. -CLEANUP_SCRIPT=$(cat <<'SCRIPT_END' -#!/bin/bash -set -e -export DEBIAN_FRONTEND=noninteractive - -echo " Stopping services..." -systemctl stop orama-node orama-gateway orama-ipfs orama-ipfs-cluster orama-olric orama-anyone-relay orama-anyone-client coredns caddy 2>/dev/null || true -systemctl disable orama-node orama-gateway orama-ipfs orama-ipfs-cluster orama-olric orama-anyone-relay orama-anyone-client coredns caddy 2>/dev/null || true -# Legacy debros-* services (pre-rename) -systemctl stop debros-anyone-relay debros-anyone-client 2>/dev/null || true -systemctl disable debros-anyone-relay debros-anyone-client 2>/dev/null || true - -echo " Killing leftover processes..." -# Kill any orama/ipfs/olric/rqlite/coredns/caddy processes that survived systemd stop -pkill -f orama-node 2>/dev/null || true -pkill -f orama-gateway 2>/dev/null || true -pkill -f ipfs-cluster-service 2>/dev/null || true -pkill -f "ipfs daemon" 2>/dev/null || true -pkill -f olric-server 2>/dev/null || true -pkill -f rqlited 2>/dev/null || true -pkill -f coredns 2>/dev/null || true -# Don't pkill caddy — it's a common system service -sleep 1 - -echo " Removing systemd service files..." 
-rm -f /etc/systemd/system/orama-*.service -rm -f /etc/systemd/system/debros-*.service -rm -f /etc/systemd/system/coredns.service -rm -f /etc/systemd/system/caddy.service -rm -f /etc/systemd/system/orama-deploy-*.service -systemctl daemon-reload - -echo " Tearing down WireGuard..." -systemctl stop wg-quick@wg0 2>/dev/null || true -wg-quick down wg0 2>/dev/null || true -systemctl disable wg-quick@wg0 2>/dev/null || true -rm -f /etc/wireguard/wg0.conf - -echo " Resetting UFW firewall..." -ufw --force reset -ufw allow 22/tcp -ufw --force enable - -echo " Removing orama data..." -rm -rf /opt/orama - -echo " Removing legacy user and data..." -userdel -r orama 2>/dev/null || true -rm -rf /home/orama - -echo " Removing sudoers files..." -rm -f /etc/sudoers.d/orama-access -rm -f /etc/sudoers.d/orama-deployments -rm -f /etc/sudoers.d/orama-wireguard - -echo " Removing CoreDNS and Caddy configs..." -rm -rf /etc/coredns -rm -rf /etc/caddy -rm -rf /var/lib/caddy - -echo " Cleaning temp files..." -rm -f /tmp/orama /tmp/network-source.tar.gz /tmp/network-source.zip -rm -rf /tmp/network-extract /tmp/coredns-build /tmp/caddy-build - -# Nuclear: also remove shared binaries -if [ "${1:-}" = "--nuclear" ]; then - echo " Removing shared binaries (nuclear)..." 
- rm -f /usr/local/bin/rqlited - rm -f /usr/local/bin/ipfs - rm -f /usr/local/bin/ipfs-cluster-service - rm -f /usr/local/bin/olric-server - rm -f /usr/local/bin/coredns - rm -f /usr/local/bin/xcaddy - rm -f /usr/bin/caddy - rm -f /usr/local/bin/orama -fi - -# Verify Anyone relay keys are preserved -if [ -d /var/lib/anon/keys ]; then - echo " Anyone relay keys PRESERVED at /var/lib/anon/keys" - if [ -f /var/lib/anon/fingerprint ]; then - fp=$(cat /var/lib/anon/fingerprint 2>/dev/null || true) - echo " Relay fingerprint: $fp" - fi - if [ -f /var/lib/anon/wallet ]; then - wallet=$(cat /var/lib/anon/wallet 2>/dev/null || true) - echo " Relay wallet: $wallet" - fi -else - echo " WARNING: No Anyone relay keys found at /var/lib/anon/" -fi - -echo " DONE" -SCRIPT_END -) - -# ── Parse testnet nodes only ────────────────────────────────────────────── -hosts=() -passes=() -users=() - -while IFS='|' read -r env hostspec pass role key; do - [[ -z "$env" || "$env" == \#* ]] && continue - env="${env%%#*}" - env="$(echo "$env" | xargs)" - [[ "$env" != "testnet" ]] && continue - - # If target IPs specified, only include matching nodes - if [[ ${#TARGET_IPS[@]} -gt 0 ]]; then - node_ip="${hostspec#*@}" - matched=false - for tip in "${TARGET_IPS[@]}"; do - [[ "$tip" == "$node_ip" ]] && matched=true && break - done - $matched || continue - fi - - hosts+=("$hostspec") - passes+=("$pass") - users+=("${hostspec%%@*}") -done < "$CONF" - -if [[ ${#hosts[@]} -eq 0 ]]; then - if [[ ${#TARGET_IPS[@]} -gt 0 ]]; then - echo "ERROR: No testnet nodes found matching: ${TARGET_IPS[*]}" - else - echo "ERROR: No testnet nodes found in $CONF" - fi - exit 1 -fi - -if [[ ${#TARGET_IPS[@]} -gt 0 ]]; then - echo "== clean-testnet.sh — ${#hosts[@]} selected node(s) ==" -else - echo "== clean-testnet.sh — ${#hosts[@]} testnet nodes (ALL) ==" -fi -for i in "${!hosts[@]}"; do - echo " [$((i+1))] ${hosts[$i]}" -done -echo "" -echo "This will CLEAN the above node(s) (stop services, remove data)." 
-echo "Anyone relay keys (/var/lib/anon/) will be PRESERVED." -$NUCLEAR && echo "Nuclear mode: shared binaries will also be removed." -echo "" -read -rp "Type 'yes' to continue: " confirm -if [[ "$confirm" != "yes" ]]; then - echo "Aborted." - exit 0 -fi - -# ── Execute cleanup on each node ────────────────────────────────────────── -failed=() -succeeded=0 -NUCLEAR_FLAG="" -$NUCLEAR && NUCLEAR_FLAG="--nuclear" - -for i in "${!hosts[@]}"; do - h="${hosts[$i]}" - p="${passes[$i]}" - u="${users[$i]}" - echo "" - echo "== [$((i+1))/${#hosts[@]}] Cleaning $h ==" - - # Step 1: Upload cleanup script - # No -n flag here — we're piping the script content via stdin - if ! echo "$CLEANUP_SCRIPT" | sshpass -p "$p" ssh "${SSH_OPTS[@]}" "$h" \ - "cat > /tmp/orama-clean.sh && chmod +x /tmp/orama-clean.sh" 2>&1; then - echo " !! FAILED to upload script to $h" - failed+=("$h") - continue - fi - - # Step 2: Execute the cleanup script as root - if [[ "$u" == "root" ]]; then - # Root: run directly - if ! sshpass -p "$p" ssh -n "${SSH_OPTS[@]}" "$h" \ - "bash /tmp/orama-clean.sh $NUCLEAR_FLAG; rm -f /tmp/orama-clean.sh" 2>&1; then - echo " !! FAILED: $h" - failed+=("$h") - continue - fi - else - # Non-root: escape password for single-quote embedding, pipe to sudo -S - escaped_p=$(printf '%s' "$p" | sed "s/'/'\\\\''/g") - if ! sshpass -p "$p" ssh -n "${SSH_OPTS[@]}" "$h" \ - "printf '%s\n' '${escaped_p}' | sudo -S bash /tmp/orama-clean.sh $NUCLEAR_FLAG; rm -f /tmp/orama-clean.sh" 2>&1; then - echo " !! FAILED: $h" - failed+=("$h") - continue - fi - fi - - echo " OK: $h cleaned" - ((succeeded++)) || true -done - -echo "" -echo "========================================" -echo "Cleanup complete: $succeeded succeeded, ${#failed[@]} failed" -if [[ ${#failed[@]} -gt 0 ]]; then - echo "" - echo "Failed nodes:" - for f in "${failed[@]}"; do - echo " - $f" - done - echo "" - echo "Troubleshooting:" - echo " 1. Check connectivity: ssh @" - echo " 2. 
Check password in remote-nodes.conf" - echo " 3. Try cleaning manually: docs/CLEAN_NODE.md" -fi -echo "" -echo "Anyone relay keys preserved at /var/lib/anon/ on all nodes." -echo "Use --anyone-migrate during install to reuse existing relay identity." -echo "========================================" diff --git a/scripts/extract-deploy.sh b/scripts/extract-deploy.sh index e4db333..484b400 100755 --- a/scripts/extract-deploy.sh +++ b/scripts/extract-deploy.sh @@ -1,5 +1,11 @@ #!/bin/bash -# Extracts /tmp/network-source.tar.gz and places the CLI binary. +# Extracts archives and places binaries on VPS nodes. +# +# Supports two archive formats: +# 1. Binary archive (from `orama build`): contains bin/, systemd/, manifest.json +# → Extracts to /opt/orama/, installs CLI from /opt/orama/bin/orama +# 2. Source archive (legacy): contains Go source code + bin-linux/orama +# → Extracts to /opt/orama/src/, installs CLI from bin-linux/orama # # Local mode (run directly on VPS): # sudo bash /opt/orama/src/scripts/extract-deploy.sh @@ -11,33 +17,85 @@ set -e SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" -ARCHIVE="/tmp/network-source.tar.gz" SRC_DIR="/opt/orama/src" BIN_DIR="/opt/orama/bin" CONF="$SCRIPT_DIR/remote-nodes.conf" +# Detect archive: binary archive has manifest.json at root +detect_archive() { + local archive="$1" + if tar tzf "$archive" 2>/dev/null | grep -q "^manifest\.json$"; then + echo "binary" + else + echo "source" + fi +} + +# Find archive: check for binary archive first, then source archive +find_archive() { + # Check for binary archive (newest orama-*-linux-*.tar.gz in /tmp) + local binary_archive + binary_archive=$(ls -t /tmp/orama-*-linux-*.tar.gz 2>/dev/null | head -1) + if [ -n "$binary_archive" ]; then + echo "$binary_archive" + return + fi + + # Fall back to source archive + if [ -f "/tmp/network-source.tar.gz" ]; then + echo "/tmp/network-source.tar.gz" + return + fi + + echo "" +} + # --- Local mode (no args) --- if [ $# -eq 0 ]; then - if [ 
! -f "$ARCHIVE" ]; then - echo "Error: $ARCHIVE not found" + ARCHIVE=$(find_archive) + if [ -z "$ARCHIVE" ]; then + echo "Error: No archive found in /tmp/" + echo " Expected: /tmp/orama-*-linux-*.tar.gz (binary) or /tmp/network-source.tar.gz (source)" exit 1 fi - echo "Extracting source..." - rm -rf "$SRC_DIR" - mkdir -p "$SRC_DIR" "$BIN_DIR" - tar xzf "$ARCHIVE" -C "$SRC_DIR" + FORMAT=$(detect_archive "$ARCHIVE") + echo "Archive: $ARCHIVE (format: $FORMAT)" - # Install CLI binary - if [ -f "$SRC_DIR/bin-linux/orama" ]; then - cp "$SRC_DIR/bin-linux/orama" /usr/local/bin/orama - chmod +x /usr/local/bin/orama - echo " ✓ CLI installed: /usr/local/bin/orama" + if [ "$FORMAT" = "binary" ]; then + # Binary archive → extract to /opt/orama/ + echo "Extracting binary archive..." + mkdir -p /opt/orama + tar xzf "$ARCHIVE" -C /opt/orama + + # Install CLI binary + if [ -f "$BIN_DIR/orama" ]; then + cp "$BIN_DIR/orama" /usr/local/bin/orama + chmod +x /usr/local/bin/orama + echo " ✓ CLI installed: /usr/local/bin/orama" + else + echo " ⚠️ CLI binary not found in archive (bin/orama)" + fi + + echo "Done. Ready for: sudo orama node install --vps-ip ..." else - echo " ⚠️ CLI binary not found in archive (bin-linux/orama)" - fi + # Source archive → extract to /opt/orama/src/ (legacy) + echo "Extracting source archive..." + rm -rf "$SRC_DIR" + mkdir -p "$SRC_DIR" "$BIN_DIR" + tar xzf "$ARCHIVE" -C "$SRC_DIR" - echo "Done. Ready for: sudo orama install --vps-ip ..." + # Install CLI binary + if [ -f "$SRC_DIR/bin-linux/orama" ]; then + cp "$SRC_DIR/bin-linux/orama" /usr/local/bin/orama + chmod +x /usr/local/bin/orama + echo " ✓ CLI installed: /usr/local/bin/orama" + else + echo " ⚠️ CLI binary not found in archive (bin-linux/orama)" + fi + + echo "Done. Ready for: sudo orama node install --vps-ip ..." 
+ fi exit 0 fi diff --git a/scripts/generate-source-archive.sh b/scripts/generate-source-archive.sh deleted file mode 100755 index 7f379b8..0000000 --- a/scripts/generate-source-archive.sh +++ /dev/null @@ -1,48 +0,0 @@ -#!/bin/bash -# Generates a tarball of the current codebase for deployment -# Output: /tmp/network-source.tar.gz -# -# Includes bin-linux/orama (CLI binary cross-compiled via make build-linux). -# All other binaries are built from source on the VPS during install. -# -# Usage: -# make build-linux -# ./scripts/generate-source-archive.sh -# ./bin/orama install --vps-ip --nameserver --domain ... - -set -e - -SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" -PROJECT_ROOT="$(dirname "$SCRIPT_DIR")" -OUTPUT="/tmp/network-source.tar.gz" - -cd "$PROJECT_ROOT" - -# Remove root-level binaries before archiving (they'll be rebuilt on VPS) -rm -f gateway cli node orama-cli-linux 2>/dev/null - -# Verify CLI binary exists -if [ ! -f "bin-linux/orama" ]; then - echo "Error: bin-linux/orama not found. Run 'make build-linux' first." - exit 1 -fi - -echo "Generating source archive (with CLI binary)..." - -tar czf "$OUTPUT" \ - --exclude='.git' \ - --exclude='node_modules' \ - --exclude='*.log' \ - --exclude='.DS_Store' \ - --exclude='bin/' \ - --exclude='dist/' \ - --exclude='coverage/' \ - --exclude='.claude/' \ - --exclude='testdata/' \ - --exclude='examples/' \ - --exclude='*.tar.gz' \ - . - -echo "Archive created: $OUTPUT" -echo "Size: $(du -h $OUTPUT | cut -f1)" -echo "Includes CLI binary: bin-linux/orama" diff --git a/scripts/recover-rqlite.sh b/scripts/recover-rqlite.sh deleted file mode 100644 index fdebc66..0000000 --- a/scripts/recover-rqlite.sh +++ /dev/null @@ -1,289 +0,0 @@ -#!/usr/bin/env bash -# -# Recover RQLite cluster from split-brain. -# -# Strategy: -# 1. Stop orama-node on ALL nodes simultaneously -# 2. Keep raft/ data ONLY on the node with the highest commit index (leader candidate) -# 3. 
Delete raft/ on all other nodes (they'll join fresh via -join) -# 4. Start the leader candidate first, wait for it to become Leader -# 5. Start all other nodes — they discover the leader via LibP2P and join -# 6. Verify cluster health -# -# Usage: -# scripts/recover-rqlite.sh --devnet --leader 57.129.7.232 -# scripts/recover-rqlite.sh --testnet --leader -# -set -euo pipefail - -# ── Parse flags ────────────────────────────────────────────────────────────── -ENV="" -LEADER_HOST="" - -for arg in "$@"; do - case "$arg" in - --devnet) ENV="devnet" ;; - --testnet) ENV="testnet" ;; - --leader=*) LEADER_HOST="${arg#--leader=}" ;; - -h|--help) - echo "Usage: scripts/recover-rqlite.sh --devnet|--testnet --leader=" - exit 0 - ;; - *) - echo "Unknown flag: $arg" >&2 - exit 1 - ;; - esac -done - -if [[ -z "$ENV" ]]; then - echo "ERROR: specify --devnet or --testnet" >&2 - exit 1 -fi -if [[ -z "$LEADER_HOST" ]]; then - echo "ERROR: specify --leader= (the node with highest commit index)" >&2 - exit 1 -fi - -# ── Paths ──────────────────────────────────────────────────────────────────── -ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." 
&& pwd)" -CONF="$ROOT_DIR/scripts/remote-nodes.conf" - -die() { echo "ERROR: $*" >&2; exit 1; } -[[ -f "$CONF" ]] || die "Missing $CONF" - -# ── Load nodes from conf ──────────────────────────────────────────────────── -HOSTS=() -PASSES=() -ROLES=() -SSH_KEYS=() - -while IFS='|' read -r env host pass role key; do - [[ -z "$env" || "$env" == \#* ]] && continue - env="${env%%#*}" - env="$(echo "$env" | xargs)" - [[ "$env" != "$ENV" ]] && continue - - HOSTS+=("$host") - PASSES+=("$pass") - ROLES+=("${role:-node}") - SSH_KEYS+=("${key:-}") -done < "$CONF" - -if [[ ${#HOSTS[@]} -eq 0 ]]; then - die "No nodes found for environment '$ENV' in $CONF" -fi - -echo "== recover-rqlite.sh ($ENV) — ${#HOSTS[@]} nodes ==" -echo "Leader candidate: $LEADER_HOST" -echo "" - -# Find leader index -LEADER_IDX=-1 -for i in "${!HOSTS[@]}"; do - if [[ "${HOSTS[$i]}" == *"$LEADER_HOST"* ]]; then - LEADER_IDX=$i - break - fi -done - -if [[ $LEADER_IDX -eq -1 ]]; then - die "Leader host '$LEADER_HOST' not found in node list" -fi - -echo "Nodes:" -for i in "${!HOSTS[@]}"; do - marker="" - [[ $i -eq $LEADER_IDX ]] && marker=" ← LEADER (keep data)" - echo " [$i] ${HOSTS[$i]} (${ROLES[$i]})$marker" -done -echo "" - -# ── SSH helpers ────────────────────────────────────────────────────────────── -SSH_OPTS=(-o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -o ConnectTimeout=10) - -node_ssh() { - local idx="$1" - shift - local h="${HOSTS[$idx]}" - local p="${PASSES[$idx]}" - local k="${SSH_KEYS[$idx]:-}" - - if [[ -n "$k" ]]; then - local expanded_key="${k/#\~/$HOME}" - if [[ -f "$expanded_key" ]]; then - ssh -i "$expanded_key" "${SSH_OPTS[@]}" "$h" "$@" 2>/dev/null - return $? - fi - fi - sshpass -p "$p" ssh -n "${SSH_OPTS[@]}" "$h" "$@" 2>/dev/null -} - -# ── Confirmation ───────────────────────────────────────────────────────────── -echo "⚠️ THIS WILL:" -echo " 1. Stop orama-node on ALL ${#HOSTS[@]} nodes" -echo " 2. 
DELETE raft/ data on ${#HOSTS[@]}-1 nodes (backup to /tmp/rqlite-raft-backup/)" -echo " 3. Keep raft/ data ONLY on ${HOSTS[$LEADER_IDX]} (leader candidate)" -echo " 4. Restart all nodes to reform the cluster" -echo "" -read -r -p "Continue? [y/N] " confirm -if [[ "$confirm" != "y" && "$confirm" != "Y" ]]; then - echo "Aborted." - exit 0 -fi -echo "" - -RAFT_DIR="/opt/orama/.orama/data/rqlite/raft" -BACKUP_DIR="/tmp/rqlite-raft-backup" - -# ── Phase 1: Stop orama-node on ALL nodes ─────────────────────────────────── -echo "== Phase 1: Stopping orama-node on all ${#HOSTS[@]} nodes ==" -failed=() -for i in "${!HOSTS[@]}"; do - h="${HOSTS[$i]}" - p="${PASSES[$i]}" - echo -n " Stopping $h ... " - if node_ssh "$i" "printf '%s\n' '$p' | sudo -S systemctl stop orama-node 2>&1 && echo STOPPED"; then - echo "" - else - echo "FAILED" - failed+=("$h") - fi -done - -if [[ ${#failed[@]} -gt 0 ]]; then - echo "" - echo "⚠️ ${#failed[@]} nodes failed to stop. Attempting kill..." - for i in "${!HOSTS[@]}"; do - h="${HOSTS[$i]}" - p="${PASSES[$i]}" - for fh in "${failed[@]}"; do - if [[ "$h" == "$fh" ]]; then - node_ssh "$i" "printf '%s\n' '$p' | sudo -S killall -9 orama-node rqlited 2>/dev/null; echo KILLED" || true - fi - done - done -fi - -echo "" -echo "Waiting 5s for processes to fully stop..." -sleep 5 - -# ── Phase 2: Backup and delete raft/ on non-leader nodes ──────────────────── -echo "== Phase 2: Clearing raft state on non-leader nodes ==" -for i in "${!HOSTS[@]}"; do - [[ $i -eq $LEADER_IDX ]] && continue - - h="${HOSTS[$i]}" - p="${PASSES[$i]}" - echo -n " Clearing $h ... 
" - if node_ssh "$i" " - printf '%s\n' '$p' | sudo -S bash -c ' - rm -rf $BACKUP_DIR - if [ -d $RAFT_DIR ]; then - cp -r $RAFT_DIR $BACKUP_DIR 2>/dev/null || true - rm -rf $RAFT_DIR - echo \"CLEARED (backup at $BACKUP_DIR)\" - else - echo \"NO_RAFT_DIR (nothing to clear)\" - fi - ' - "; then - true - else - echo "FAILED" - fi -done - -echo "" -echo "Leader node ${HOSTS[$LEADER_IDX]} raft/ data preserved." - -# ── Phase 3: Start leader node ────────────────────────────────────────────── -echo "" -echo "== Phase 3: Starting leader node (${HOSTS[$LEADER_IDX]}) ==" -lp="${PASSES[$LEADER_IDX]}" -node_ssh "$LEADER_IDX" "printf '%s\n' '$lp' | sudo -S systemctl start orama-node" || die "Failed to start leader node" - -echo " Waiting for leader to become Leader..." -max_wait=120 -elapsed=0 -while [[ $elapsed -lt $max_wait ]]; do - state=$(node_ssh "$LEADER_IDX" "curl -s --max-time 3 http://localhost:5001/status 2>/dev/null | python3 -c \"import sys,json; d=json.load(sys.stdin); print(d.get('store',{}).get('raft',{}).get('state',''))\" 2>/dev/null" || echo "") - if [[ "$state" == "Leader" ]]; then - echo " ✓ Leader node is Leader after ${elapsed}s" - break - fi - echo " ... state=$state (${elapsed}s / ${max_wait}s)" - sleep 5 - ((elapsed+=5)) -done - -if [[ "$state" != "Leader" ]]; then - echo " ⚠️ Leader did not become Leader within ${max_wait}s (state=$state)" - echo " The node may need more time. Continuing anyway..." -fi - -# ── Phase 4: Start all other nodes ────────────────────────────────────────── -echo "" -echo "== Phase 4: Starting remaining nodes ==" - -# Start non-leader nodes in batches of 3 with 15s between batches -batch_size=3 -batch_count=0 -for i in "${!HOSTS[@]}"; do - [[ $i -eq $LEADER_IDX ]] && continue - - h="${HOSTS[$i]}" - p="${PASSES[$i]}" - echo -n " Starting $h ... 
" - if node_ssh "$i" "printf '%s\n' '$p' | sudo -S systemctl start orama-node && echo STARTED"; then - true - else - echo "FAILED" - fi - - ((batch_count++)) - if [[ $((batch_count % batch_size)) -eq 0 ]]; then - echo " (waiting 15s between batches for cluster stability)" - sleep 15 - fi -done - -# ── Phase 5: Wait and verify ──────────────────────────────────────────────── -echo "" -echo "== Phase 5: Waiting for cluster to form (120s) ==" -sleep 30 -echo " ... 30s" -sleep 30 -echo " ... 60s" -sleep 30 -echo " ... 90s" -sleep 30 -echo " ... 120s" - -echo "" -echo "== Cluster status ==" -for i in "${!HOSTS[@]}"; do - h="${HOSTS[$i]}" - result=$(node_ssh "$i" "curl -s --max-time 5 http://localhost:5001/status 2>/dev/null | python3 -c \" -import sys,json -try: - d=json.load(sys.stdin) - r=d.get('store',{}).get('raft',{}) - n=d.get('store',{}).get('num_nodes','?') - print(f'state={r.get(\"state\",\"?\")} commit={r.get(\"commit_index\",\"?\")} leader={r.get(\"leader\",{}).get(\"node_id\",\"?\")} nodes={n}') -except: - print('NO_RESPONSE') -\" 2>/dev/null" || echo "SSH_FAILED") - marker="" - [[ $i -eq $LEADER_IDX ]] && marker=" ← LEADER" - echo " ${HOSTS[$i]}: $result$marker" -done - -echo "" -echo "== Recovery complete ==" -echo "" -echo "Next steps:" -echo " 1. Run 'scripts/inspect.sh --devnet' to verify full cluster health" -echo " 2. If some nodes show Candidate state, give them more time (up to 5 min)" -echo " 3. If nodes fail to join, check /opt/orama/.orama/logs/rqlite-node.log on the node" diff --git a/scripts/redeploy.sh b/scripts/redeploy.sh deleted file mode 100755 index 1add12e..0000000 --- a/scripts/redeploy.sh +++ /dev/null @@ -1,400 +0,0 @@ -#!/usr/bin/env bash -# -# Redeploy to all nodes in a given environment (devnet or testnet). -# Reads node credentials from scripts/remote-nodes.conf. 
-# -# Flow: -# 1) make build-linux -# 2) scripts/generate-source-archive.sh -> /tmp/network-source.tar.gz -# 3) scp archive + extract-deploy.sh + conf to hub node -# 4) from hub: sshpass scp to all other nodes + sudo bash /tmp/extract-deploy.sh -# 5) rolling upgrade: followers first, leader last -# per node: pre-upgrade -> stop -> extract binary -> post-upgrade -# -# Usage: -# scripts/redeploy.sh --devnet -# scripts/redeploy.sh --testnet -# scripts/redeploy.sh --devnet --no-build -# scripts/redeploy.sh --devnet --skip-build -# -set -euo pipefail - -# ── Parse flags ────────────────────────────────────────────────────────────── -ENV="" -NO_BUILD=0 - -for arg in "$@"; do - case "$arg" in - --devnet) ENV="devnet" ;; - --testnet) ENV="testnet" ;; - --no-build|--skip-build) NO_BUILD=1 ;; - -h|--help) - echo "Usage: scripts/redeploy.sh --devnet|--testnet [--no-build|--skip-build]" - exit 0 - ;; - *) - echo "Unknown flag: $arg" >&2 - echo "Usage: scripts/redeploy.sh --devnet|--testnet [--no-build|--skip-build]" >&2 - exit 1 - ;; - esac -done - -if [[ -z "$ENV" ]]; then - echo "ERROR: specify --devnet or --testnet" >&2 - exit 1 -fi - -# ── Paths ──────────────────────────────────────────────────────────────────── -ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." 
&& pwd)" -CONF="$ROOT_DIR/scripts/remote-nodes.conf" -ARCHIVE="/tmp/network-source.tar.gz" -EXTRACT_SCRIPT="$ROOT_DIR/scripts/extract-deploy.sh" - -die() { echo "ERROR: $*" >&2; exit 1; } -need_file() { [[ -f "$1" ]] || die "Missing file: $1"; } - -need_file "$CONF" -need_file "$EXTRACT_SCRIPT" - -# ── Load nodes from conf ──────────────────────────────────────────────────── -HOSTS=() -PASSES=() -ROLES=() -SSH_KEYS=() - -while IFS='|' read -r env host pass role key; do - [[ -z "$env" || "$env" == \#* ]] && continue - env="${env%%#*}" - env="$(echo "$env" | xargs)" - [[ "$env" != "$ENV" ]] && continue - - HOSTS+=("$host") - PASSES+=("$pass") - ROLES+=("${role:-node}") - SSH_KEYS+=("${key:-}") -done < "$CONF" - -if [[ ${#HOSTS[@]} -eq 0 ]]; then - die "No nodes found for environment '$ENV' in $CONF" -fi - -echo "== redeploy.sh ($ENV) — ${#HOSTS[@]} nodes ==" -for i in "${!HOSTS[@]}"; do - echo " [$i] ${HOSTS[$i]} (${ROLES[$i]})" -done - -# ── Pick hub node ──────────────────────────────────────────────────────────── -# Hub = first node that has an SSH key configured (direct SCP from local). -# If none have a key, use the first node (via sshpass). -HUB_IDX=0 -HUB_KEY="" -for i in "${!HOSTS[@]}"; do - if [[ -n "${SSH_KEYS[$i]}" ]]; then - expanded_key="${SSH_KEYS[$i]/#\~/$HOME}" - if [[ -f "$expanded_key" ]]; then - HUB_IDX=$i - HUB_KEY="$expanded_key" - break - fi - fi -done - -HUB_HOST="${HOSTS[$HUB_IDX]}" -HUB_PASS="${PASSES[$HUB_IDX]}" - -echo "Hub: $HUB_HOST (idx=$HUB_IDX, key=${HUB_KEY:-none})" - -# ── Build ──────────────────────────────────────────────────────────────────── -if [[ "$NO_BUILD" -eq 0 ]]; then - echo "== build-linux ==" - (cd "$ROOT_DIR" && make build-linux) || { - echo "WARN: make build-linux failed; continuing if existing bin-linux is acceptable." 
- } -else - echo "== skipping build (--no-build) ==" -fi - -# ── Generate source archive ───────────────────────────────────────────────── -echo "== generate source archive ==" -(cd "$ROOT_DIR" && ./scripts/generate-source-archive.sh) -need_file "$ARCHIVE" - -# ── Helper: SSH/SCP to hub ─────────────────────────────────────────────────── -SSH_OPTS=(-o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null) - -hub_scp() { - if [[ -n "$HUB_KEY" ]]; then - scp -i "$HUB_KEY" "${SSH_OPTS[@]}" "$@" - else - sshpass -p "$HUB_PASS" scp "${SSH_OPTS[@]}" "$@" - fi -} - -hub_ssh() { - if [[ -n "$HUB_KEY" ]]; then - ssh -i "$HUB_KEY" "${SSH_OPTS[@]}" "$@" - else - sshpass -p "$HUB_PASS" ssh "${SSH_OPTS[@]}" "$@" - fi -} - -# ── Upload to hub ──────────────────────────────────────────────────────────── -echo "== upload archive + extract script + conf to hub ($HUB_HOST) ==" -hub_scp "$ARCHIVE" "$EXTRACT_SCRIPT" "$CONF" "$HUB_HOST":/tmp/ - -# ── Remote: fan-out + extract + rolling upgrade ───────────────────────────── -echo "== fan-out + extract + rolling upgrade from hub ==" - -hub_ssh "$HUB_HOST" "DEPLOY_ENV=$ENV HUB_IDX=$HUB_IDX bash -s" <<'REMOTE' -set -euo pipefail -export DEBIAN_FRONTEND=noninteractive - -TAR=/tmp/network-source.tar.gz -EX=/tmp/extract-deploy.sh -CONF=/tmp/remote-nodes.conf - -[[ -f "$TAR" ]] || { echo "Missing $TAR on hub"; exit 2; } -[[ -f "$EX" ]] || { echo "Missing $EX on hub"; exit 2; } -[[ -f "$CONF" ]] || { echo "Missing $CONF on hub"; exit 2; } -chmod +x "$EX" || true - -# Parse conf file on the hub — same format as local -hosts=() -passes=() -idx=0 -hub_host="" -hub_pass="" - -while IFS='|' read -r env host pass role key; do - [[ -z "$env" || "$env" == \#* ]] && continue - env="${env%%#*}" - env="$(echo "$env" | xargs)" - [[ "$env" != "$DEPLOY_ENV" ]] && continue - - if [[ $idx -eq $HUB_IDX ]]; then - hub_host="$host" - hub_pass="$pass" - else - hosts+=("$host") - passes+=("$pass") - fi - ((idx++)) || true -done < "$CONF" - -echo "Hub: $hub_host 
(this machine)" -echo "Fan-out nodes: ${#hosts[@]}" - -# Install sshpass on hub if needed -if [[ ${#hosts[@]} -gt 0 ]] && ! command -v sshpass >/dev/null 2>&1; then - echo "Installing sshpass on hub..." - printf '%s\n' "$hub_pass" | sudo -S apt-get update -y >/dev/null - printf '%s\n' "$hub_pass" | sudo -S apt-get install -y sshpass >/dev/null -fi - -echo "== fan-out: upload to ${#hosts[@]} nodes ==" -upload_failed=() -for i in "${!hosts[@]}"; do - h="${hosts[$i]}" - p="${passes[$i]}" - echo " -> $h" - if ! sshpass -p "$p" scp -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \ - "$TAR" "$EX" "$h":/tmp/; then - echo " !! UPLOAD FAILED: $h" - upload_failed+=("$h") - fi -done - -echo "== extract on all fan-out nodes ==" -for i in "${!hosts[@]}"; do - h="${hosts[$i]}" - p="${passes[$i]}" - echo " -> $h" - if ! sshpass -p "$p" ssh -n -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \ - "$h" "printf '%s\n' '$p' | sudo -S bash /tmp/extract-deploy.sh >/tmp/extract.log 2>&1 && echo OK"; then - echo " !! EXTRACT FAILED: $h" - upload_failed+=("$h") - fi -done - -if [[ ${#upload_failed[@]} -gt 0 ]]; then - echo "" - echo "WARNING: ${#upload_failed[@]} nodes had upload/extract failures:" - for uf in "${upload_failed[@]}"; do - echo " - $uf" - done - echo "Continuing with rolling restart..." 
-fi - -echo "== extract on hub ==" -printf '%s\n' "$hub_pass" | sudo -S bash "$EX" >/tmp/extract.log 2>&1 - -# ── Raft state detection ── -raft_state() { - local h="$1" p="$2" - local cmd="curl -s http://localhost:5001/status" - local parse_py='import sys,json; j=json.load(sys.stdin); r=j.get("store",{}).get("raft",{}); print((r.get("state") or ""), (r.get("num_peers") or 0), (r.get("voter") is True))' - sshpass -p "$p" ssh -n -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \ - "$h" "$cmd | python3 -c '$parse_py'" 2>/dev/null || true -} - -echo "== detect leader ==" -leader="" -leader_pass="" - -for i in "${!hosts[@]}"; do - h="${hosts[$i]}" - p="${passes[$i]}" - out="$(raft_state "$h" "$p")" - echo " $h -> ${out:-NO_OUTPUT}" - if [[ "$out" == Leader* ]]; then - leader="$h" - leader_pass="$p" - break - fi -done - -# Check hub itself -if [[ -z "$leader" ]]; then - hub_out="$(curl -s http://localhost:5001/status | python3 -c 'import sys,json; j=json.load(sys.stdin); r=j.get("store",{}).get("raft",{}); print((r.get("state") or ""), (r.get("num_peers") or 0), (r.get("voter") is True))' 2>/dev/null || true)" - echo " hub(localhost) -> ${hub_out:-NO_OUTPUT}" - if [[ "$hub_out" == Leader* ]]; then - leader="HUB" - leader_pass="$hub_pass" - fi -fi - -if [[ -z "$leader" ]]; then - echo "No leader detected. Aborting before upgrades." - exit 3 -fi -echo "Leader: $leader" - -failed_nodes=() - -# ── Per-node upgrade flow ── -# Uses pre-upgrade (maintenance + leadership transfer + propagation wait) -# then stops, deploys binary, and post-upgrade (start + health verification). -upgrade_one() { - local h="$1" p="$2" - echo "== upgrade $h ==" - - # 1. Pre-upgrade: enter maintenance, transfer leadership, wait for propagation - echo " [1/4] pre-upgrade..." - if ! sshpass -p "$p" ssh -n -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \ - "$h" "printf '%s\n' '$p' | sudo -S orama prod pre-upgrade" 2>&1; then - echo " !! 
pre-upgrade failed on $h (continuing with stop)" - fi - - # 2. Stop all services - echo " [2/4] stopping services..." - if ! sshpass -p "$p" ssh -n -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \ - "$h" "printf '%s\n' '$p' | sudo -S systemctl stop 'orama-*'" 2>&1; then - echo " !! stop failed on $h" - failed_nodes+=("$h") - return 1 - fi - - # 3. Deploy new binary - echo " [3/4] deploying binary..." - if ! sshpass -p "$p" ssh -n -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \ - "$h" "printf '%s\n' '$p' | sudo -S bash /tmp/extract-deploy.sh >/tmp/extract.log 2>&1 && echo OK" 2>&1; then - echo " !! extract failed on $h" - failed_nodes+=("$h") - return 1 - fi - - # 4. Post-upgrade: start services, verify health, exit maintenance - echo " [4/4] post-upgrade..." - if ! sshpass -p "$p" ssh -n -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \ - "$h" "printf '%s\n' '$p' | sudo -S orama prod post-upgrade" 2>&1; then - echo " !! post-upgrade failed on $h" - failed_nodes+=("$h") - return 1 - fi - - echo " OK: $h" -} - -upgrade_hub() { - echo "== upgrade hub (localhost) ==" - - # 1. Pre-upgrade - echo " [1/4] pre-upgrade..." - if ! (printf '%s\n' "$hub_pass" | sudo -S orama prod pre-upgrade) 2>&1; then - echo " !! pre-upgrade failed on hub (continuing with stop)" - fi - - # 2. Stop all services - echo " [2/4] stopping services..." - if ! (printf '%s\n' "$hub_pass" | sudo -S systemctl stop 'orama-*') 2>&1; then - echo " !! stop failed on hub ($hub_host)" - failed_nodes+=("$hub_host (hub)") - return 1 - fi - - # 3. Deploy new binary - echo " [3/4] deploying binary..." - if ! (printf '%s\n' "$hub_pass" | sudo -S bash "$EX" >/tmp/extract.log 2>&1); then - echo " !! extract failed on hub ($hub_host)" - failed_nodes+=("$hub_host (hub)") - return 1 - fi - - # 4. Post-upgrade - echo " [4/4] post-upgrade..." - if ! (printf '%s\n' "$hub_pass" | sudo -S orama prod post-upgrade) 2>&1; then - echo " !! 
post-upgrade failed on hub ($hub_host)" - failed_nodes+=("$hub_host (hub)") - return 1 - fi - - echo " OK: hub ($hub_host)" -} - -echo "== rolling upgrade (followers first, leader last) ==" -for i in "${!hosts[@]}"; do - h="${hosts[$i]}" - p="${passes[$i]}" - [[ "$h" == "$leader" ]] && continue - upgrade_one "$h" "$p" || true -done - -# Upgrade hub if not the leader -if [[ "$leader" != "HUB" ]]; then - upgrade_hub || true -fi - -# Upgrade leader last -echo "== upgrade leader last ==" -if [[ "$leader" == "HUB" ]]; then - upgrade_hub || true -else - upgrade_one "$leader" "$leader_pass" || true -fi - -# Clean up conf from hub -rm -f "$CONF" - -# ── Report results ── -echo "" -echo "========================================" -if [[ ${#failed_nodes[@]} -gt 0 ]]; then - echo "UPGRADE COMPLETED WITH FAILURES (${#failed_nodes[@]} nodes failed):" - for fn in "${failed_nodes[@]}"; do - echo " FAILED: $fn" - done - echo "" - echo "Recommended actions:" - echo " 1. SSH into the failed node(s)" - echo " 2. Check logs: sudo orama prod logs node --follow" - echo " 3. Manually run: sudo orama prod post-upgrade" - echo "========================================" - exit 1 -else - echo "All nodes upgraded successfully." - echo "========================================" -fi -REMOTE - -echo "== complete ==" diff --git a/scripts/upgrade-nodes.sh b/scripts/upgrade-nodes.sh deleted file mode 100755 index 0c1d076..0000000 --- a/scripts/upgrade-nodes.sh +++ /dev/null @@ -1,85 +0,0 @@ -#!/bin/bash -# Rolling upgrade of nodes: runs `orama node upgrade --restart` one node at a time. -# -# Usage: -# ./scripts/upgrade-nodes.sh --env testnet -# ./scripts/upgrade-nodes.sh --env devnet -# ./scripts/upgrade-nodes.sh [ ...] 
- -set -e - -SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" -CONF="$SCRIPT_DIR/remote-nodes.conf" - -resolve_nodes() { - if [ "$1" = "--env" ] && [ -n "$2" ] && [ -f "$CONF" ]; then - grep "^$2|" "$CONF" | while IFS='|' read -r env userhost pass role; do - local user="${userhost%%@*}" - local host="${userhost##*@}" - echo "$user|$host|$pass" - done - return - fi - - for ip in "$@"; do - if [ -f "$CONF" ]; then - local match - match=$(grep "|[^|]*@${ip}|" "$CONF" | head -1) - if [ -n "$match" ]; then - local userhost pass - userhost=$(echo "$match" | cut -d'|' -f2) - pass=$(echo "$match" | cut -d'|' -f3) - local user="${userhost%%@*}" - echo "$user|$ip|$pass" - continue - fi - fi - echo "ubuntu|$ip|" - done -} - -upgrade_node() { - local user="$1" host="$2" pass="$3" - - echo "→ Upgrading $user@$host..." - - local sudo_prefix="" - [ "$user" != "root" ] && sudo_prefix="sudo " - - local cmd="${sudo_prefix}orama node upgrade --restart" - - if [ -n "$pass" ]; then - sshpass -p "$pass" ssh -n -o StrictHostKeyChecking=no -o ConnectTimeout=10 \ - -o PreferredAuthentications=password -o PubkeyAuthentication=no \ - "$user@$host" "$cmd" - else - ssh -n -o StrictHostKeyChecking=no -o ConnectTimeout=10 \ - "$user@$host" "$cmd" - fi -} - -if [ $# -eq 0 ]; then - echo "Usage: $0 --env " - echo " $0 [ ...]" - exit 1 -fi - -# Count nodes -node_count=$(resolve_nodes "$@" | wc -l | tr -d ' ') -echo "Rolling upgrade: $node_count nodes (serial)" -echo "" - -i=0 -resolve_nodes "$@" | while IFS='|' read -r user host pass; do - i=$((i + 1)) - echo "[$i/$node_count] $user@$host" - upgrade_node "$user" "$host" "$pass" - echo " ✓ Done" - if [ "$i" -lt "$node_count" ]; then - echo " Waiting 30s before next node..." - sleep 30 - fi - echo "" -done - -echo "Rolling upgrade complete." 
diff --git a/scripts/upload-source-fanout.sh b/scripts/upload-source-fanout.sh deleted file mode 100755 index 3ed4961..0000000 --- a/scripts/upload-source-fanout.sh +++ /dev/null @@ -1,210 +0,0 @@ -#!/bin/bash -# Upload source to one seed node, then fan out to all others in parallel. -# ~3x faster than sequential: one slow upload + fast parallel inter-node transfers. -# -# Usage: -# ./scripts/upload-source-fanout.sh --env devnet -# ./scripts/upload-source-fanout.sh --env testnet - -set -e - -SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" -ARCHIVE="/tmp/network-source.tar.gz" -CONF="$SCRIPT_DIR/remote-nodes.conf" -REMOTE_ARCHIVE="/tmp/network-source.tar.gz" - -if [ ! -f "$ARCHIVE" ]; then - echo "Error: $ARCHIVE not found" - echo "Run: make build-linux && ./scripts/generate-source-archive.sh" - exit 1 -fi - -if [ "$1" != "--env" ] || [ -z "$2" ]; then - echo "Usage: $0 --env " - exit 1 -fi - -ENV="$2" - -# Parse all nodes for this environment -declare -a USERS HOSTS PASSES KEYS -i=0 -while IFS='|' read -r env userhost pass role key; do - [ -z "$env" ] && continue - case "$env" in \#*) continue;; esac - env="$(echo "$env" | xargs)" - [ "$env" != "$ENV" ] && continue - - USERS[$i]="${userhost%%@*}" - HOSTS[$i]="${userhost##*@}" - PASSES[$i]="$pass" - KEYS[$i]="$(echo "${key:-}" | xargs)" - ((i++)) -done < "$CONF" - -TOTAL=${#HOSTS[@]} -if [ "$TOTAL" -eq 0 ]; then - echo "No nodes found for environment: $ENV" - exit 1 -fi - -echo "Source archive: $ARCHIVE ($(du -h "$ARCHIVE" | cut -f1))" -echo "Fanout: upload to 1 seed, then parallel to $((TOTAL - 1)) others" -echo "" - -# --- Helper functions --- - -run_ssh() { - local user="$1" host="$2" pass="$3" key="$4" - shift 4 - local opts="-o StrictHostKeyChecking=no -o ConnectTimeout=10" - if [ -n "$key" ]; then - ssh -n $opts -i "$key" "$user@$host" "$@" - elif [ -n "$pass" ]; then - sshpass -p "$pass" ssh -n $opts \ - -o PreferredAuthentications=password -o PubkeyAuthentication=no \ - "$user@$host" "$@" - else - 
ssh -n $opts "$user@$host" "$@" - fi -} - -# Like run_ssh but without -n, so stdin can be piped through -run_ssh_stdin() { - local user="$1" host="$2" pass="$3" key="$4" - shift 4 - local opts="-o StrictHostKeyChecking=no -o ConnectTimeout=10" - if [ -n "$key" ]; then - ssh $opts -i "$key" "$user@$host" "$@" - elif [ -n "$pass" ]; then - sshpass -p "$pass" ssh $opts \ - -o PreferredAuthentications=password -o PubkeyAuthentication=no \ - "$user@$host" "$@" - else - ssh $opts "$user@$host" "$@" - fi -} - -run_scp() { - local user="$1" host="$2" pass="$3" key="$4" src="$5" dst="$6" - local opts="-o StrictHostKeyChecking=no -o ConnectTimeout=10" - if [ -n "$key" ]; then - scp $opts -i "$key" "$src" "$user@$host:$dst" - elif [ -n "$pass" ]; then - sshpass -p "$pass" scp $opts \ - -o PreferredAuthentications=password -o PubkeyAuthentication=no \ - "$src" "$user@$host:$dst" - else - scp $opts "$src" "$user@$host:$dst" - fi -} - -extract_on_node() { - local user="$1" host="$2" pass="$3" key="$4" - local sudo_prefix="" - [ "$user" != "root" ] && sudo_prefix="sudo " - run_ssh "$user" "$host" "$pass" "$key" \ - "${sudo_prefix}bash -c 'rm -rf /opt/orama/src && mkdir -p /opt/orama/src /opt/orama/bin && tar xzf $REMOTE_ARCHIVE -C /opt/orama/src 2>/dev/null && if [ -f /opt/orama/src/bin-linux/orama ]; then cp /opt/orama/src/bin-linux/orama /usr/local/bin/orama && chmod +x /usr/local/bin/orama; fi && echo \"\$(ls /opt/orama/src/ | wc -l) files\"'" -} - -# --- Step 1: Upload to seed (first node) --- - -SEED_USER="${USERS[0]}" -SEED_HOST="${HOSTS[0]}" -SEED_PASS="${PASSES[0]}" -SEED_KEY="${KEYS[0]}" - -echo "=== Step 1/3: Upload to seed ($SEED_USER@$SEED_HOST) ===" -run_scp "$SEED_USER" "$SEED_HOST" "$SEED_PASS" "$SEED_KEY" "$ARCHIVE" "$REMOTE_ARCHIVE" -extract_on_node "$SEED_USER" "$SEED_HOST" "$SEED_PASS" "$SEED_KEY" -echo " ✓ Seed ready" -echo "" - -# --- Step 2: Install sshpass on seed if needed --- - -echo "=== Step 2/3: Prepare seed for fanout ===" -run_ssh "$SEED_USER" 
"$SEED_HOST" "$SEED_PASS" "$SEED_KEY" \ - "which sshpass >/dev/null 2>&1 || (sudo apt-get update -qq >/dev/null 2>&1 && sudo apt-get install -y -qq sshpass >/dev/null 2>&1)" -echo " ✓ sshpass available on seed" -echo "" - -# --- Step 3: Fan out from seed to all other nodes in parallel --- - -echo "=== Step 3/3: Fanout to $((TOTAL - 1)) nodes ===" - -# Collect nodes that need key-based auth (can't fanout, key is local) -declare -a KEY_NODES - -# Build a targets file for the seed: user|host|pass|is_root (one per line, base64-encoded passwords) -TARGETS_CONTENT="" -for ((j=1; j /tmp/fanout-targets.txt" <<< "$TARGETS_CONTENT" - -FANOUT='#!/bin/bash -ARCHIVE="/tmp/network-source.tar.gz" -PIDS=() -LABELS=() - -while IFS="|" read -r user host b64pass is_root; do - [ -z "$user" ] && continue - pass=$(echo "$b64pass" | base64 -d) - sudo_prefix="" - [ "$is_root" != "1" ] && sudo_prefix="sudo " - - ( - sshpass -p "$pass" scp \ - -o StrictHostKeyChecking=no -o ConnectTimeout=10 \ - -o PreferredAuthentications=password -o PubkeyAuthentication=no \ - "$ARCHIVE" "$user@$host:$ARCHIVE" && \ - sshpass -p "$pass" ssh -n \ - -o StrictHostKeyChecking=no -o ConnectTimeout=10 \ - -o PreferredAuthentications=password -o PubkeyAuthentication=no \ - "$user@$host" \ - "${sudo_prefix}bash -c '\''rm -rf /opt/orama/src && mkdir -p /opt/orama/src /opt/orama/bin && tar xzf /tmp/network-source.tar.gz -C /opt/orama/src 2>/dev/null && if [ -f /opt/orama/src/bin-linux/orama ]; then cp /opt/orama/src/bin-linux/orama /usr/local/bin/orama && chmod +x /usr/local/bin/orama; fi'\''" && \ - echo " ✓ $user@$host" || \ - echo " ✗ $user@$host FAILED" - ) & - PIDS+=($!) - LABELS+=("$user@$host") -done < /tmp/fanout-targets.txt - -FAILED=0 -for i in "${!PIDS[@]}"; do - if ! 
wait "${PIDS[$i]}"; then - FAILED=1 - fi -done - -rm -f /tmp/fanout-targets.txt /tmp/fanout.sh -exit $FAILED -' - -run_ssh_stdin "$SEED_USER" "$SEED_HOST" "$SEED_PASS" "$SEED_KEY" "cat > /tmp/fanout.sh && chmod +x /tmp/fanout.sh" <<< "$FANOUT" - -# Run fanout (allocate tty for live output) -run_ssh "$SEED_USER" "$SEED_HOST" "$SEED_PASS" "$SEED_KEY" "bash /tmp/fanout.sh" - -# Handle key-based auth nodes directly from local (key isn't on seed) -for idx in "${KEY_NODES[@]}"; do - echo "" - echo "→ Direct upload to ${USERS[$idx]}@${HOSTS[$idx]} (SSH key auth)..." - run_scp "${USERS[$idx]}" "${HOSTS[$idx]}" "${PASSES[$idx]}" "${KEYS[$idx]}" "$ARCHIVE" "$REMOTE_ARCHIVE" - extract_on_node "${USERS[$idx]}" "${HOSTS[$idx]}" "${PASSES[$idx]}" "${KEYS[$idx]}" - echo " ✓ ${USERS[$idx]}@${HOSTS[$idx]}" -done - -echo "" -echo "Done. All $TOTAL nodes updated." -echo "Now run: ./bin/orama install --vps-ip ..." diff --git a/scripts/upload-source.sh b/scripts/upload-source.sh deleted file mode 100755 index 53c15f9..0000000 --- a/scripts/upload-source.sh +++ /dev/null @@ -1,103 +0,0 @@ -#!/bin/bash -# Upload and extract the source archive to one or more VPS nodes. -# -# Prerequisites: -# make build-linux -# ./scripts/generate-source-archive.sh -# -# Usage: -# ./scripts/upload-source.sh [ ...] -# ./scripts/upload-source.sh --env testnet # upload to all testnet nodes -# -# After uploading, run install: -# ./bin/orama install --vps-ip --nameserver --domain ... - -set -e - -SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" -ARCHIVE="/tmp/network-source.tar.gz" -CONF="$SCRIPT_DIR/remote-nodes.conf" - -if [ ! 
-f "$ARCHIVE" ]; then - echo "Error: $ARCHIVE not found" - echo "Run: make build-linux && ./scripts/generate-source-archive.sh" - exit 1 -fi - -# Resolve VPS list from --env flag or direct IPs -resolve_nodes() { - if [ "$1" = "--env" ] && [ -n "$2" ] && [ -f "$CONF" ]; then - grep "^$2|" "$CONF" | while IFS='|' read -r env userhost pass role; do - local user="${userhost%%@*}" - local host="${userhost##*@}" - echo "$user|$host|$pass" - done - return - fi - - # Direct IPs — look up credentials from conf - for ip in "$@"; do - if [ -f "$CONF" ]; then - local match - match=$(grep "|[^|]*@${ip}|" "$CONF" | head -1) - if [ -n "$match" ]; then - local userhost pass - userhost=$(echo "$match" | cut -d'|' -f2) - pass=$(echo "$match" | cut -d'|' -f3) - local user="${userhost%%@*}" - echo "$user|$ip|$pass" - continue - fi - fi - # Fallback: prompt for credentials - echo "ubuntu|$ip|" - done -} - -upload_to_node() { - local user="$1" host="$2" pass="$3" - - echo "→ Uploading to $user@$host..." - - # Upload archive - if [ -n "$pass" ]; then - sshpass -p "$pass" scp -o StrictHostKeyChecking=no -o ConnectTimeout=10 \ - -o PreferredAuthentications=password -o PubkeyAuthentication=no \ - "$ARCHIVE" "$user@$host:/tmp/network-source.tar.gz" - else - scp -o StrictHostKeyChecking=no -o ConnectTimeout=10 \ - "$ARCHIVE" "$user@$host:/tmp/network-source.tar.gz" - fi - - # Extract on VPS - local sudo_prefix="" - [ "$user" != "root" ] && sudo_prefix="sudo " - - local extract_cmd="${sudo_prefix}bash -c 'rm -rf /opt/orama/src && mkdir -p /opt/orama/src /opt/orama/bin && tar xzf /tmp/network-source.tar.gz -C /opt/orama/src 2>/dev/null && if [ -f /opt/orama/src/bin-linux/orama ]; then cp /opt/orama/src/bin-linux/orama /usr/local/bin/orama && chmod +x /usr/local/bin/orama; fi && echo \" ✓ Extracted (\$(ls /opt/orama/src/ | wc -l) files)\"'" - - if [ -n "$pass" ]; then - sshpass -p "$pass" ssh -n -o StrictHostKeyChecking=no -o ConnectTimeout=10 \ - -o PreferredAuthentications=password -o 
PubkeyAuthentication=no \ - "$user@$host" "$extract_cmd" - else - ssh -n -o StrictHostKeyChecking=no -o ConnectTimeout=10 \ - "$user@$host" "$extract_cmd" - fi -} - -# Main -if [ $# -eq 0 ]; then - echo "Usage: $0 [ ...]" - echo " $0 --env testnet" - exit 1 -fi - -echo "Source archive: $ARCHIVE ($(du -h "$ARCHIVE" | cut -f1))" -echo "" - -resolve_nodes "$@" | while IFS='|' read -r user host pass; do - upload_to_node "$user" "$host" "$pass" - echo "" -done - -echo "Done. Now run: ./bin/orama install --vps-ip ..."