#!/usr/bin/env bash
#
# Redeploy to all nodes in a given environment (devnet or testnet).
# Reads node credentials from scripts/remote-nodes.conf.
#
# Flow:
#   1) make build-linux-all
#   2) scripts/generate-source-archive.sh -> /tmp/network-source.tar.gz
#   3) scp archive + extract-deploy.sh + conf to hub node
#   4) from hub: sshpass scp to all other nodes + sudo bash /tmp/extract-deploy.sh
#   5) rolling upgrade: followers first, leader last
#      per node: pre-upgrade -> stop -> extract binary -> post-upgrade
#
# Usage:
#   scripts/redeploy.sh --devnet
#   scripts/redeploy.sh --testnet
#   scripts/redeploy.sh --devnet --no-build
#   scripts/redeploy.sh --devnet --skip-build
#
# SECURITY NOTE: node passwords travel from the conf file into sshpass and
# into `printf ... | sudo -S` command strings executed over ssh; they are
# visible in `ps` output on the hub while each remote command runs. This is
# inherent to the sshpass/password design of remote-nodes.conf.
#
set -euo pipefail

# ── Parse flags ──────────────────────────────────────────────────────────────
ENV=""
NO_BUILD=0
for arg in "$@"; do
  case "$arg" in
    --devnet) ENV="devnet" ;;
    --testnet) ENV="testnet" ;;
    --no-build|--skip-build) NO_BUILD=1 ;;
    -h|--help)
      echo "Usage: scripts/redeploy.sh --devnet|--testnet [--no-build|--skip-build]"
      exit 0
      ;;
    *)
      echo "Unknown flag: $arg" >&2
      echo "Usage: scripts/redeploy.sh --devnet|--testnet [--no-build|--skip-build]" >&2
      exit 1
      ;;
  esac
done

if [[ -z "$ENV" ]]; then
  echo "ERROR: specify --devnet or --testnet" >&2
  exit 1
fi

# ── Paths ────────────────────────────────────────────────────────────────────
ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
CONF="$ROOT_DIR/scripts/remote-nodes.conf"
ARCHIVE="/tmp/network-source.tar.gz"
EXTRACT_SCRIPT="$ROOT_DIR/scripts/extract-deploy.sh"

# Print an error to stderr and abort the whole deploy.
die() { echo "ERROR: $*" >&2; exit 1; }
# Abort unless $1 is an existing regular file.
need_file() { [[ -f "$1" ]] || die "Missing file: $1"; }

need_file "$CONF"
need_file "$EXTRACT_SCRIPT"

# ── Load nodes from conf ────────────────────────────────────────────────────
# Conf format (pipe-separated): env|host|pass|role|key
# Only rows whose trimmed env field matches $ENV are kept; role defaults to
# "node" and key may be empty.
HOSTS=()
PASSES=()
ROLES=()
SSH_KEYS=()
# FIX: '|| [[ -n "$env" ]]' processes the final line even when the conf file
# has no trailing newline — plain 'read' returns non-zero at EOF and the
# loop would otherwise silently drop that node.
while IFS='|' read -r env host pass role key || [[ -n "$env" ]]; do
  [[ -z "$env" || "$env" == \#* ]] && continue
  env="${env%%#*}"                 # strip trailing inline comment
  env="$(echo "$env" | xargs)"     # trim surrounding whitespace
  [[ "$env" != "$ENV" ]] && continue
  HOSTS+=("$host")
  PASSES+=("$pass")
  ROLES+=("${role:-node}")
  SSH_KEYS+=("${key:-}")
done < "$CONF"

if [[ ${#HOSTS[@]} -eq 0 ]]; then
  die "No nodes found for environment '$ENV' in $CONF"
fi

echo "== redeploy.sh ($ENV) — ${#HOSTS[@]} nodes =="
for i in "${!HOSTS[@]}"; do
  echo " [$i] ${HOSTS[$i]} (${ROLES[$i]})"
done

# ── Pick hub node ────────────────────────────────────────────────────────────
# Hub = first node that has an SSH key configured (direct SCP from local).
# If none have a key, use the first node (via sshpass).
HUB_IDX=0
HUB_KEY=""
for i in "${!HOSTS[@]}"; do
  if [[ -n "${SSH_KEYS[$i]}" ]]; then
    expanded_key="${SSH_KEYS[$i]/#\~/$HOME}"   # expand leading ~ to $HOME
    if [[ -f "$expanded_key" ]]; then
      HUB_IDX=$i
      HUB_KEY="$expanded_key"
      break
    fi
  fi
done
HUB_HOST="${HOSTS[$HUB_IDX]}"
HUB_PASS="${PASSES[$HUB_IDX]}"
echo "Hub: $HUB_HOST (idx=$HUB_IDX, key=${HUB_KEY:-none})"

# FIX: fail fast when we will need sshpass locally (hub has no usable key)
# but it is not installed — otherwise the failure only surfaces as a cryptic
# "command not found" after the build has already run.
if [[ -z "$HUB_KEY" ]] && ! command -v sshpass >/dev/null 2>&1; then
  die "sshpass is required locally when the hub node has no SSH key (install sshpass or configure a key in $CONF)"
fi

# ── Build ────────────────────────────────────────────────────────────────────
if [[ "$NO_BUILD" -eq 0 ]]; then
  echo "== build-linux-all =="
  # Best-effort: a failed build is a warning, since an existing bin-linux
  # output may still be acceptable to ship.
  (cd "$ROOT_DIR" && make build-linux-all) || {
    echo "WARN: make build-linux-all failed; continuing if existing bin-linux is acceptable."
  }
else
  echo "== skipping build (--no-build) =="
fi

# ── Generate source archive ─────────────────────────────────────────────────
echo "== generate source archive =="
(cd "$ROOT_DIR" && ./scripts/generate-source-archive.sh)
need_file "$ARCHIVE"

# ── Helper: SSH/SCP to hub ───────────────────────────────────────────────────
SSH_OPTS=(-o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null)

# scp to/from the hub, preferring the configured key over sshpass.
hub_scp() {
  if [[ -n "$HUB_KEY" ]]; then
    scp -i "$HUB_KEY" "${SSH_OPTS[@]}" "$@"
  else
    sshpass -p "$HUB_PASS" scp "${SSH_OPTS[@]}" "$@"
  fi
}

# ssh to the hub, preferring the configured key over sshpass.
hub_ssh() {
  if [[ -n "$HUB_KEY" ]]; then
    ssh -i "$HUB_KEY" "${SSH_OPTS[@]}" "$@"
  else
    sshpass -p "$HUB_PASS" ssh "${SSH_OPTS[@]}" "$@"
  fi
}

# ── Upload to hub ────────────────────────────────────────────────────────────
echo "== upload archive + extract script + conf to hub ($HUB_HOST) =="
hub_scp "$ARCHIVE" "$EXTRACT_SCRIPT" "$CONF" "$HUB_HOST":/tmp/

# ── Remote: fan-out + extract + rolling upgrade ─────────────────────────────
# The heredoc below runs ON THE HUB via `bash -s`; DEPLOY_ENV and HUB_IDX are
# passed as env assignments in the remote command line. The delimiter is
# quoted ('REMOTE') so nothing inside expands locally.
echo "== fan-out + extract + rolling upgrade from hub =="
hub_ssh "$HUB_HOST" "DEPLOY_ENV=$ENV HUB_IDX=$HUB_IDX bash -s" <<'REMOTE'
set -euo pipefail
export DEBIAN_FRONTEND=noninteractive

TAR=/tmp/network-source.tar.gz
EX=/tmp/extract-deploy.sh
CONF=/tmp/remote-nodes.conf
[[ -f "$TAR" ]] || { echo "Missing $TAR on hub"; exit 2; }
[[ -f "$EX" ]] || { echo "Missing $EX on hub"; exit 2; }
[[ -f "$CONF" ]] || { echo "Missing $CONF on hub"; exit 2; }
chmod +x "$EX" || true

# Parse conf file on the hub — same format as local. idx counts only rows
# matching DEPLOY_ENV so it lines up with the local HUB_IDX.
hosts=()
passes=()
idx=0
hub_host=""
hub_pass=""
# FIX (mirrors local loop): keep the last line when the file has no trailing
# newline.
while IFS='|' read -r env host pass role key || [[ -n "$env" ]]; do
  [[ -z "$env" || "$env" == \#* ]] && continue
  env="${env%%#*}"
  env="$(echo "$env" | xargs)"
  [[ "$env" != "$DEPLOY_ENV" ]] && continue
  if [[ $idx -eq $HUB_IDX ]]; then
    hub_host="$host"
    hub_pass="$pass"
  else
    hosts+=("$host")
    passes+=("$pass")
  fi
  ((idx++)) || true   # '|| true': (( )) returns 1 when idx was 0, would trip set -e
done < "$CONF"

echo "Hub: $hub_host (this machine)"
echo "Fan-out nodes: ${#hosts[@]}"

# Install sshpass on hub if needed (only when there are nodes to fan out to).
if [[ ${#hosts[@]} -gt 0 ]] && ! command -v sshpass >/dev/null 2>&1; then
  echo "Installing sshpass on hub..."
  printf '%s\n' "$hub_pass" | sudo -S apt-get update -y >/dev/null
  printf '%s\n' "$hub_pass" | sudo -S apt-get install -y sshpass >/dev/null
fi

echo "== fan-out: upload to ${#hosts[@]} nodes =="
upload_failed=()
for i in "${!hosts[@]}"; do
  h="${hosts[$i]}"
  p="${passes[$i]}"
  echo " -> $h"
  if ! sshpass -p "$p" scp -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \
      "$TAR" "$EX" "$h":/tmp/; then
    echo " !! UPLOAD FAILED: $h"
    upload_failed+=("$h")
  fi
done

echo "== extract on all fan-out nodes =="
for i in "${!hosts[@]}"; do
  h="${hosts[$i]}"
  p="${passes[$i]}"
  echo " -> $h"
  if ! sshpass -p "$p" ssh -n -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \
      "$h" "printf '%s\n' '$p' | sudo -S bash /tmp/extract-deploy.sh >/tmp/extract.log 2>&1 && echo OK"; then
    echo " !! EXTRACT FAILED: $h"
    upload_failed+=("$h")
  fi
done

if [[ ${#upload_failed[@]} -gt 0 ]]; then
  echo ""
  echo "WARNING: ${#upload_failed[@]} nodes had upload/extract failures:"
  for uf in "${upload_failed[@]}"; do
    echo " - $uf"
  done
  echo "Continuing with rolling restart..."
fi

echo "== extract on hub =="
printf '%s\n' "$hub_pass" | sudo -S bash "$EX" >/tmp/extract.log 2>&1

# ── Raft state detection ──
# Query a node's /status endpoint and print "state num_peers voter".
# Best-effort: any ssh/curl/parse failure yields empty output, never a
# non-zero status (|| true), so detection can continue across nodes.
raft_state() {
  local h="$1" p="$2"
  local cmd="curl -s http://localhost:5001/status"
  local parse_py='import sys,json; j=json.load(sys.stdin); r=j.get("store",{}).get("raft",{}); print((r.get("state") or ""), (r.get("num_peers") or 0), (r.get("voter") is True))'
  sshpass -p "$p" ssh -n -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \
    "$h" "$cmd | python3 -c '$parse_py'" 2>/dev/null || true
}

echo "== detect leader =="
leader=""
leader_pass=""
for i in "${!hosts[@]}"; do
  h="${hosts[$i]}"
  p="${passes[$i]}"
  out="$(raft_state "$h" "$p")"
  echo " $h -> ${out:-NO_OUTPUT}"
  if [[ "$out" == Leader* ]]; then
    leader="$h"
    leader_pass="$p"
    break
  fi
done

# Check hub itself (leader="HUB" is a sentinel meaning "this machine").
if [[ -z "$leader" ]]; then
  hub_out="$(curl -s http://localhost:5001/status | python3 -c 'import sys,json; j=json.load(sys.stdin); r=j.get("store",{}).get("raft",{}); print((r.get("state") or ""), (r.get("num_peers") or 0), (r.get("voter") is True))' 2>/dev/null || true)"
  echo " hub(localhost) -> ${hub_out:-NO_OUTPUT}"
  if [[ "$hub_out" == Leader* ]]; then
    leader="HUB"
    leader_pass="$hub_pass"
  fi
fi

if [[ -z "$leader" ]]; then
  echo "No leader detected. Aborting before upgrades."
  exit 3
fi
echo "Leader: $leader"

failed_nodes=()

# ── Per-node upgrade flow ──
# Uses pre-upgrade (maintenance + leadership transfer + propagation wait)
# then stops, deploys binary, and post-upgrade (start + health verification).
# On failure the node is recorded in failed_nodes and the function returns 1;
# callers use '|| true' so one bad node does not abort the rolling upgrade.
upgrade_one() {
  local h="$1" p="$2"
  echo "== upgrade $h =="

  # 1. Pre-upgrade: enter maintenance, transfer leadership, wait for propagation.
  #    Non-fatal — a node that can't pre-upgrade is still stopped and upgraded.
  echo " [1/4] pre-upgrade..."
  if ! sshpass -p "$p" ssh -n -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \
      "$h" "printf '%s\n' '$p' | sudo -S orama prod pre-upgrade" 2>&1; then
    echo " !! pre-upgrade failed on $h (continuing with stop)"
  fi

  # 2. Stop all services
  echo " [2/4] stopping services..."
  if ! sshpass -p "$p" ssh -n -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \
      "$h" "printf '%s\n' '$p' | sudo -S systemctl stop 'debros-*'" 2>&1; then
    echo " !! stop failed on $h"
    failed_nodes+=("$h")
    return 1
  fi

  # 3. Deploy new binary
  echo " [3/4] deploying binary..."
  if ! sshpass -p "$p" ssh -n -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \
      "$h" "printf '%s\n' '$p' | sudo -S bash /tmp/extract-deploy.sh >/tmp/extract.log 2>&1 && echo OK" 2>&1; then
    echo " !! extract failed on $h"
    failed_nodes+=("$h")
    return 1
  fi

  # 4. Post-upgrade: start services, verify health, exit maintenance
  echo " [4/4] post-upgrade..."
  if ! sshpass -p "$p" ssh -n -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \
      "$h" "printf '%s\n' '$p' | sudo -S orama prod post-upgrade" 2>&1; then
    echo " !! post-upgrade failed on $h"
    failed_nodes+=("$h")
    return 1
  fi
  echo " OK: $h"
}

# Same 4-step flow as upgrade_one, but executed locally on the hub
# (no ssh hop); uses $hub_pass for sudo.
upgrade_hub() {
  echo "== upgrade hub (localhost) =="

  # 1. Pre-upgrade
  echo " [1/4] pre-upgrade..."
  if ! (printf '%s\n' "$hub_pass" | sudo -S orama prod pre-upgrade) 2>&1; then
    echo " !! pre-upgrade failed on hub (continuing with stop)"
  fi

  # 2. Stop all services
  echo " [2/4] stopping services..."
  if ! (printf '%s\n' "$hub_pass" | sudo -S systemctl stop 'debros-*') 2>&1; then
    echo " !! stop failed on hub ($hub_host)"
    failed_nodes+=("$hub_host (hub)")
    return 1
  fi

  # 3. Deploy new binary
  echo " [3/4] deploying binary..."
  if ! (printf '%s\n' "$hub_pass" | sudo -S bash "$EX" >/tmp/extract.log 2>&1); then
    echo " !! extract failed on hub ($hub_host)"
    failed_nodes+=("$hub_host (hub)")
    return 1
  fi

  # 4. Post-upgrade
  echo " [4/4] post-upgrade..."
  if ! (printf '%s\n' "$hub_pass" | sudo -S orama prod post-upgrade) 2>&1; then
    echo " !! post-upgrade failed on hub ($hub_host)"
    failed_nodes+=("$hub_host (hub)")
    return 1
  fi
  echo " OK: hub ($hub_host)"
}

echo "== rolling upgrade (followers first, leader last) =="
for i in "${!hosts[@]}"; do
  h="${hosts[$i]}"
  p="${passes[$i]}"
  [[ "$h" == "$leader" ]] && continue
  upgrade_one "$h" "$p" || true
done

# Upgrade hub if not the leader
if [[ "$leader" != "HUB" ]]; then
  upgrade_hub || true
fi

# Upgrade leader last
echo "== upgrade leader last =="
if [[ "$leader" == "HUB" ]]; then
  upgrade_hub || true
else
  upgrade_one "$leader" "$leader_pass" || true
fi

# Clean up conf from hub (it contains plaintext passwords; TAR/EX are kept
# for manual recovery on failed nodes).
rm -f "$CONF"

# ── Report results ──
echo ""
echo "========================================"
if [[ ${#failed_nodes[@]} -gt 0 ]]; then
  echo "UPGRADE COMPLETED WITH FAILURES (${#failed_nodes[@]} nodes failed):"
  for fn in "${failed_nodes[@]}"; do
    echo " FAILED: $fn"
  done
  echo ""
  echo "Recommended actions:"
  echo " 1. SSH into the failed node(s)"
  echo " 2. Check logs: sudo orama prod logs node --follow"
  echo " 3. Manually run: sudo orama prod post-upgrade"
  echo "========================================"
  exit 1
else
  echo "All nodes upgraded successfully."
  echo "========================================"
fi
REMOTE

echo "== complete =="