Improved scripts and updated makefile

This commit is contained in:
anonpenguin23 2026-02-11 07:09:13 +02:00
parent 490c4f66da
commit 1fb6f9a13e
4 changed files with 45 additions and 790 deletions

View File

@ -84,7 +84,7 @@ test-e2e-quick:
# Network - Distributed P2P Database System
# Makefile for development and build tasks
.PHONY: build clean test run-node run-node2 run-node3 run-example deps tidy fmt vet lint clear-ports install-hooks kill
# Command-style targets: none of them produces a file, so a stray file with the
# same name must never make them look "up to date". The *-quick deploy variants
# are commands as well and are therefore listed here too.
.PHONY: build clean test run-node run-node2 run-node3 run-example deps tidy fmt vet lint clear-ports install-hooks kill redeploy-devnet redeploy-devnet-quick redeploy-testnet redeploy-testnet-quick release health
VERSION := 0.101.6
# Short git hash of the current checkout; falls back to "unknown" when git is
# unavailable. Can be overridden from the environment or with `make COMMIT=...`.
COMMIT ?= $(shell git rev-parse --short HEAD 2>/dev/null || echo unknown)
# `?=` always creates a recursively-expanded variable, which would re-run git on
# every reference. Pin the value once; a command-line COMMIT still wins.
COMMIT := $(COMMIT)
@ -196,6 +196,42 @@ stop:
# Runs scripts/dev-kill-all.sh
kill:
	@bash scripts/dev-kill-all.sh

# Deploy (build + rolling upgrade all nodes). The stem of the target name is
# passed to the script as the environment flag:
#   redeploy-devnet  -> --devnet
#   redeploy-testnet -> --testnet
redeploy-devnet redeploy-testnet: redeploy-%:
	@bash scripts/redeploy.sh --$*

# Deploy without rebuilding: same environment flag plus --no-build
redeploy-devnet-quick redeploy-testnet-quick: redeploy-%-quick:
	@bash scripts/redeploy.sh --$* --no-build

# Interactive release workflow (tag + push)
release:
	@bash scripts/release.sh
# Check health of all nodes in an environment
# Usage: make health ENV=devnet
#
# Reads scripts/remote-nodes.conf (pipe-separated fields: env|host|pass|role|key),
# skips blank lines and lines starting with '#', and runs
# scripts/check-node-health.sh for every node whose env field equals $(ENV).
#
# Implementation notes:
#   - `read ... || [ -n "$$env" ]` keeps a last line that has no trailing newline.
#   - The health script gets its stdin from /dev/null; otherwise a child that
#     reads stdin would consume the remaining lines of the config file and the
#     loop would stop after the first node.
#   - host is trimmed like env and role; pass is forwarded untouched because
#     xargs would alter quotes and backslashes in it.
#   - Every matching node is checked; the target fails at the end if ENV is
#     missing, the config file is unreadable, or any check returned non-zero.
health:
	@if [ -z "$(ENV)" ]; then \
		echo "Usage: make health ENV=devnet|testnet"; \
		exit 1; \
	fi
	@if [ ! -r scripts/remote-nodes.conf ]; then \
		echo "Error: scripts/remote-nodes.conf is missing or unreadable" >&2; \
		exit 1; \
	fi
	@failed=0; \
	while IFS='|' read -r env host pass role key || [ -n "$$env" ]; do \
		[ -z "$$env" ] && continue; \
		case "$$env" in \#*) continue;; esac; \
		env="$$(echo "$$env" | xargs)"; \
		[ "$$env" != "$(ENV)" ] && continue; \
		host="$$(echo "$$host" | xargs)"; \
		role="$$(echo "$$role" | xargs)"; \
		bash scripts/check-node-health.sh "$$host" "$$pass" "$$host ($$role)" < /dev/null || failed=1; \
	done < scripts/remote-nodes.conf; \
	exit $$failed
# Help
help:
@echo "Available targets:"
@ -225,6 +261,14 @@ help:
@echo " Example production test:"
@echo " ORAMA_GATEWAY_URL=https://dbrs.space make test-e2e-prod"
@echo ""
@echo "Deployment:"
@echo " make redeploy-devnet - Build + rolling deploy to all devnet nodes"
@echo " make redeploy-devnet-quick - Deploy to devnet without rebuilding"
@echo " make redeploy-testnet - Build + rolling deploy to all testnet nodes"
@echo " make redeploy-testnet-quick- Deploy to testnet without rebuilding"
@echo " make health ENV=devnet - Check health of all nodes in an environment"
@echo " make release - Interactive release workflow (tag + push)"
@echo ""
@echo "Development Management (via orama):"
@echo " ./bin/orama dev status - Show status of all dev services"
@echo " ./bin/orama dev logs <component> [--follow]"

View File

@ -1,298 +0,0 @@
#!/usr/bin/env bash
# block-node.sh - make a gateway node (local or remote) unreachable for a while
#
# Usage:
# Local: ./scripts/block-node.sh <node_number> <duration_seconds>
# Remote: ./scripts/block-node.sh --remote <remote_node_number> <duration_seconds>
#
# Example:
# ./scripts/block-node.sh 1 60 # Block local node-1 (port 6001) for 60 seconds
# ./scripts/block-node.sh --remote 2 120 # Block remote node-2 for 120 seconds

# Abort on errors, on unset variables and on failures inside pipelines
set -euo pipefail

# Remote node credentials are read from a config file that sits next to this script
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
CONFIG_FILE="${SCRIPT_DIR}/remote-nodes.conf"

# ANSI colour escapes, stored as literal backslash sequences and expanded
# later by `echo -e` / printf
NC="\033[0m" # reset
RED="\033[0;31m"
GREEN="\033[0;32m"
YELLOW="\033[1;33m"
BLUE="\033[0;34m"
# Look up one field of a remote node entry in $CONFIG_FILE.
#
# Config lines have the form "<num>|<user@host>|<password>". Blank lines and
# lines starting with '#' are ignored; whitespace around each field is trimmed.
#
# Arguments:
#   $1 - node number to look up
#   $2 - field to print: "user_host" or "password"
# Output:
#   the requested field on stdout (an empty line when nothing was found)
# Returns:
#   0 if the node entry exists, 1 if the config file or the entry is missing
get_remote_node_config() {
    local node_num="$1"
    local field="$2" # "user_host" or "password"
    local num user_host password

    if [ ! -f "$CONFIG_FILE" ]; then
        echo ""
        return 1
    fi

    # `|| [ -n "$num" ]` keeps a final line that lacks a trailing newline
    while IFS='|' read -r num user_host password || [ -n "$num" ]; do
        # Trim surrounding whitespace with `read` instead of xargs: xargs
        # interprets quotes and backslashes, so it corrupts (or rejects)
        # passwords that contain them.
        read -r num <<< "$num"
        read -r user_host <<< "$user_host"
        read -r password <<< "$password"

        # Skip comments and empty lines
        if [[ -z "$num" || "$num" == \#* ]]; then
            continue
        fi

        if [ "$num" = "$node_num" ]; then
            if [ "$field" = "user_host" ]; then
                echo "$user_host"
            elif [ "$field" = "password" ]; then
                echo "$password"
            fi
            return 0
        fi
    done < "$CONFIG_FILE"

    echo ""
    return 1
}
# Display usage
usage() {
echo -e "${RED}Error:${NC} Invalid arguments"
echo ""
echo -e "${BLUE}Usage:${NC}"
echo " $0 <node_number> <duration_seconds> # Local mode"
echo " $0 --remote <remote_node_number> <duration_seconds> # Remote mode"
echo ""
echo -e "${GREEN}Local Mode Examples:${NC}"
echo " $0 1 60 # Block local node-1 (port 6001) for 60 seconds"
echo " $0 2 120 # Block local node-2 (port 6002) for 120 seconds"
echo ""
echo -e "${GREEN}Remote Mode Examples:${NC}"
echo " $0 --remote 1 60 # Block remote node-1 (51.83.128.181) for 60 seconds"
echo " $0 --remote 3 120 # Block remote node-3 (83.171.248.66) for 120 seconds"
echo ""
echo -e "${YELLOW}Local Node Mapping:${NC}"
echo " Node 1 -> Port 6001"
echo " Node 2 -> Port 6002"
echo " Node 3 -> Port 6003"
echo " Node 4 -> Port 6004"
echo " Node 5 -> Port 6005"
echo ""
echo -e "${YELLOW}Remote Node Mapping:${NC}"
echo " Remote 1 -> ubuntu@51.83.128.181"
echo " Remote 2 -> root@194.61.28.7"
echo " Remote 3 -> root@83.171.248.66"
echo " Remote 4 -> root@62.72.44.87"
exit 1
}
# Parse arguments
#   --remote <node> <seconds>  -> remote mode
#   <node> <seconds>           -> local mode
# Anything else prints the usage text and exits.
REMOTE_MODE=false
if [ $# -eq 3 ] && [ "$1" == "--remote" ]; then
    REMOTE_MODE=true
    NODE_NUM="$2"
    DURATION="$3"
elif [ $# -eq 2 ]; then
    NODE_NUM="$1"
    DURATION="$2"
else
    usage
fi

# Validate the node number before it reaches the arithmetic below: bash
# evaluates non-numeric text inside $(( )) as an expression, so an unchecked
# argument could abort with a confusing error or be interpreted as code.
# The per-mode range check (1-5 local, 1-4 remote) still happens later.
if ! [[ "$NODE_NUM" =~ ^[0-9]+$ ]]; then
    echo -e "${RED}Error:${NC} Node number must be a positive integer"
    exit 1
fi

# Validate duration
if ! [[ "$DURATION" =~ ^[0-9]+$ ]]; then
    echo -e "${RED}Error:${NC} Duration must be a positive integer"
    exit 1
fi
# Force base 10: a value such as "08" passes the regex but would later be
# rejected as invalid octal by the (( )) countdown loops.
DURATION=$((10#$DURATION))
if [ "$DURATION" -le 0 ]; then
    echo -e "${RED}Error:${NC} Duration must be a positive integer"
    exit 1
fi

# Calculate port (local nodes use 6001-6005, remote nodes use 80 and 443)
if [ "$REMOTE_MODE" = true ]; then
    # Remote nodes: block standard HTTP/HTTPS ports
    PORTS="80 443"
else
    # Local nodes: block the specific gateway port (10# avoids octal parsing)
    PORT=$((6000 + 10#$NODE_NUM))
fi
# Remove the DROP rules that block_remote_node installed on a remote host.
# Arguments: $1 - user@host, $2 - password, $3 - space-separated port list.
# Relies on the global SSH_OPTS set by block_remote_node. Each deletion is
# best-effort (|| true), so an already-missing rule is not an error; the return
# status is that of the ssh invocation.
_unblock_remote_ports() {
    local user_host="$1"
    local password="$2"
    local ports="$3"
    local port
    local cmds=""

    for port in $ports; do
        cmds="${cmds}iptables -D INPUT -p tcp --dport $port -j DROP 2>/dev/null || true; "
        cmds="${cmds}iptables -D OUTPUT -p tcp --sport $port -j DROP 2>/dev/null || true; "
    done
    cmds="${cmds}echo 'Rules removed'"

    # SSH_OPTS is intentionally unquoted so it splits into separate options
    SSHPASS="$password" sshpass -e ssh $SSH_OPTS "$user_host" "$cmds"
}

# INT/TERM handler used while a remote node is blocked: removes the rules
# recorded in the _BLOCKED_* globals, then exits with the status given as $1.
_abort_remote_block() {
    trap - INT TERM
    echo ""
    echo ""
    echo -e "${YELLOW}Interrupted - removing iptables rules from remote server...${NC}"
    if ! _unblock_remote_ports "$_BLOCKED_USER_HOST" "$_BLOCKED_PASSWORD" "$_BLOCKED_PORTS"; then
        echo -e "${YELLOW}Warning:${NC} Failed to remove some iptables rules. You may need to clean up manually."
    fi
    exit "$1"
}

# Block TCP ports on a remote node for a fixed time using iptables over SSH.
#
# Arguments:
#   $1 - remote node number (1-4), resolved through get_remote_node_config
#   $2 - duration in seconds
#   $3 - space-separated list of ports, e.g. "80 443"
#
# Exits the script with status 1 on an invalid node number, missing
# configuration, missing sshpass, or an SSH failure while connecting or
# installing the rules. If the script is interrupted (INT/TERM) while the
# rules are installed, they are removed again before exiting.
block_remote_node() {
    local node_num="$1"
    local duration="$2"
    local ports="$3" # Can be space-separated list like "80 443"
    local user_host password host port i

    # Validate remote node number
    if ! [[ "$node_num" =~ ^[1-4]$ ]]; then
        echo -e "${RED}Error:${NC} Remote node number must be between 1 and 4"
        exit 1
    fi

    # Get credentials from config file. `|| true` keeps `set -e` from aborting
    # here so that the friendlier error below is shown instead.
    user_host=$(get_remote_node_config "$node_num" "user_host") || true
    password=$(get_remote_node_config "$node_num" "password") || true
    if [ -z "$user_host" ] || [ -z "$password" ]; then
        echo -e "${RED}Error:${NC} Configuration for remote node $node_num not found in $CONFIG_FILE"
        exit 1
    fi
    host="${user_host##*@}"

    echo -e "${BLUE}=== Remote Network Blocking Tool ===${NC}"
    echo -e "Remote Node: ${GREEN}$node_num${NC} ($user_host)"
    echo -e "Ports: ${GREEN}$ports${NC}"
    echo -e "Duration: ${GREEN}$duration seconds${NC}"
    echo ""

    # Check if sshpass is installed
    if ! command -v sshpass &> /dev/null; then
        echo -e "${RED}Error:${NC} sshpass is not installed. Install it first:"
        echo -e " ${YELLOW}macOS:${NC} brew install hudochenkov/sshpass/sshpass"
        echo -e " ${YELLOW}Ubuntu/Debian:${NC} sudo apt-get install sshpass"
        exit 1
    fi

    # SSH options - force password authentication only to avoid "too many auth failures"
    SSH_OPTS="-o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -o LogLevel=ERROR -o PreferredAuthentications=password -o PubkeyAuthentication=no -o NumberOfPasswordPrompts=1"

    echo -e "${YELLOW}Connecting to remote server...${NC}"
    # Test connection. The password is handed over through the SSHPASS
    # environment variable (sshpass -e) rather than -p, so it does not show up
    # in the process list.
    if ! SSHPASS="$password" sshpass -e ssh $SSH_OPTS "$user_host" "echo 'Connected successfully' > /dev/null"; then
        echo -e "${RED}Error:${NC} Failed to connect to $user_host"
        exit 1
    fi
    echo -e "${GREEN}${NC} Connected to $host"

    # Install iptables rules on remote server
    echo -e "${YELLOW}Installing iptables rules on remote server...${NC}"
    # Build iptables commands for all ports
    BLOCK_CMDS=""
    for port in $ports; do
        BLOCK_CMDS="${BLOCK_CMDS}iptables -I INPUT -p tcp --dport $port -j DROP 2>/dev/null || true; "
        BLOCK_CMDS="${BLOCK_CMDS}iptables -I OUTPUT -p tcp --sport $port -j DROP 2>/dev/null || true; "
    done
    BLOCK_CMDS="${BLOCK_CMDS}echo 'Rules installed'"

    # From here on an interrupt must not leave the node blocked: remember what
    # has to be undone and install the cleanup handler before touching iptables.
    _BLOCKED_USER_HOST="$user_host"
    _BLOCKED_PASSWORD="$password"
    _BLOCKED_PORTS="$ports"
    trap '_abort_remote_block 130' INT
    trap '_abort_remote_block 143' TERM

    if ! SSHPASS="$password" sshpass -e ssh $SSH_OPTS "$user_host" "$BLOCK_CMDS"; then
        trap - INT TERM
        echo -e "${RED}Error:${NC} Failed to install iptables rules"
        exit 1
    fi
    echo -e "${GREEN}${NC} Ports $ports are now blocked on $host"
    echo -e "${YELLOW}Waiting $duration seconds...${NC}"
    echo ""

    # Show countdown
    for ((i=duration; i>0; i--)); do
        printf "\r${BLUE}Time remaining: %3d seconds${NC}" "$i"
        sleep 1
    done
    echo ""
    echo ""

    echo -e "${YELLOW}Removing iptables rules from remote server...${NC}"
    if ! _unblock_remote_ports "$user_host" "$password" "$ports"; then
        echo -e "${YELLOW}Warning:${NC} Failed to remove some iptables rules. You may need to clean up manually."
    else
        echo -e "${GREEN}${NC} Ports $ports are now accessible again on $host"
    fi
    trap - INT TERM

    echo ""
    echo -e "${GREEN}=== Done! ===${NC}"
    echo -e "Remote node ${GREEN}$node_num${NC} ($host) was unreachable for $duration seconds and is now accessible again."
}
# INT/TERM handler used while a local process is paused: sends SIGCONT to the
# process recorded in the global PID, then exits with the status given as $1.
_abort_local_block() {
    trap - INT TERM
    echo ""
    echo ""
    echo -e "${YELLOW}Interrupted - resuming process (SIGCONT)...${NC}"
    kill -CONT "$PID" 2>/dev/null || true
    exit "$1"
}

# Make a local node unreachable for a fixed time by pausing the process that
# listens on its port (SIGSTOP) and resuming it afterwards (SIGCONT).
#
# Arguments:
#   $1 - local node number (1-5)
#   $2 - duration in seconds
#   $3 - TCP port the node listens on
#
# Sets the globals PID and PROCESS_NAME. Exits the script with status 1 on an
# invalid node number, when no listener is found, or when the process cannot
# be paused. If the script is interrupted (INT/TERM) while the process is
# paused, the process is resumed before exiting.
block_local_node() {
    local node_num="$1"
    local duration="$2"
    local port="$3"
    local i

    # Validate node number
    if ! [[ "$node_num" =~ ^[1-5]$ ]]; then
        echo -e "${RED}Error:${NC} Local node number must be between 1 and 5"
        exit 1
    fi

    echo -e "${BLUE}=== Local Network Blocking Tool ===${NC}"
    echo -e "Node: ${GREEN}node-$node_num${NC}"
    echo -e "Port: ${GREEN}$port${NC}"
    echo -e "Duration: ${GREEN}$duration seconds${NC}"
    echo -e "Method: ${GREEN}Process Pause (SIGSTOP/SIGCONT)${NC}"
    echo ""

    # Find the process listening on the port
    echo -e "${YELLOW}Finding process listening on port $port...${NC}"
    # macOS uses different tools than Linux. The trailing `|| echo ""` keeps
    # `set -e` / pipefail from aborting when nothing is listening, so the
    # explicit error below is shown instead.
    if [[ "$(uname -s)" == "Darwin" ]]; then
        # macOS: use lsof
        PID=$(lsof -ti :"$port" 2>/dev/null | head -1 || echo "")
    else
        # Linux: use ss or netstat
        if command -v ss &> /dev/null; then
            PID=$(ss -tlnp | grep ":$port " | grep -oP 'pid=\K[0-9]+' | head -1 || echo "")
        else
            PID=$(netstat -tlnp 2>/dev/null | grep ":$port " | awk '{print $7}' | cut -d'/' -f1 | head -1 || echo "")
        fi
    fi

    if [ -z "$PID" ]; then
        echo -e "${RED}Error:${NC} No process found listening on port $port"
        echo -e "Make sure node-$node_num is running first."
        exit 1
    fi

    # Get process name
    PROCESS_NAME=$(ps -p "$PID" -o comm= 2>/dev/null || echo "unknown")
    echo -e "${GREEN}${NC} Found process: ${BLUE}$PROCESS_NAME${NC} (PID: ${BLUE}$PID${NC})"
    echo ""

    # An interrupt during the wait must not leave the process frozen: install
    # the resume handler before sending SIGSTOP.
    trap '_abort_local_block 130' INT
    trap '_abort_local_block 143' TERM

    # Pause the process
    echo -e "${YELLOW}Pausing process (SIGSTOP)...${NC}"
    if ! kill -STOP "$PID" 2>/dev/null; then
        trap - INT TERM
        echo -e "${RED}Error:${NC} Failed to pause process. You may need sudo privileges."
        exit 1
    fi
    echo -e "${GREEN}${NC} Process paused - node-$node_num is now unreachable"
    echo -e "${YELLOW}Waiting $duration seconds...${NC}"
    echo ""

    # Show countdown
    for ((i=duration; i>0; i--)); do
        printf "\r${BLUE}Time remaining: %3d seconds${NC}" "$i"
        sleep 1
    done
    echo ""
    echo ""

    # Resume the process
    echo -e "${YELLOW}Resuming process (SIGCONT)...${NC}"
    if ! kill -CONT "$PID" 2>/dev/null; then
        echo -e "${YELLOW}Warning:${NC} Failed to resume process. It may have been terminated."
    else
        echo -e "${GREEN}${NC} Process resumed - node-$node_num is now accessible again"
    fi
    trap - INT TERM

    echo ""
    echo -e "${GREEN}=== Done! ===${NC}"
    echo -e "Local node ${GREEN}node-$node_num${NC} was unreachable for $duration seconds and is now accessible again."
}
# Main execution: dispatch on the mode selected during argument parsing
case "$REMOTE_MODE" in
    true)
        block_remote_node "$NODE_NUM" "$DURATION" "$PORTS"
        ;;
    *)
        block_local_node "$NODE_NUM" "$DURATION" "$PORT"
        ;;
esac

View File

@ -1,112 +0,0 @@
#!/bin/bash
set -e

# Compiles a custom CoreDNS binary that includes the RQLite plugin.
# This first stage prepares a clean build directory and checks out the CoreDNS
# sources; the script continues from inside the cloned tree.

COREDNS_VERSION='1.11.1'
COREDNS_DIR='/tmp/coredns-build'
SCRIPT_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
PROJECT_ROOT=$(dirname "$SCRIPT_DIR")

printf '%s\n' "Building CoreDNS v${COREDNS_VERSION} with RQLite plugin..."

# Start from an empty build directory every time
rm -rf "$COREDNS_DIR"
mkdir -p "$COREDNS_DIR"

# Shallow-clone the release tag straight into the build directory, then enter it
printf '%s\n' "Cloning CoreDNS..."
cd "$COREDNS_DIR"
git clone --depth 1 --branch "v${COREDNS_VERSION}" https://github.com/coredns/coredns.git coredns
cd coredns
# Create plugin.cfg with RQLite plugin
# The heredoc below overwrites plugin.cfg in the freshly cloned CoreDNS tree
# (the current directory at this point). Each non-comment line has the form
# <name>:<package>. Entries without a slash look like in-tree plugin
# directories, entries with a full import path look like external modules -
# presumably both are resolved by CoreDNS' code generation during `make`
# (see the CoreDNS plugin.cfg documentation; the order of the lines may matter
# there - TODO confirm before reordering).
# NOTE(review): `rrl:rrl` is written like an in-tree plugin - confirm that the
# checked-out CoreDNS version really ships plugin/rrl.
# NOTE(review): the rqlite entry uses the full module import path, while a
# later step of this script also copies the plugin sources into plugin/rqlite -
# confirm which of the two the build is meant to pick up.
echo "Configuring plugins..."
cat > plugin.cfg <<EOF
# Standard CoreDNS plugins
metadata:metadata
cancel:cancel
tls:tls
reload:reload
nsid:nsid
bufsize:bufsize
root:root
bind:bind
debug:debug
trace:trace
ready:ready
health:health
pprof:pprof
prometheus:metrics
errors:errors
log:log
dnstap:dnstap
local:local
dns64:dns64
acl:acl
any:any
chaos:chaos
loadbalance:loadbalance
cache:cache
rewrite:rewrite
header:header
dnssec:dnssec
autopath:autopath
minimal:minimal
template:template
transfer:transfer
hosts:hosts
route53:route53
azure:azure
clouddns:clouddns
k8s_external:k8s_external
kubernetes:kubernetes
file:file
auto:auto
secondary:secondary
loop:loop
forward:forward
grpc:grpc
erratic:erratic
whoami:whoami
on:github.com/coredns/caddy/onevent
sign:sign
view:view
# Response Rate Limiting (DNS amplification protection)
rrl:rrl
# Custom RQLite plugin
rqlite:github.com/DeBrosOfficial/network/pkg/coredns/rqlite
EOF
# Copy RQLite plugin to CoreDNS
# (runs inside the cloned CoreDNS tree; sources come from the project checkout)
echo "Copying RQLite plugin..."
mkdir -p plugin/rqlite
cp -r "$PROJECT_ROOT/pkg/coredns/rqlite/"* plugin/rqlite/

# Update go.mod to include our dependencies
# NOTE(review): rqlite-go is fetched at @latest, so two runs of this script can
# build against different versions - consider pinning.
echo "Updating dependencies..."
go get github.com/rqlite/rqlite-go@latest
go get "github.com/coredns/coredns@v${COREDNS_VERSION}"
go mod tidy

# Build CoreDNS
echo "Building CoreDNS binary..."
make

# Copy binary to project
# bin/ may not exist on a fresh checkout; create it so the copy cannot fail
# after the (long) build has already succeeded.
echo "Copying binary to project..."
mkdir -p "$PROJECT_ROOT/bin"
cp coredns "$PROJECT_ROOT/bin/coredns-custom"
chmod +x "$PROJECT_ROOT/bin/coredns-custom"

echo ""
echo "✅ CoreDNS built successfully!"
echo "Binary location: $PROJECT_ROOT/bin/coredns-custom"
echo ""
echo "To deploy:"
echo " 1. Copy binary to /usr/local/bin/coredns on each nameserver node"
echo " 2. Copy configs/coredns/Corefile to /etc/coredns/Corefile"
echo " 3. Start CoreDNS: sudo systemctl start coredns"
echo ""

View File

@ -1,379 +0,0 @@
#!/bin/bash
# Production Cluster Health Check Script
# Exercises RQLite, IPFS and IPFS Cluster on every node: connectivity first,
# then replication / consistency.
#
# 'set -e' is deliberately NOT enabled: a failing check must be counted and
# reported, not abort the remaining checks.

# ANSI colour escapes, stored as literal backslash sequences and expanded
# later by `echo -e`
NC="\033[0m" # reset
RED="\033[0;31m"
GREEN="\033[0;32m"
YELLOW="\033[1;33m"
BLUE="\033[0;34m"

# Node IPs - each one can be overridden through the environment variable of
# the same name; the values below are only defaults
: "${BOOTSTRAP:=51.83.128.181}"
: "${NODE1:=57.128.223.92}"
: "${NODE2:=185.185.83.89}"
ALL_NODES=($BOOTSTRAP $NODE1 $NODE2)

# Result counters, incremented by print_pass / print_fail / print_warn
PASSED=0 FAILED=0 WARNINGS=0
# Helper functions
# All of them print $1 with colour escapes expanded (%b behaves like echo -e).

# Section banner: blank line, then the title framed by two rules, all in blue
print_header() {
    local rule="========================================"
    printf '\n'
    printf '%b\n' "${BLUE}${rule}${NC}" "${BLUE}$1${NC}" "${BLUE}${rule}${NC}"
}

# Announce the check that is about to run (yellow)
print_test() {
    printf '%b\n' "${YELLOW}$1${NC}"
}

# Report a passed check (green) and count it
print_pass() {
    printf '%b\n' "${GREEN}$1${NC}"
    : "$((PASSED += 1))"
}

# Report a failed check (red) and count it
print_fail() {
    printf '%b\n' "${RED}$1${NC}"
    : "$((FAILED += 1))"
}

# Report a warning (yellow) and count it
print_warn() {
    printf '%b\n' "${YELLOW}$1${NC}"
    : "$((WARNINGS += 1))"
}

# Print an uncounted detail line, prefixed by a single space
print_info() {
    printf '%b\n' " $1"
}
# Test functions

# Check 1: query /status on port 5001 of every node and verify the Raft state
# (Leader/Follower), the peer count (2), the number of configured cluster
# members (3), and finally that the cluster as a whole has one Leader plus two
# Followers and that the commit indices agree.
test_rqlite_status() {
    print_header "1. RQLITE CLUSTER STATUS"

    local has_leader=false
    local followers=0
    local commit_indices=()
    local node

    for node in "${ALL_NODES[@]}"; do
        print_test "Testing RQLite on $node"
        if ! response=$(curl -s --max-time 5 http://$node:5001/status 2>/dev/null); then
            print_fail "Cannot connect to RQLite on $node:5001"
            continue
        fi

        # Pull the interesting Raft fields out of the status document;
        # `// default` supplies a placeholder for missing keys
        local raft_state peers commit contact raft_config members
        raft_state=$(jq -r '.store.raft.state // "unknown"' <<< "$response")
        peers=$(jq -r '.store.raft.num_peers // 0' <<< "$response")
        commit=$(jq -r '.store.raft.commit_index // 0' <<< "$response")
        contact=$(jq -r '.store.raft.last_contact // "N/A"' <<< "$response")
        raft_config=$(jq -r '.store.raft.latest_configuration // "[]"' <<< "$response")
        # Every configured member contributes one "Address" occurrence
        members=$(grep -o "Address" <<< "$raft_config" | wc -l | tr -d ' ')
        commit_indices+=($commit)

        print_info "State: $raft_state | Peers: $peers | Commit Index: $commit | Cluster Nodes: $members"

        # Check state
        case "$raft_state" in
            Leader)
                has_leader=true
                print_pass "Node $node is the Leader"
                ;;
            Follower)
                followers=$((followers + 1))
                # Check last contact
                if [ "$contact" = "N/A" ] || [ "$contact" = "0" ]; then
                    print_warn "Node $node is Follower but last_contact is $contact"
                else
                    print_pass "Node $node is a Follower (last contact: $contact)"
                fi
                ;;
            *)
                print_fail "Node $node has unexpected state: $raft_state"
                ;;
        esac

        # Check peer count
        if [ "$peers" != "2" ]; then
            print_fail "Node $node has incorrect peer count: $peers (expected 2)"
        else
            print_pass "Node $node has correct peer count: 2"
        fi

        # Check cluster configuration
        if [ "$members" != "3" ]; then
            print_fail "Node $node only sees $members cluster members (expected 3)"
        else
            print_pass "Node $node sees all 3 cluster members"
        fi
        echo ""
    done

    # Check for exactly 1 leader
    if [ "$has_leader" = true ] && [ "$followers" = "2" ]; then
        print_pass "Cluster has 1 Leader and 2 Followers ✓"
    else
        print_fail "Invalid cluster state (Leader found: $has_leader, Followers: $followers)"
    fi

    # Check commit index sync (only meaningful when all three nodes answered)
    if [ ${#commit_indices[@]} -ne 3 ]; then
        return
    fi
    local reference="${commit_indices[0]}"
    local in_sync=true
    local value
    for value in "${commit_indices[@]}"; do
        if [ "$value" != "$reference" ]; then
            in_sync=false
            break
        fi
    done
    if [ "$in_sync" = true ]; then
        print_pass "All nodes have synced commit index: $reference"
    else
        print_warn "Commit indices differ: ${commit_indices[*]} (might be normal if writes are happening)"
    fi
}
# Check 2: write a uniquely tagged row through $BOOTSTRAP and verify that every
# node in ALL_NODES can read it back.
#
# The read-back uses level=none so that each node answers from its own local
# SQLite copy. With level=weak a non-leader node does not answer from local
# data (the read is served via the leader), so the check would pass even if
# replication to that node were broken.
#
# Side effects: creates the table test_cluster_health if needed and inserts one
# row per run. NOTE(review): rows are never deleted - confirm whether a cleanup
# step is wanted on the production database.
test_rqlite_replication() {
    print_header "2. RQLITE REPLICATION TEST"
    print_test "Creating test table and inserting data on leader ($BOOTSTRAP)"

    local response error node

    # Create table
    if ! response=$(curl -s --max-time 5 -XPOST "http://$BOOTSTRAP:5001/db/execute" \
        -H "Content-Type: application/json" \
        -d '[["CREATE TABLE IF NOT EXISTS test_cluster_health (id INTEGER PRIMARY KEY AUTOINCREMENT, timestamp TEXT, node TEXT, value TEXT)"]]' 2>/dev/null); then
        print_fail "Failed to create table"
        return
    fi
    # jq -e succeeds only when an error field is present in the first result
    if echo "$response" | jq -e '.results[0].error' >/dev/null 2>&1; then
        error=$(echo "$response" | jq -r '.results[0].error')
        if [[ "$error" != "table test_cluster_health already exists" ]]; then
            print_fail "Table creation error: $error"
            return
        fi
    fi
    print_pass "Table exists"

    # Insert test data; the epoch timestamp makes the value unique per run
    local test_value="test_$(date +%s)"
    if ! response=$(curl -s --max-time 5 -XPOST "http://$BOOTSTRAP:5001/db/execute" \
        -H "Content-Type: application/json" \
        -d "[[\"INSERT INTO test_cluster_health (timestamp, node, value) VALUES (datetime('now'), 'bootstrap', '$test_value')\"]]" 2>/dev/null); then
        print_fail "Failed to insert data"
        return
    fi
    if echo "$response" | jq -e '.results[0].error' >/dev/null 2>&1; then
        error=$(echo "$response" | jq -r '.results[0].error')
        print_fail "Insert error: $error"
        return
    fi
    print_pass "Data inserted: $test_value"

    # Wait for replication
    print_info "Waiting 2 seconds for replication..."
    sleep 2

    # Query from all nodes, each one answering from its local copy (level=none)
    for node in "${ALL_NODES[@]}"; do
        print_test "Reading from $node"
        if ! response=$(curl -s --max-time 5 -XPOST "http://$node:5001/db/query?level=none" \
            -H "Content-Type: application/json" \
            -d "[\"SELECT * FROM test_cluster_health WHERE value = '$test_value' LIMIT 1\"]" 2>/dev/null); then
            print_fail "Failed to query from $node"
            continue
        fi
        if echo "$response" | jq -e '.results[0].error' >/dev/null 2>&1; then
            error=$(echo "$response" | jq -r '.results[0].error')
            print_fail "Query error on $node: $error"
            continue
        fi

        local row_count retrieved_value
        row_count=$(echo "$response" | jq -r '.results[0].values | length // 0')
        if [ "$row_count" = "1" ]; then
            # Column 3 is "value" (id, timestamp, node, value)
            retrieved_value=$(echo "$response" | jq -r '.results[0].values[0][3] // ""')
            if [ "$retrieved_value" = "$test_value" ]; then
                print_pass "Data replicated correctly to $node"
            else
                print_fail "Data mismatch on $node (got: $retrieved_value, expected: $test_value)"
            fi
        else
            print_fail "Expected 1 row from $node, got $row_count"
        fi
    done
}
# Check 3: POST /api/v0/id on port 4501 of every node and confirm that the
# reply carries a peer ID.
test_ipfs_status() {
    print_header "3. IPFS DAEMON STATUS"

    local node
    for node in "${ALL_NODES[@]}"; do
        print_test "Testing IPFS on $node"
        if ! response=$(curl -s --max-time 5 -X POST http://$node:4501/api/v0/id 2>/dev/null); then
            print_fail "Cannot connect to IPFS on $node:4501"
            continue
        fi

        local ipfs_id address_total agent_version
        ipfs_id=$(jq -r '.ID // "unknown"' <<< "$response")
        address_total=$(jq -r '.Addresses | length // 0' <<< "$response")
        agent_version=$(jq -r '.AgentVersion // "unknown"' <<< "$response")

        # "unknown" is the placeholder substituted when the reply has no ID
        if [ "$ipfs_id" = "unknown" ]; then
            print_fail "IPFS not responding correctly on $node"
            continue
        fi
        # Only the first 12 characters of the ID are shown
        print_pass "IPFS running on $node (ID: ${ipfs_id:0:12}...)"
        print_info "Agent: $agent_version | Addresses: $address_total"
    done
}
# Check 4: POST /api/v0/swarm/peers on port 4501 of every node and verify the
# number of connected swarm peers: exactly 2 passes, any other positive number
# is a warning, zero is a failure.
#
# A reply that jq cannot turn into a number (non-JSON body, error page) is
# reported as its own failure instead of reaching the numeric comparison,
# where it would only produce an "integer expression expected" shell error
# and a misleading "no peers" message.
test_ipfs_swarm() {
    print_header "4. IPFS SWARM CONNECTIVITY"

    local node response peer_count
    for node in "${ALL_NODES[@]}"; do
        print_test "Checking IPFS swarm peers on $node"
        if ! response=$(curl -s --max-time 5 -X POST http://$node:4501/api/v0/swarm/peers 2>/dev/null); then
            print_fail "Failed to get swarm peers from $node"
            continue
        fi

        peer_count=$(echo "$response" | jq -r '.Peers | length // 0' 2>/dev/null)
        if ! [[ "$peer_count" =~ ^[0-9]+$ ]]; then
            print_fail "Unexpected swarm peers response from $node"
            continue
        fi

        if [ "$peer_count" = "2" ]; then
            print_pass "Node $node connected to 2 IPFS peers"
        elif [ "$peer_count" -gt "0" ]; then
            print_warn "Node $node connected to $peer_count IPFS peers (expected 2)"
        else
            print_fail "Node $node has no IPFS swarm peers"
        fi
    done
}
# Check 5: GET /id on port 9094 of every node, confirm that the reply carries
# a cluster ID and compare the number of listed cluster peers against 3.
test_ipfs_cluster_status() {
    print_header "5. IPFS CLUSTER STATUS"

    local node
    for node in "${ALL_NODES[@]}"; do
        print_test "Testing IPFS Cluster on $node"
        if ! response=$(curl -s --max-time 5 http://$node:9094/id 2>/dev/null); then
            print_fail "Cannot connect to IPFS Cluster on $node:9094"
            continue
        fi

        local member_id peer_total cluster_version
        member_id=$(jq -r '.id // "unknown"' <<< "$response")
        peer_total=$(jq -r '.cluster_peers | length // 0' <<< "$response")
        cluster_version=$(jq -r '.version // "unknown"' <<< "$response")

        # "unknown" is the placeholder substituted when the reply has no id
        if [ "$member_id" = "unknown" ]; then
            print_fail "IPFS Cluster not responding correctly on $node"
            continue
        fi

        # Only the first 12 characters of the ID are shown
        print_pass "IPFS Cluster running on $node (ID: ${member_id:0:12}...)"
        print_info "Version: $cluster_version | Cluster Peers: $peer_total"

        if [ "$peer_total" != "3" ]; then
            print_warn "Node $node sees $peer_total cluster peers (expected 3)"
        else
            print_pass "Node $node sees all 3 cluster peers"
        fi
    done
}
# Check 6: GET /pins on port 9094 of every node, count the pins and verify
# that all nodes report the same number.
#
# A node whose reply cannot be reduced to a single number is counted as a
# failure and recorded as 0, exactly like a node that could not be reached.
# Without this guard an unparsable reply was reported as a PASS with an empty
# count and silently removed the node from the consistency comparison.
# NOTE(review): this assumes /pins returns one JSON array; if the deployed
# IPFS Cluster version streams one JSON object per line instead, the reply is
# now flagged as unexpected - confirm against the deployed version.
test_ipfs_cluster_pins() {
    print_header "6. IPFS CLUSTER PIN CONSISTENCY"

    local pin_counts=()
    local node response pin_count
    for node in "${ALL_NODES[@]}"; do
        print_test "Checking pins on $node"
        if ! response=$(curl -s --max-time 5 http://$node:9094/pins 2>/dev/null); then
            print_fail "Failed to get pins from $node"
            pin_counts+=(0)
            continue
        fi

        pin_count=$(echo "$response" | jq -r 'length // 0' 2>/dev/null)
        if ! [[ "$pin_count" =~ ^[0-9]+$ ]]; then
            print_fail "Unexpected pins response from $node"
            pin_counts+=(0)
            continue
        fi

        pin_counts+=("$pin_count")
        print_pass "Node $node has $pin_count pins"
    done

    # Check if all nodes have same pin count
    if [ ${#pin_counts[@]} -eq 3 ]; then
        local first="${pin_counts[0]}"
        local all_same=true
        local count
        for count in "${pin_counts[@]}"; do
            if [ "$count" != "$first" ]; then
                all_same=false
                break
            fi
        done
        if [ "$all_same" = true ]; then
            print_pass "All nodes have consistent pin count: $first"
        else
            print_warn "Pin counts differ: ${pin_counts[*]} (might be syncing)"
        fi
    fi
}
# Print the pass / warning / failure totals and terminate the script.
# Exit status: 0 when nothing failed, 1 for one or two failures, 2 for more.
print_summary() {
    local verdict
    local status

    print_header "TEST SUMMARY"
    echo ""
    echo -e "${GREEN}Passed: $PASSED${NC}"
    echo -e "${YELLOW}Warnings: $WARNINGS${NC}"
    echo -e "${RED}Failed: $FAILED${NC}"
    echo ""

    # Decide verdict and exit status first, then report once
    if test $FAILED -eq 0; then
        verdict="${GREEN}🎉 All critical tests passed! Cluster is healthy.${NC}"
        status=0
    elif test $FAILED -le 2; then
        verdict="${YELLOW}⚠️ Some tests failed. Review the output above.${NC}"
        status=1
    else
        verdict="${RED}❌ Multiple failures detected. Cluster needs attention.${NC}"
        status=2
    fi

    echo -e "$verdict"
    exit $status
}
# Main execution
# Prints the banner and the node list, runs every check in order and finishes
# with print_summary, which sets the exit status (0, 1 or 2).
# Exits early with status 127 when curl or jq is not installed: every check
# depends on both, and without them each test would only report meaningless
# failures.
main() {
    local tool
    for tool in curl jq; do
        if ! command -v "$tool" >/dev/null 2>&1; then
            echo -e "${RED}Error: required tool '$tool' is not installed${NC}" >&2
            exit 127
        fi
    done

    echo ""
    echo -e "${BLUE}╔════════════════════════════════════════════╗${NC}"
    echo -e "${BLUE}║ DEBROS Production Cluster Health Check ║${NC}"
    echo -e "${BLUE}╚════════════════════════════════════════════╝${NC}"
    echo ""
    echo "Testing cluster:"
    echo " Bootstrap: $BOOTSTRAP"
    echo " Node 1: $NODE1"
    echo " Node 2: $NODE2"

    test_rqlite_status
    test_rqlite_replication
    test_ipfs_status
    test_ipfs_swarm
    test_ipfs_cluster_status
    test_ipfs_cluster_pins
    print_summary
}

# Run main
main