mirror of
https://github.com/DeBrosOfficial/orama.git
synced 2026-03-17 05:13:01 +00:00
Improved scripts and updated makefile
This commit is contained in:
parent
490c4f66da
commit
1fb6f9a13e
46
Makefile
46
Makefile
@ -84,7 +84,7 @@ test-e2e-quick:
|
||||
# Network - Distributed P2P Database System
|
||||
# Makefile for development and build tasks
|
||||
|
||||
.PHONY: build clean test run-node run-node2 run-node3 run-example deps tidy fmt vet lint clear-ports install-hooks kill
|
||||
.PHONY: build clean test run-node run-node2 run-node3 run-example deps tidy fmt vet lint clear-ports install-hooks kill redeploy-devnet redeploy-testnet release health
|
||||
|
||||
VERSION := 0.101.6
|
||||
COMMIT ?= $(shell git rev-parse --short HEAD 2>/dev/null || echo unknown)
|
||||
@ -196,6 +196,42 @@ stop:
|
||||
# Stop local dev processes (delegates to scripts/dev-kill-all.sh)
kill:
	@bash scripts/dev-kill-all.sh

# Deploy to devnet (build + rolling upgrade all nodes)
redeploy-devnet:
	@bash scripts/redeploy.sh --devnet

# Deploy to devnet without rebuilding
redeploy-devnet-quick:
	@bash scripts/redeploy.sh --devnet --no-build

# Deploy to testnet (build + rolling upgrade all nodes)
redeploy-testnet:
	@bash scripts/redeploy.sh --testnet

# Deploy to testnet without rebuilding
redeploy-testnet-quick:
	@bash scripts/redeploy.sh --testnet --no-build

# Interactive release workflow (tag + push)
release:
	@bash scripts/release.sh
|
||||
|
||||
# Check health of all nodes in an environment
# Usage: make health ENV=devnet
# Reads scripts/remote-nodes.conf (pipe-delimited: env|host|pass|role|key)
# and runs check-node-health.sh for every node whose env matches $(ENV).
health:
	@if [ -z "$(ENV)" ]; then \
		echo "Usage: make health ENV=devnet|testnet"; \
		exit 1; \
	fi
	@test -f scripts/remote-nodes.conf || { \
		echo "Error: scripts/remote-nodes.conf not found"; \
		exit 1; \
	}
	@while IFS='|' read -r env host pass role key; do \
		[ -z "$$env" ] && continue; \
		case "$$env" in \#*) continue;; esac; \
		env="$$(echo "$$env" | xargs)"; \
		[ "$$env" != "$(ENV)" ] && continue; \
		host="$$(echo "$$host" | xargs)"; \
		pass="$$(echo "$$pass" | xargs)"; \
		role="$$(echo "$$role" | xargs)"; \
		bash scripts/check-node-health.sh "$$host" "$$pass" "$$host ($$role)"; \
	done < scripts/remote-nodes.conf
|
||||
|
||||
# Help
|
||||
help:
|
||||
@echo "Available targets:"
|
||||
@ -225,6 +261,14 @@ help:
|
||||
@echo " Example production test:"
|
||||
@echo " ORAMA_GATEWAY_URL=https://dbrs.space make test-e2e-prod"
|
||||
@echo ""
|
||||
@echo "Deployment:"
|
||||
@echo " make redeploy-devnet - Build + rolling deploy to all devnet nodes"
|
||||
@echo " make redeploy-devnet-quick - Deploy to devnet without rebuilding"
|
||||
@echo " make redeploy-testnet - Build + rolling deploy to all testnet nodes"
|
||||
@echo " make redeploy-testnet-quick- Deploy to testnet without rebuilding"
|
||||
@echo " make health ENV=devnet - Check health of all nodes in an environment"
|
||||
@echo " make release - Interactive release workflow (tag + push)"
|
||||
@echo ""
|
||||
@echo "Development Management (via orama):"
|
||||
@echo " ./bin/orama dev status - Show status of all dev services"
|
||||
@echo " ./bin/orama dev logs <component> [--follow]"
|
||||
|
||||
@ -1,298 +0,0 @@
|
||||
#!/usr/bin/env bash
# block-node.sh - Temporarily block network access to a gateway node (local or remote)
# Usage:
#   Local:  ./scripts/block-node.sh <node_number> <duration_seconds>
#   Remote: ./scripts/block-node.sh --remote <remote_node_number> <duration_seconds>
# Example:
#   ./scripts/block-node.sh 1 60          # Block local node-1 (port 6001) for 60 seconds
#   ./scripts/block-node.sh --remote 2 120  # Block remote node-2 for 120 seconds

# Fail fast on errors, unset variables, and pipeline failures.
set -euo pipefail

# ANSI colors for output (used with `echo -e`)
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m' # No Color

# Remote node configurations - loaded from a pipe-delimited config file
# that lives next to this script (fields: num | user@host | password).
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
CONFIG_FILE="$SCRIPT_DIR/remote-nodes.conf"
||||
|
||||
# Look up one field of a remote node's entry in the pipe-delimited config file.
#   $1 - node number to find
#   $2 - field to print: "user_host" or "password"
# Prints the value and returns 0 when found; prints "" and returns 1 when the
# config file is missing or the node number is not listed.
get_remote_node_config() {
    local want_num="$1"
    local want_field="$2"

    # No config file -> same "empty + failure" contract as an unknown node.
    [ -f "$CONFIG_FILE" ] || { echo ""; return 1; }

    local num user_host password
    # `|| [ -n "$num" ]` keeps the last line even without a trailing newline.
    while IFS='|' read -r num user_host password || [ -n "$num" ]; do
        # Skip comment and blank lines (checked before trimming, on purpose).
        if [[ "$num" == \#* || -z "$num" ]]; then
            continue
        fi

        # Trim surrounding whitespace from each field.
        num=$(echo "$num" | xargs)
        user_host=$(echo "$user_host" | xargs)
        password=$(echo "$password" | xargs)

        if [ "$num" = "$want_num" ]; then
            case "$want_field" in
                user_host) echo "$user_host" ;;
                password)  echo "$password" ;;
            esac
            return 0
        fi
    done < "$CONFIG_FILE"

    echo ""
    return 1
}
|
||||
|
||||
# Display usage help for both invocation modes and exit with status 1.
# Called whenever the argument list does not match either mode.
usage() {
    echo -e "${RED}Error:${NC} Invalid arguments"
    echo ""
    echo -e "${BLUE}Usage:${NC}"
    echo " $0 <node_number> <duration_seconds> # Local mode"
    echo " $0 --remote <remote_node_number> <duration_seconds> # Remote mode"
    echo ""
    echo -e "${GREEN}Local Mode Examples:${NC}"
    echo " $0 1 60 # Block local node-1 (port 6001) for 60 seconds"
    echo " $0 2 120 # Block local node-2 (port 6002) for 120 seconds"
    echo ""
    echo -e "${GREEN}Remote Mode Examples:${NC}"
    echo " $0 --remote 1 60 # Block remote node-1 (51.83.128.181) for 60 seconds"
    echo " $0 --remote 3 120 # Block remote node-3 (83.171.248.66) for 120 seconds"
    echo ""
    echo -e "${YELLOW}Local Node Mapping:${NC}"
    echo " Node 1 -> Port 6001"
    echo " Node 2 -> Port 6002"
    echo " Node 3 -> Port 6003"
    echo " Node 4 -> Port 6004"
    echo " Node 5 -> Port 6005"
    echo ""
    echo -e "${YELLOW}Remote Node Mapping:${NC}"
    # NOTE(review): this mapping is hard-coded help text; the authoritative
    # mapping lives in remote-nodes.conf — keep the two in sync.
    echo " Remote 1 -> ubuntu@51.83.128.181"
    echo " Remote 2 -> root@194.61.28.7"
    echo " Remote 3 -> root@83.171.248.66"
    echo " Remote 4 -> root@62.72.44.87"
    exit 1
}
|
||||
|
||||
# Parse arguments: either `--remote <num> <secs>` (3 args) or `<num> <secs>` (2 args).
REMOTE_MODE=false
if [ $# -eq 3 ] && [ "$1" == "--remote" ]; then
    REMOTE_MODE=true
    NODE_NUM="$2"
    DURATION="$3"
elif [ $# -eq 2 ]; then
    NODE_NUM="$1"
    DURATION="$2"
else
    usage
fi

# Validate duration: must be a positive integer number of seconds.
if ! [[ "$DURATION" =~ ^[0-9]+$ ]] || [ "$DURATION" -le 0 ]; then
    echo -e "${RED}Error:${NC} Duration must be a positive integer"
    exit 1
fi

# Calculate port (local nodes use 6001-6005, remote nodes use 80 and 443)
if [ "$REMOTE_MODE" = true ]; then
    # Remote nodes: block standard HTTP/HTTPS ports
    PORTS="80 443"
else
    # Local nodes: block the specific gateway port (node N -> 6000+N).
    # NODE_NUM is range-checked later in block_local_node, not here.
    PORT=$((6000 + NODE_NUM))
fi
|
||||
|
||||
# Block TCP ports on a remote server for a fixed duration via sshpass + iptables,
# then remove the rules again.
#   $1 - remote node number (1-4, resolved through remote-nodes.conf)
#   $2 - duration in seconds
#   $3 - space-separated port list, e.g. "80 443"
# Exits non-zero on validation, connection, or rule-installation failure.
block_remote_node() {
    local node_num="$1"
    local duration="$2"
    local ports="$3" # Can be space-separated list like "80 443"

    # Validate remote node number
    if ! [[ "$node_num" =~ ^[1-4]$ ]]; then
        echo -e "${RED}Error:${NC} Remote node number must be between 1 and 4"
        exit 1
    fi

    # Get credentials from config file
    local user_host=$(get_remote_node_config "$node_num" "user_host")
    local password=$(get_remote_node_config "$node_num" "password")

    if [ -z "$user_host" ] || [ -z "$password" ]; then
        echo -e "${RED}Error:${NC} Configuration for remote node $node_num not found in $CONFIG_FILE"
        exit 1
    fi

    # Bare hostname/IP (strip the "user@" prefix) — used for display only.
    local host="${user_host##*@}"

    echo -e "${BLUE}=== Remote Network Blocking Tool ===${NC}"
    echo -e "Remote Node: ${GREEN}$node_num${NC} ($user_host)"
    echo -e "Ports: ${GREEN}$ports${NC}"
    echo -e "Duration: ${GREEN}$duration seconds${NC}"
    echo ""

    # Check if sshpass is installed
    if ! command -v sshpass &> /dev/null; then
        echo -e "${RED}Error:${NC} sshpass is not installed. Install it first:"
        echo -e " ${YELLOW}macOS:${NC} brew install hudochenkov/sshpass/sshpass"
        echo -e " ${YELLOW}Ubuntu/Debian:${NC} sudo apt-get install sshpass"
        exit 1
    fi

    # SSH options - force password authentication only to avoid "too many auth failures"
    SSH_OPTS="-o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -o LogLevel=ERROR -o PreferredAuthentications=password -o PubkeyAuthentication=no -o NumberOfPasswordPrompts=1"

    echo -e "${YELLOW}Connecting to remote server...${NC}"

    # Test connection before attempting any firewall changes.
    if ! sshpass -p "$password" ssh $SSH_OPTS "$user_host" "echo 'Connected successfully' > /dev/null"; then
        echo -e "${RED}Error:${NC} Failed to connect to $user_host"
        exit 1
    fi

    echo -e "${GREEN}✓${NC} Connected to $host"

    # Install iptables rules on remote server
    echo -e "${YELLOW}Installing iptables rules on remote server...${NC}"

    # Build one command string covering all ports; each rule is best-effort
    # (`|| true`) so a single failure doesn't abort the whole batch.
    # NOTE(review): assumes the remote user can run iptables directly (root or
    # equivalent) — confirm for non-root entries like ubuntu@...
    BLOCK_CMDS=""
    for port in $ports; do
        BLOCK_CMDS="${BLOCK_CMDS}iptables -I INPUT -p tcp --dport $port -j DROP 2>/dev/null || true; "
        BLOCK_CMDS="${BLOCK_CMDS}iptables -I OUTPUT -p tcp --sport $port -j DROP 2>/dev/null || true; "
    done
    BLOCK_CMDS="${BLOCK_CMDS}echo 'Rules installed'"

    if ! sshpass -p "$password" ssh $SSH_OPTS "$user_host" "$BLOCK_CMDS"; then
        echo -e "${RED}Error:${NC} Failed to install iptables rules"
        exit 1
    fi

    echo -e "${GREEN}✓${NC} Ports $ports are now blocked on $host"
    echo -e "${YELLOW}Waiting $duration seconds...${NC}"
    echo ""

    # Show countdown (carriage return rewrites the same terminal line).
    for ((i=duration; i>0; i--)); do
        printf "\r${BLUE}Time remaining: %3d seconds${NC}" "$i"
        sleep 1
    done

    echo ""
    echo ""
    echo -e "${YELLOW}Removing iptables rules from remote server...${NC}"

    # Build matching -D (delete) commands mirroring the -I rules above.
    UNBLOCK_CMDS=""
    for port in $ports; do
        UNBLOCK_CMDS="${UNBLOCK_CMDS}iptables -D INPUT -p tcp --dport $port -j DROP 2>/dev/null || true; "
        UNBLOCK_CMDS="${UNBLOCK_CMDS}iptables -D OUTPUT -p tcp --sport $port -j DROP 2>/dev/null || true; "
    done
    UNBLOCK_CMDS="${UNBLOCK_CMDS}echo 'Rules removed'"

    # Cleanup failure is a warning, not a fatal error — the operator can fix it.
    if ! sshpass -p "$password" ssh $SSH_OPTS "$user_host" "$UNBLOCK_CMDS"; then
        echo -e "${YELLOW}Warning:${NC} Failed to remove some iptables rules. You may need to clean up manually."
    else
        echo -e "${GREEN}✓${NC} Ports $ports are now accessible again on $host"
    fi

    echo ""
    echo -e "${GREEN}=== Done! ===${NC}"
    echo -e "Remote node ${GREEN}$node_num${NC} ($host) was unreachable for $duration seconds and is now accessible again."
}
|
||||
|
||||
# Make a local node unreachable for a fixed duration by pausing its process
# (SIGSTOP), then resuming it (SIGCONT). No firewall changes are made.
#   $1 - local node number (1-5)
#   $2 - duration in seconds
#   $3 - TCP port the node's gateway listens on (6000 + node number)
block_local_node() {
    local node_num="$1"
    local duration="$2"
    local port="$3"

    # Validate node number
    if ! [[ "$node_num" =~ ^[1-5]$ ]]; then
        echo -e "${RED}Error:${NC} Local node number must be between 1 and 5"
        exit 1
    fi

    echo -e "${BLUE}=== Local Network Blocking Tool ===${NC}"
    echo -e "Node: ${GREEN}node-$node_num${NC}"
    echo -e "Port: ${GREEN}$port${NC}"
    echo -e "Duration: ${GREEN}$duration seconds${NC}"
    echo -e "Method: ${GREEN}Process Pause (SIGSTOP/SIGCONT)${NC}"
    echo ""

    # Find the process listening on the port
    echo -e "${YELLOW}Finding process listening on port $port...${NC}"

    # macOS uses different tools than Linux
    if [[ "$(uname -s)" == "Darwin" ]]; then
        # macOS: use lsof (-t prints bare PIDs; take the first one).
        PID=$(lsof -ti :$port 2>/dev/null | head -1 || echo "")
    else
        # Linux: prefer ss, fall back to netstat.
        if command -v ss &> /dev/null; then
            PID=$(ss -tlnp | grep ":$port " | grep -oP 'pid=\K[0-9]+' | head -1 || echo "")
        else
            PID=$(netstat -tlnp 2>/dev/null | grep ":$port " | awk '{print $7}' | cut -d'/' -f1 | head -1 || echo "")
        fi
    fi

    if [ -z "$PID" ]; then
        echo -e "${RED}Error:${NC} No process found listening on port $port"
        echo -e "Make sure node-$node_num is running first."
        exit 1
    fi

    # Get process name (for display only)
    PROCESS_NAME=$(ps -p $PID -o comm= 2>/dev/null || echo "unknown")

    echo -e "${GREEN}✓${NC} Found process: ${BLUE}$PROCESS_NAME${NC} (PID: ${BLUE}$PID${NC})"
    echo ""

    # Pause the process
    echo -e "${YELLOW}Pausing process (SIGSTOP)...${NC}"
    if ! kill -STOP $PID 2>/dev/null; then
        echo -e "${RED}Error:${NC} Failed to pause process. You may need sudo privileges."
        exit 1
    fi

    echo -e "${GREEN}✓${NC} Process paused - node-$node_num is now unreachable"
    echo -e "${YELLOW}Waiting $duration seconds...${NC}"
    echo ""

    # Show countdown (carriage return rewrites the same terminal line).
    # NOTE(review): if this script is killed during the countdown, the node
    # process stays stopped — a trap to send SIGCONT would be safer.
    for ((i=duration; i>0; i--)); do
        printf "\r${BLUE}Time remaining: %3d seconds${NC}" "$i"
        sleep 1
    done

    echo ""
    echo ""

    # Resume the process; failure is only a warning (process may have exited).
    echo -e "${YELLOW}Resuming process (SIGCONT)...${NC}"
    if ! kill -CONT $PID 2>/dev/null; then
        echo -e "${YELLOW}Warning:${NC} Failed to resume process. It may have been terminated."
    else
        echo -e "${GREEN}✓${NC} Process resumed - node-$node_num is now accessible again"
    fi

    echo ""
    echo -e "${GREEN}=== Done! ===${NC}"
    echo -e "Local node ${GREEN}node-$node_num${NC} was unreachable for $duration seconds and is now accessible again."
}
|
||||
|
||||
# Main execution: dispatch to the remote or local blocking routine based on
# the mode chosen during argument parsing.
case "$REMOTE_MODE" in
    true) block_remote_node "$NODE_NUM" "$DURATION" "$PORTS" ;;
    *)    block_local_node "$NODE_NUM" "$DURATION" "$PORT" ;;
esac
|
||||
@ -1,112 +0,0 @@
|
||||
#!/bin/bash
set -e

# Build custom CoreDNS binary with RQLite plugin
# This script compiles CoreDNS with the custom RQLite plugin and copies the
# resulting binary into this project's bin/ directory.

COREDNS_VERSION="1.11.1"
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_ROOT="$(dirname "$SCRIPT_DIR")"
COREDNS_DIR="/tmp/coredns-build"

echo "Building CoreDNS v${COREDNS_VERSION} with RQLite plugin..."

# Clean previous build
rm -rf "$COREDNS_DIR"
mkdir -p "$COREDNS_DIR"

# Clone CoreDNS (shallow, single tag)
echo "Cloning CoreDNS..."
cd "$COREDNS_DIR"
git clone --depth 1 --branch v${COREDNS_VERSION} https://github.com/coredns/coredns.git
cd coredns

# Create plugin.cfg with RQLite plugin. Plugin order in this file determines
# CoreDNS plugin execution order, so entries are kept in upstream order with
# the custom rqlite plugin appended last.
echo "Configuring plugins..."
cat > plugin.cfg <<EOF
# Standard CoreDNS plugins
metadata:metadata
cancel:cancel
tls:tls
reload:reload
nsid:nsid
bufsize:bufsize
root:root
bind:bind
debug:debug
trace:trace
ready:ready
health:health
pprof:pprof
prometheus:metrics
errors:errors
log:log
dnstap:dnstap
local:local
dns64:dns64
acl:acl
any:any
chaos:chaos
loadbalance:loadbalance
cache:cache
rewrite:rewrite
header:header
dnssec:dnssec
autopath:autopath
minimal:minimal
template:template
transfer:transfer
hosts:hosts
route53:route53
azure:azure
clouddns:clouddns
k8s_external:k8s_external
kubernetes:kubernetes
file:file
auto:auto
secondary:secondary
loop:loop
forward:forward
grpc:grpc
erratic:erratic
whoami:whoami
on:github.com/coredns/caddy/onevent
sign:sign
view:view

# Response Rate Limiting (DNS amplification protection)
rrl:rrl

# Custom RQLite plugin
rqlite:github.com/DeBrosOfficial/network/pkg/coredns/rqlite
EOF

# Copy RQLite plugin sources from this repo into the CoreDNS tree
echo "Copying RQLite plugin..."
mkdir -p plugin/rqlite
cp -r "$PROJECT_ROOT/pkg/coredns/rqlite/"* plugin/rqlite/

# Update go.mod to include our dependencies
echo "Updating dependencies..."
go get github.com/rqlite/rqlite-go@latest
go get github.com/coredns/coredns@v${COREDNS_VERSION}
go mod tidy

# Build CoreDNS via its own Makefile
echo "Building CoreDNS binary..."
make

# Copy binary to project
# NOTE(review): assumes $PROJECT_ROOT/bin already exists — cp fails otherwise.
echo "Copying binary to project..."
cp coredns "$PROJECT_ROOT/bin/coredns-custom"
chmod +x "$PROJECT_ROOT/bin/coredns-custom"

echo ""
echo "✅ CoreDNS built successfully!"
echo "Binary location: $PROJECT_ROOT/bin/coredns-custom"
echo ""
echo "To deploy:"
echo " 1. Copy binary to /usr/local/bin/coredns on each nameserver node"
echo " 2. Copy configs/coredns/Corefile to /etc/coredns/Corefile"
echo " 3. Start CoreDNS: sudo systemctl start coredns"
echo ""
|
||||
@ -1,379 +0,0 @@
|
||||
#!/bin/bash

# Production Cluster Health Check Script
# Tests RQLite, IPFS, and IPFS Cluster connectivity and replication

# Note: We don't use 'set -e' here because we want to continue testing even if individual checks fail

# ANSI colors for output (used with `echo -e`)
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m' # No Color

# Node IPs - Update these if needed (overridable via environment variables)
BOOTSTRAP="${BOOTSTRAP:-51.83.128.181}"
NODE1="${NODE1:-57.128.223.92}"
NODE2="${NODE2:-185.185.83.89}"

# All cluster members; the checks below assume exactly 3 nodes.
ALL_NODES=($BOOTSTRAP $NODE1 $NODE2)

# Global result counters, incremented by the print_* helpers.
PASSED=0
FAILED=0
WARNINGS=0
|
||||
|
||||
# Helper functions — shared output formatting. print_pass/print_fail/print_warn
# also increment the corresponding global counters used by print_summary.
print_header() {
    echo ""
    echo -e "${BLUE}========================================${NC}"
    echo -e "${BLUE}$1${NC}"
    echo -e "${BLUE}========================================${NC}"
}

# Announce the start of an individual check.
print_test() {
    echo -e "${YELLOW}▶ $1${NC}"
}

# Record and report a passing check.
print_pass() {
    echo -e "${GREEN}✓ $1${NC}"
    PASSED=$((PASSED + 1))
}

# Record and report a failing check.
print_fail() {
    echo -e "${RED}✗ $1${NC}"
    FAILED=$((FAILED + 1))
}

# Record and report a non-fatal warning.
print_warn() {
    echo -e "${YELLOW}⚠ $1${NC}"
    WARNINGS=$((WARNINGS + 1))
}

# Indented informational line (no counter change).
print_info() {
    echo -e " $1"
}
|
||||
|
||||
# Test functions

# Check RQLite raft state on every node: expects exactly 1 Leader and 2
# Followers, each node reporting 2 peers and a 3-member configuration, and
# (ideally) identical commit indices. Results go into the global counters.
test_rqlite_status() {
    print_header "1. RQLITE CLUSTER STATUS"

    local leader_found=false
    local follower_count=0
    local commit_indices=()

    for i in "${!ALL_NODES[@]}"; do
        local node="${ALL_NODES[$i]}"
        print_test "Testing RQLite on $node"

        if ! response=$(curl -s --max-time 5 http://$node:5001/status 2>/dev/null); then
            print_fail "Cannot connect to RQLite on $node:5001"
            continue
        fi

        # Pull the raft fields we care about out of the /status JSON.
        local state=$(echo "$response" | jq -r '.store.raft.state // "unknown"')
        local num_peers=$(echo "$response" | jq -r '.store.raft.num_peers // 0')
        local commit_index=$(echo "$response" | jq -r '.store.raft.commit_index // 0')
        local last_contact=$(echo "$response" | jq -r '.store.raft.last_contact // "N/A"')
        local config=$(echo "$response" | jq -r '.store.raft.latest_configuration // "[]"')
        # Count cluster members by counting "Address" occurrences in the config string.
        local node_count=$(echo "$config" | grep -o "Address" | wc -l | tr -d ' ')

        commit_indices+=($commit_index)

        print_info "State: $state | Peers: $num_peers | Commit Index: $commit_index | Cluster Nodes: $node_count"

        # Check state
        if [ "$state" = "Leader" ]; then
            leader_found=true
            print_pass "Node $node is the Leader"
        elif [ "$state" = "Follower" ]; then
            follower_count=$((follower_count + 1))
            # Check last contact (followers should have recent leader contact)
            if [ "$last_contact" != "N/A" ] && [ "$last_contact" != "0" ]; then
                print_pass "Node $node is a Follower (last contact: $last_contact)"
            else
                print_warn "Node $node is Follower but last_contact is $last_contact"
            fi
        else
            print_fail "Node $node has unexpected state: $state"
        fi

        # Check peer count (each node should see the other 2)
        if [ "$num_peers" = "2" ]; then
            print_pass "Node $node has correct peer count: 2"
        else
            print_fail "Node $node has incorrect peer count: $num_peers (expected 2)"
        fi

        # Check cluster configuration
        if [ "$node_count" = "3" ]; then
            print_pass "Node $node sees all 3 cluster members"
        else
            print_fail "Node $node only sees $node_count cluster members (expected 3)"
        fi

        echo ""
    done

    # Check for exactly 1 leader
    if [ "$leader_found" = true ] && [ "$follower_count" = "2" ]; then
        print_pass "Cluster has 1 Leader and 2 Followers ✓"
    else
        print_fail "Invalid cluster state (Leader found: $leader_found, Followers: $follower_count)"
    fi

    # Check commit index sync (only meaningful when all 3 nodes responded)
    if [ ${#commit_indices[@]} -eq 3 ]; then
        local first="${commit_indices[0]}"
        local all_same=true
        for idx in "${commit_indices[@]}"; do
            if [ "$idx" != "$first" ]; then
                all_same=false
                break
            fi
        done

        if [ "$all_same" = true ]; then
            print_pass "All nodes have synced commit index: $first"
        else
            print_warn "Commit indices differ: ${commit_indices[*]} (might be normal if writes are happening)"
        fi
    fi
}
|
||||
|
||||
# End-to-end RQLite replication test: create a scratch table, insert a
# timestamped row on the bootstrap node, wait briefly, then read it back
# from every node with weak consistency.
test_rqlite_replication() {
    print_header "2. RQLITE REPLICATION TEST"

    print_test "Creating test table and inserting data on leader ($BOOTSTRAP)"

    # Create table (idempotent; "already exists" is tolerated below)
    if ! response=$(curl -s --max-time 5 -XPOST "http://$BOOTSTRAP:5001/db/execute" \
        -H "Content-Type: application/json" \
        -d '[["CREATE TABLE IF NOT EXISTS test_cluster_health (id INTEGER PRIMARY KEY AUTOINCREMENT, timestamp TEXT, node TEXT, value TEXT)"]]' 2>/dev/null); then
        print_fail "Failed to create table"
        return
    fi

    if echo "$response" | jq -e '.results[0].error' >/dev/null 2>&1; then
        local error=$(echo "$response" | jq -r '.results[0].error')
        if [[ "$error" != "table test_cluster_health already exists" ]]; then
            print_fail "Table creation error: $error"
            return
        fi
    fi
    print_pass "Table exists"

    # Insert test data (value is unique per run via epoch timestamp)
    local test_value="test_$(date +%s)"
    if ! response=$(curl -s --max-time 5 -XPOST "http://$BOOTSTRAP:5001/db/execute" \
        -H "Content-Type: application/json" \
        -d "[
            [\"INSERT INTO test_cluster_health (timestamp, node, value) VALUES (datetime('now'), 'bootstrap', '$test_value')\"]
        ]" 2>/dev/null); then
        print_fail "Failed to insert data"
        return
    fi

    if echo "$response" | jq -e '.results[0].error' >/dev/null 2>&1; then
        local error=$(echo "$response" | jq -r '.results[0].error')
        print_fail "Insert error: $error"
        return
    fi
    print_pass "Data inserted: $test_value"

    # Wait for replication to reach the followers
    print_info "Waiting 2 seconds for replication..."
    sleep 2

    # Query from all nodes (level=weak serves reads from the local node)
    for node in "${ALL_NODES[@]}"; do
        print_test "Reading from $node"

        if ! response=$(curl -s --max-time 5 -XPOST "http://$node:5001/db/query?level=weak" \
            -H "Content-Type: application/json" \
            -d "[\"SELECT * FROM test_cluster_health WHERE value = '$test_value' LIMIT 1\"]" 2>/dev/null); then
            print_fail "Failed to query from $node"
            continue
        fi

        if echo "$response" | jq -e '.results[0].error' >/dev/null 2>&1; then
            local error=$(echo "$response" | jq -r '.results[0].error')
            print_fail "Query error on $node: $error"
            continue
        fi

        local row_count=$(echo "$response" | jq -r '.results[0].values | length // 0')
        if [ "$row_count" = "1" ]; then
            # Column index 3 is `value` (id, timestamp, node, value)
            local retrieved_value=$(echo "$response" | jq -r '.results[0].values[0][3] // ""')
            if [ "$retrieved_value" = "$test_value" ]; then
                print_pass "Data replicated correctly to $node"
            else
                print_fail "Data mismatch on $node (got: $retrieved_value, expected: $test_value)"
            fi
        else
            print_fail "Expected 1 row from $node, got $row_count"
        fi
    done
}
|
||||
|
||||
# Check that the IPFS daemon answers /api/v0/id on every node and report its
# peer ID, agent version, and advertised address count.
test_ipfs_status() {
    print_header "3. IPFS DAEMON STATUS"

    for node in "${ALL_NODES[@]}"; do
        print_test "Testing IPFS on $node"

        # IPFS API requires POST
        if ! response=$(curl -s --max-time 5 -X POST http://$node:4501/api/v0/id 2>/dev/null); then
            print_fail "Cannot connect to IPFS on $node:4501"
            continue
        fi

        local peer_id=$(echo "$response" | jq -r '.ID // "unknown"')
        local addr_count=$(echo "$response" | jq -r '.Addresses | length // 0')
        local agent=$(echo "$response" | jq -r '.AgentVersion // "unknown"')

        if [ "$peer_id" != "unknown" ]; then
            print_pass "IPFS running on $node (ID: ${peer_id:0:12}...)"
            print_info "Agent: $agent | Addresses: $addr_count"
        else
            print_fail "IPFS not responding correctly on $node"
        fi
    done
}
|
||||
|
||||
# Check IPFS swarm connectivity: each node should be connected to the other
# 2 cluster members (pass at 2 peers, warn at 1+, fail at 0).
test_ipfs_swarm() {
    print_header "4. IPFS SWARM CONNECTIVITY"

    for node in "${ALL_NODES[@]}"; do
        print_test "Checking IPFS swarm peers on $node"

        if ! response=$(curl -s --max-time 5 -X POST http://$node:4501/api/v0/swarm/peers 2>/dev/null); then
            print_fail "Failed to get swarm peers from $node"
            continue
        fi

        local peer_count=$(echo "$response" | jq -r '.Peers | length // 0')

        if [ "$peer_count" = "2" ]; then
            print_pass "Node $node connected to 2 IPFS peers"
        elif [ "$peer_count" -gt "0" ]; then
            print_warn "Node $node connected to $peer_count IPFS peers (expected 2)"
        else
            print_fail "Node $node has no IPFS swarm peers"
        fi
    done
}
|
||||
|
||||
# Check the IPFS Cluster API (/id) on every node and verify each member
# sees all 3 cluster peers.
test_ipfs_cluster_status() {
    print_header "5. IPFS CLUSTER STATUS"

    for node in "${ALL_NODES[@]}"; do
        print_test "Testing IPFS Cluster on $node"

        if ! response=$(curl -s --max-time 5 http://$node:9094/id 2>/dev/null); then
            print_fail "Cannot connect to IPFS Cluster on $node:9094"
            continue
        fi

        local cluster_id=$(echo "$response" | jq -r '.id // "unknown"')
        local cluster_peers=$(echo "$response" | jq -r '.cluster_peers | length // 0')
        local version=$(echo "$response" | jq -r '.version // "unknown"')

        if [ "$cluster_id" != "unknown" ]; then
            print_pass "IPFS Cluster running on $node (ID: ${cluster_id:0:12}...)"
            print_info "Version: $version | Cluster Peers: $cluster_peers"

            if [ "$cluster_peers" = "3" ]; then
                print_pass "Node $node sees all 3 cluster peers"
            else
                print_warn "Node $node sees $cluster_peers cluster peers (expected 3)"
            fi
        else
            print_fail "IPFS Cluster not responding correctly on $node"
        fi
    done
}
|
||||
|
||||
# Compare the pin count reported by each node's IPFS Cluster API; a differing
# count is only a warning since pins may still be syncing.
test_ipfs_cluster_pins() {
    print_header "6. IPFS CLUSTER PIN CONSISTENCY"

    local pin_counts=()

    for node in "${ALL_NODES[@]}"; do
        print_test "Checking pins on $node"

        if ! response=$(curl -s --max-time 5 http://$node:9094/pins 2>/dev/null); then
            print_fail "Failed to get pins from $node"
            # Record 0 so the array still has one entry per node
            pin_counts+=(0)
            continue
        fi

        local pin_count=$(echo "$response" | jq -r 'length // 0')
        pin_counts+=($pin_count)
        print_pass "Node $node has $pin_count pins"
    done

    # Check if all nodes have same pin count
    if [ ${#pin_counts[@]} -eq 3 ]; then
        local first="${pin_counts[0]}"
        local all_same=true
        for count in "${pin_counts[@]}"; do
            if [ "$count" != "$first" ]; then
                all_same=false
                break
            fi
        done

        if [ "$all_same" = true ]; then
            print_pass "All nodes have consistent pin count: $first"
        else
            print_warn "Pin counts differ: ${pin_counts[*]} (might be syncing)"
        fi
    fi
}
|
||||
|
||||
# Print the pass/warn/fail totals and terminate the script.
# Exit codes: 0 = all checks passed, 1 = 1-2 failures, 2 = 3+ failures.
print_summary() {
    print_header "TEST SUMMARY"

    echo ""
    echo -e "${GREEN}Passed: $PASSED${NC}"
    echo -e "${YELLOW}Warnings: $WARNINGS${NC}"
    echo -e "${RED}Failed: $FAILED${NC}"
    echo ""

    if [ $FAILED -eq 0 ]; then
        echo -e "${GREEN}🎉 All critical tests passed! Cluster is healthy.${NC}"
        exit 0
    elif [ $FAILED -le 2 ]; then
        echo -e "${YELLOW}⚠️ Some tests failed. Review the output above.${NC}"
        exit 1
    else
        echo -e "${RED}❌ Multiple failures detected. Cluster needs attention.${NC}"
        exit 2
    fi
}
|
||||
|
||||
# Main execution: print the banner and node list, run all six check suites
# in order, then exit via print_summary's status code.
main() {
    echo ""
    echo -e "${BLUE}╔════════════════════════════════════════════╗${NC}"
    echo -e "${BLUE}║ DEBROS Production Cluster Health Check ║${NC}"
    echo -e "${BLUE}╚════════════════════════════════════════════╝${NC}"
    echo ""
    echo "Testing cluster:"
    echo " Bootstrap: $BOOTSTRAP"
    echo " Node 1: $NODE1"
    echo " Node 2: $NODE2"

    test_rqlite_status
    test_rqlite_replication
    test_ipfs_status
    test_ipfs_swarm
    test_ipfs_cluster_status
    test_ipfs_cluster_pins
    print_summary
}

# Run main
main
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user