mirror of
https://github.com/DeBrosOfficial/orama.git
synced 2026-03-17 12:06:57 +00:00
Improved scripts and updated makefile
This commit is contained in:
parent
490c4f66da
commit
1fb6f9a13e
46
Makefile
46
Makefile
@ -84,7 +84,7 @@ test-e2e-quick:
|
|||||||
# Network - Distributed P2P Database System
|
# Network - Distributed P2P Database System
|
||||||
# Makefile for development and build tasks
|
# Makefile for development and build tasks
|
||||||
|
|
||||||
.PHONY: build clean test run-node run-node2 run-node3 run-example deps tidy fmt vet lint clear-ports install-hooks kill
|
# Command-style targets: none of these names is a file to be built, so they
# are declared phony to keep a stray file of the same name from masking them.
# The -quick deploy variants are listed alongside their full counterparts.
.PHONY: build clean test run-node run-node2 run-node3 run-example deps tidy fmt vet lint clear-ports install-hooks kill redeploy-devnet redeploy-devnet-quick redeploy-testnet redeploy-testnet-quick release health
|
||||||
|
|
||||||
VERSION := 0.101.6
|
VERSION := 0.101.6
|
||||||
COMMIT ?= $(shell git rev-parse --short HEAD 2>/dev/null || echo unknown)
|
COMMIT ?= $(shell git rev-parse --short HEAD 2>/dev/null || echo unknown)
|
||||||
@ -196,6 +196,42 @@ stop:
|
|||||||
kill:
|
kill:
|
||||||
@bash scripts/dev-kill-all.sh
|
@bash scripts/dev-kill-all.sh
|
||||||
|
|
||||||
|
# Deployment targets are named redeploy-<env>[-quick]; the environment flag
# passed to scripts/redeploy.sh is derived from the target name itself.

# Deploy to devnet / testnet (build + rolling upgrade all nodes)
redeploy-devnet redeploy-testnet:
	@bash scripts/redeploy.sh --$(patsubst redeploy-%,%,$@)

# Deploy to devnet / testnet without rebuilding
redeploy-devnet-quick redeploy-testnet-quick:
	@bash scripts/redeploy.sh --$(patsubst redeploy-%-quick,%,$@) --no-build

# Interactive release workflow (tag + push)
release:
	@bash scripts/release.sh
|
||||||
|
|
||||||
|
# Check health of all nodes in an environment
# Usage: make health ENV=devnet
#
# Reads scripts/remote-nodes.conf, one node per line in the form
#   env|host|pass|role|key
# skipping blank lines and lines starting with '#', and runs
# scripts/check-node-health.sh for every node whose env matches $(ENV).
# The target fails if the check fails for any node, not just the last one.
health:
	@if [ -z "$(ENV)" ]; then \
		echo "Usage: make health ENV=devnet|testnet"; \
		exit 1; \
	fi
	@failed=0; \
	while IFS='|' read -r env host pass role key || [ -n "$$env" ]; do \
		[ -z "$$env" ] && continue; \
		case "$$env" in \#*) continue;; esac; \
		env="$$(echo "$$env" | xargs)"; \
		[ "$$env" != "$(ENV)" ] && continue; \
		role="$$(echo "$$role" | xargs)"; \
		: "stdin is detached so the check cannot consume the remaining config lines"; \
		bash scripts/check-node-health.sh "$$host" "$$pass" "$$host ($$role)" </dev/null || failed=1; \
	done < scripts/remote-nodes.conf; \
	exit $$failed
|
||||||
|
|
||||||
# Help
|
# Help
|
||||||
help:
|
help:
|
||||||
@echo "Available targets:"
|
@echo "Available targets:"
|
||||||
@ -225,6 +261,14 @@ help:
|
|||||||
@echo " Example production test:"
|
@echo " Example production test:"
|
||||||
@echo " ORAMA_GATEWAY_URL=https://dbrs.space make test-e2e-prod"
|
@echo " ORAMA_GATEWAY_URL=https://dbrs.space make test-e2e-prod"
|
||||||
@echo ""
|
@echo ""
|
||||||
|
@echo "Deployment:"
|
||||||
|
@echo " make redeploy-devnet - Build + rolling deploy to all devnet nodes"
|
||||||
|
@echo " make redeploy-devnet-quick - Deploy to devnet without rebuilding"
|
||||||
|
@echo " make redeploy-testnet - Build + rolling deploy to all testnet nodes"
|
||||||
|
@echo " make redeploy-testnet-quick- Deploy to testnet without rebuilding"
|
||||||
|
@echo " make health ENV=devnet - Check health of all nodes in an environment"
|
||||||
|
@echo " make release - Interactive release workflow (tag + push)"
|
||||||
|
@echo ""
|
||||||
@echo "Development Management (via orama):"
|
@echo "Development Management (via orama):"
|
||||||
@echo " ./bin/orama dev status - Show status of all dev services"
|
@echo " ./bin/orama dev status - Show status of all dev services"
|
||||||
@echo " ./bin/orama dev logs <component> [--follow]"
|
@echo " ./bin/orama dev logs <component> [--follow]"
|
||||||
|
|||||||
@ -1,298 +0,0 @@
|
|||||||
#!/usr/bin/env bash
|
|
||||||
# block-node.sh - Temporarily block network access to a gateway node (local or remote)
|
|
||||||
# Usage:
|
|
||||||
# Local: ./scripts/block-node.sh <node_number> <duration_seconds>
|
|
||||||
# Remote: ./scripts/block-node.sh --remote <remote_node_number> <duration_seconds>
|
|
||||||
# Example:
|
|
||||||
# ./scripts/block-node.sh 1 60 # Block local node-1 (port 6001) for 60 seconds
|
|
||||||
# ./scripts/block-node.sh --remote 2 120 # Block remote node-2 for 120 seconds
|
|
||||||
|
|
||||||
# Strict mode: stop on any error, on use of an unset variable, and on a
# failure anywhere inside a pipeline.
set -o errexit -o nounset -o pipefail

# Colors for output (escape sequences, expanded later by `echo -e` / `printf`)
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m' # No Color

# Remote node configurations are loaded from a file living next to this script
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
CONFIG_FILE="${SCRIPT_DIR}/remote-nodes.conf"
|
|
||||||
|
|
||||||
# Print $1 with leading and trailing whitespace removed.
# Implemented with parameter expansion instead of `xargs`, because xargs also
# interprets quotes and backslashes and would corrupt (or reject) values such
# as passwords that contain them.
_trim_whitespace() {
    local text="$1"
    text="${text#"${text%%[![:space:]]*}"}"
    text="${text%"${text##*[![:space:]]}"}"
    printf '%s' "$text"
}

# Function to get remote node config
#
# Looks up one field of a node in $CONFIG_FILE, which holds one node per line
# as `num|user_host|password`. Blank lines and lines whose first
# non-whitespace character is `#` are ignored.
#
# Arguments:
#   $1 - node number to look up
#   $2 - field to print: "user_host" or "password"
# Outputs:
#   The requested field on stdout; an empty line when nothing was found.
# Returns:
#   0 when the node was found, 1 when the config file or the node is missing.
get_remote_node_config() {
    local node_num="$1"
    local field="$2"
    local num user_host password

    if [ ! -f "$CONFIG_FILE" ]; then
        echo ""
        return 1
    fi

    # `|| [ -n "$num" ]` keeps a final line that has no trailing newline.
    while IFS='|' read -r num user_host password || [ -n "$num" ]; do
        # Trim first so that indented comments are recognised as comments.
        num="$(_trim_whitespace "$num")"
        if [[ -z "$num" || "$num" == \#* ]]; then
            continue
        fi

        if [ "$num" = "$node_num" ]; then
            # printf rather than echo: the value is printed verbatim even if it
            # looks like an echo option (e.g. "-n").
            case "$field" in
                user_host) printf '%s\n' "$(_trim_whitespace "$user_host")" ;;
                password)  printf '%s\n' "$(_trim_whitespace "$password")" ;;
            esac
            return 0
        fi
    done < "$CONFIG_FILE"

    echo ""
    return 1
}
|
|
||||||
|
|
||||||
# Display usage
|
|
||||||
usage() {
|
|
||||||
echo -e "${RED}Error:${NC} Invalid arguments"
|
|
||||||
echo ""
|
|
||||||
echo -e "${BLUE}Usage:${NC}"
|
|
||||||
echo " $0 <node_number> <duration_seconds> # Local mode"
|
|
||||||
echo " $0 --remote <remote_node_number> <duration_seconds> # Remote mode"
|
|
||||||
echo ""
|
|
||||||
echo -e "${GREEN}Local Mode Examples:${NC}"
|
|
||||||
echo " $0 1 60 # Block local node-1 (port 6001) for 60 seconds"
|
|
||||||
echo " $0 2 120 # Block local node-2 (port 6002) for 120 seconds"
|
|
||||||
echo ""
|
|
||||||
echo -e "${GREEN}Remote Mode Examples:${NC}"
|
|
||||||
echo " $0 --remote 1 60 # Block remote node-1 (51.83.128.181) for 60 seconds"
|
|
||||||
echo " $0 --remote 3 120 # Block remote node-3 (83.171.248.66) for 120 seconds"
|
|
||||||
echo ""
|
|
||||||
echo -e "${YELLOW}Local Node Mapping:${NC}"
|
|
||||||
echo " Node 1 -> Port 6001"
|
|
||||||
echo " Node 2 -> Port 6002"
|
|
||||||
echo " Node 3 -> Port 6003"
|
|
||||||
echo " Node 4 -> Port 6004"
|
|
||||||
echo " Node 5 -> Port 6005"
|
|
||||||
echo ""
|
|
||||||
echo -e "${YELLOW}Remote Node Mapping:${NC}"
|
|
||||||
echo " Remote 1 -> ubuntu@51.83.128.181"
|
|
||||||
echo " Remote 2 -> root@194.61.28.7"
|
|
||||||
echo " Remote 3 -> root@83.171.248.66"
|
|
||||||
echo " Remote 4 -> root@62.72.44.87"
|
|
||||||
exit 1
|
|
||||||
}
|
|
||||||
|
|
||||||
# Parse arguments
#   <node_number> <duration_seconds>            -> local mode
#   --remote <node_number> <duration_seconds>   -> remote mode
# Any other shape prints the usage text and exits.
REMOTE_MODE=false
if [ $# -eq 3 ] && [ "$1" == "--remote" ]; then
    REMOTE_MODE=true
    NODE_NUM="$2"
    DURATION="$3"
elif [ $# -eq 2 ]; then
    NODE_NUM="$1"
    DURATION="$2"
else
    usage
fi

# Validate duration
if ! [[ "$DURATION" =~ ^[0-9]+$ ]] || [ "$DURATION" -le 0 ]; then
    echo -e "${RED}Error:${NC} Duration must be a positive integer"
    exit 1
fi
# Force base 10: a zero-padded value such as "08" would otherwise be rejected
# as invalid octal by the arithmetic countdown loops.
DURATION=$((10#$DURATION))

# Calculate port (local nodes use 6001-6005, remote nodes use 80 and 443)
if [ "$REMOTE_MODE" = true ]; then
    # Remote nodes: block standard HTTP/HTTPS ports
    PORTS="80 443"
else
    # The node number is validated before it reaches $(( )): shell arithmetic
    # evaluates its operand as an expression, so unchecked input must never
    # get there. The message matches the one used by block_local_node.
    if ! [[ "$NODE_NUM" =~ ^[1-5]$ ]]; then
        echo -e "${RED}Error:${NC} Local node number must be between 1 and 5"
        exit 1
    fi
    # Local nodes: block the specific gateway port
    PORT=$((6000 + NODE_NUM))
fi
|
|
||||||
|
|
||||||
# Best-effort removal of the DROP rules installed by block_remote_node.
# Reads the _BLOCK_REMOTE_* globals that block_remote_node fills in, so it can
# be called both at the normal end of the block and from a signal trap.
_block_remote_cleanup() {
    SSHPASS="$_BLOCK_REMOTE_PASSWORD" sshpass -e \
        ssh "${_BLOCK_REMOTE_SSH_OPTS[@]}" "$_BLOCK_REMOTE_USER_HOST" "$_BLOCK_REMOTE_UNBLOCK_CMDS"
}

# Function to block ports on remote server
#
# Installs iptables DROP rules for the given TCP ports on a remote node over
# SSH, waits for the requested duration, then removes the rules again. The
# rules are also removed if the wait is interrupted (Ctrl-C / SIGTERM), so the
# node is not left unreachable.
#
# Arguments:
#   $1 - remote node number (1-4), looked up in $CONFIG_FILE
#   $2 - duration in seconds
#   $3 - space-separated list of ports, e.g. "80 443"
# Exits the script with status 1 on invalid input, missing configuration,
# missing sshpass, or SSH failure.
block_remote_node() {
    local node_num="$1"
    local duration="$2"
    local ports="$3" # Can be space-separated list like "80 443"
    local user_host password host port block_cmds unblock_cmds i

    # Validate remote node number
    if ! [[ "$node_num" =~ ^[1-4]$ ]]; then
        echo -e "${RED}Error:${NC} Remote node number must be between 1 and 4"
        exit 1
    fi

    # Get credentials from config file. `|| true` lets a failed lookup fall
    # through to the explicit error below instead of tripping `set -e`.
    user_host=$(get_remote_node_config "$node_num" "user_host") || true
    password=$(get_remote_node_config "$node_num" "password") || true

    if [ -z "$user_host" ] || [ -z "$password" ]; then
        echo -e "${RED}Error:${NC} Configuration for remote node $node_num not found in $CONFIG_FILE"
        exit 1
    fi

    host="${user_host##*@}"

    echo -e "${BLUE}=== Remote Network Blocking Tool ===${NC}"
    echo -e "Remote Node: ${GREEN}$node_num${NC} ($user_host)"
    echo -e "Ports: ${GREEN}$ports${NC}"
    echo -e "Duration: ${GREEN}$duration seconds${NC}"
    echo ""

    # Check if sshpass is installed
    if ! command -v sshpass &> /dev/null; then
        echo -e "${RED}Error:${NC} sshpass is not installed. Install it first:"
        echo -e " ${YELLOW}macOS:${NC} brew install hudochenkov/sshpass/sshpass"
        echo -e " ${YELLOW}Ubuntu/Debian:${NC} sudo apt-get install sshpass"
        exit 1
    fi

    # SSH options - force password authentication only to avoid "too many auth failures".
    # NOTE(review): host key checking is disabled here, as in the original.
    _BLOCK_REMOTE_SSH_OPTS=(
        -o StrictHostKeyChecking=no
        -o UserKnownHostsFile=/dev/null
        -o LogLevel=ERROR
        -o PreferredAuthentications=password
        -o PubkeyAuthentication=no
        -o NumberOfPasswordPrompts=1
    )
    _BLOCK_REMOTE_USER_HOST="$user_host"
    _BLOCK_REMOTE_PASSWORD="$password"

    echo -e "${YELLOW}Connecting to remote server...${NC}"

    # Test connection. The password is handed to sshpass through the SSHPASS
    # environment variable (-e) rather than -p, which would expose it in the
    # process list.
    if ! SSHPASS="$password" sshpass -e ssh "${_BLOCK_REMOTE_SSH_OPTS[@]}" "$user_host" "echo 'Connected successfully' > /dev/null"; then
        echo -e "${RED}Error:${NC} Failed to connect to $user_host"
        exit 1
    fi

    echo -e "${GREEN}✓${NC} Connected to $host"

    # Build the install and the matching removal commands for all ports
    block_cmds=""
    unblock_cmds=""
    for port in $ports; do
        block_cmds+="iptables -I INPUT -p tcp --dport $port -j DROP 2>/dev/null || true; "
        block_cmds+="iptables -I OUTPUT -p tcp --sport $port -j DROP 2>/dev/null || true; "
        unblock_cmds+="iptables -D INPUT -p tcp --dport $port -j DROP 2>/dev/null || true; "
        unblock_cmds+="iptables -D OUTPUT -p tcp --sport $port -j DROP 2>/dev/null || true; "
    done
    block_cmds+="echo 'Rules installed'"
    unblock_cmds+="echo 'Rules removed'"
    _BLOCK_REMOTE_UNBLOCK_CMDS="$unblock_cmds"

    # From here on an interruption must not leave the node blocked.
    trap 'echo ""; echo -e "${YELLOW}Interrupted:${NC} removing iptables rules from remote server..."; _block_remote_cleanup || true; exit 130' INT TERM

    # Install iptables rules on remote server
    echo -e "${YELLOW}Installing iptables rules on remote server...${NC}"
    if ! SSHPASS="$password" sshpass -e ssh "${_BLOCK_REMOTE_SSH_OPTS[@]}" "$user_host" "$block_cmds"; then
        trap - INT TERM
        echo -e "${RED}Error:${NC} Failed to install iptables rules"
        exit 1
    fi

    echo -e "${GREEN}✓${NC} Ports $ports are now blocked on $host"
    echo -e "${YELLOW}Waiting $duration seconds...${NC}"
    echo ""

    # Show countdown
    for ((i=duration; i>0; i--)); do
        printf "\r${BLUE}Time remaining: %3d seconds${NC}" "$i"
        sleep 1
    done

    echo ""
    echo ""
    echo -e "${YELLOW}Removing iptables rules from remote server...${NC}"

    if ! _block_remote_cleanup; then
        echo -e "${YELLOW}Warning:${NC} Failed to remove some iptables rules. You may need to clean up manually."
    else
        echo -e "${GREEN}✓${NC} Ports $ports are now accessible again on $host"
    fi
    trap - INT TERM

    echo ""
    echo -e "${GREEN}=== Done! ===${NC}"
    echo -e "Remote node ${GREEN}$node_num${NC} ($host) was unreachable for $duration seconds and is now accessible again."
}
|
|
||||||
|
|
||||||
# Function to block port locally using process pause (SIGSTOP)
#
# Finds the process listening on the given TCP port, pauses it with SIGSTOP
# for the requested duration and resumes it with SIGCONT. The process is also
# resumed if the wait is interrupted (Ctrl-C / SIGTERM), so it is never left
# frozen.
#
# Arguments:
#   $1 - local node number (1-5)
#   $2 - duration in seconds
#   $3 - TCP port the node listens on
# Exits the script with status 1 on invalid input, when no listener is found,
# or when the process cannot be paused.
block_local_node() {
    local node_num="$1"
    local duration="$2"
    local port="$3"
    local pid process_name i

    # Validate node number
    if ! [[ "$node_num" =~ ^[1-5]$ ]]; then
        echo -e "${RED}Error:${NC} Local node number must be between 1 and 5"
        exit 1
    fi

    echo -e "${BLUE}=== Local Network Blocking Tool ===${NC}"
    echo -e "Node: ${GREEN}node-$node_num${NC}"
    echo -e "Port: ${GREEN}$port${NC}"
    echo -e "Duration: ${GREEN}$duration seconds${NC}"
    echo -e "Method: ${GREEN}Process Pause (SIGSTOP/SIGCONT)${NC}"
    echo ""

    # Find the process listening on the port
    echo -e "${YELLOW}Finding process listening on port $port...${NC}"

    # macOS uses different tools than Linux
    if [[ "$(uname -s)" == "Darwin" ]]; then
        # macOS: use lsof, restricted to listening TCP sockets so that a client
        # connected to the port is never picked by mistake
        pid=$(lsof -nP -tiTCP:"$port" -sTCP:LISTEN 2>/dev/null | head -1 || echo "")
    else
        # Linux: use ss or netstat
        if command -v ss &> /dev/null; then
            pid=$(ss -tlnp | grep ":$port " | grep -oP 'pid=\K[0-9]+' | head -1 || echo "")
        else
            pid=$(netstat -tlnp 2>/dev/null | grep ":$port " | awk '{print $7}' | cut -d'/' -f1 | head -1 || echo "")
        fi
    fi

    # Reject empty and non-numeric results alike (netstat prints "-" in the
    # PID column for sockets it cannot attribute to a process).
    if ! [[ "$pid" =~ ^[0-9]+$ ]]; then
        echo -e "${RED}Error:${NC} No process found listening on port $port"
        echo -e "Make sure node-$node_num is running first."
        exit 1
    fi

    # Get process name
    process_name=$(ps -p "$pid" -o comm= 2>/dev/null || echo "unknown")

    echo -e "${GREEN}✓${NC} Found process: ${BLUE}$process_name${NC} (PID: ${BLUE}$pid${NC})"
    echo ""

    # An interruption during the wait must not leave the process stopped.
    # $pid is expanded now (it is numeric), so the trap does not depend on the
    # function's local scope.
    trap "echo ''; kill -CONT $pid 2>/dev/null || true; exit 130" INT TERM

    # Pause the process
    echo -e "${YELLOW}Pausing process (SIGSTOP)...${NC}"
    if ! kill -STOP "$pid" 2>/dev/null; then
        trap - INT TERM
        echo -e "${RED}Error:${NC} Failed to pause process. You may need sudo privileges."
        exit 1
    fi

    echo -e "${GREEN}✓${NC} Process paused - node-$node_num is now unreachable"
    echo -e "${YELLOW}Waiting $duration seconds...${NC}"
    echo ""

    # Show countdown
    for ((i=duration; i>0; i--)); do
        printf "\r${BLUE}Time remaining: %3d seconds${NC}" "$i"
        sleep 1
    done

    echo ""
    echo ""

    # Resume the process
    echo -e "${YELLOW}Resuming process (SIGCONT)...${NC}"
    if ! kill -CONT "$pid" 2>/dev/null; then
        echo -e "${YELLOW}Warning:${NC} Failed to resume process. It may have been terminated."
    else
        echo -e "${GREEN}✓${NC} Process resumed - node-$node_num is now accessible again"
    fi
    trap - INT TERM

    echo ""
    echo -e "${GREEN}=== Done! ===${NC}"
    echo -e "Local node ${GREEN}node-$node_num${NC} was unreachable for $duration seconds and is now accessible again."
}
|
|
||||||
|
|
||||||
# Main execution: hand over to the remote or the local implementation,
# depending on the mode chosen while parsing the arguments.
case "$REMOTE_MODE" in
    true)
        block_remote_node "$NODE_NUM" "$DURATION" "$PORTS"
        ;;
    *)
        block_local_node "$NODE_NUM" "$DURATION" "$PORT"
        ;;
esac
|
|
||||||
@ -1,112 +0,0 @@
|
|||||||
#!/bin/bash
# Build custom CoreDNS binary with RQLite plugin
#
# Clones the pinned CoreDNS release into a scratch directory, writes a
# plugin.cfg that adds the project's RQLite plugin, builds CoreDNS and copies
# the resulting binary to <project>/bin/coredns-custom.
set -euo pipefail

COREDNS_VERSION="1.11.1"
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_ROOT="$(dirname "$SCRIPT_DIR")"
COREDNS_DIR="/tmp/coredns-build"
OUTPUT_BIN="$PROJECT_ROOT/bin/coredns-custom"

# Fail early, with a clear message, when a required tool is missing
for tool in git go make; do
    if ! command -v "$tool" > /dev/null 2>&1; then
        echo "Error: required tool '$tool' was not found in PATH" >&2
        exit 1
    fi
done

echo "Building CoreDNS v${COREDNS_VERSION} with RQLite plugin..."

# Clean previous build
rm -rf "$COREDNS_DIR"
mkdir -p "$COREDNS_DIR"

# Clone CoreDNS
echo "Cloning CoreDNS..."
cd "$COREDNS_DIR"
git clone --depth 1 --branch "v${COREDNS_VERSION}" https://github.com/coredns/coredns.git
cd coredns

# Create plugin.cfg with RQLite plugin.
# The delimiter is quoted so the list is written verbatim, with no expansion.
echo "Configuring plugins..."
cat > plugin.cfg <<'EOF'
# Standard CoreDNS plugins
metadata:metadata
cancel:cancel
tls:tls
reload:reload
nsid:nsid
bufsize:bufsize
root:root
bind:bind
debug:debug
trace:trace
ready:ready
health:health
pprof:pprof
prometheus:metrics
errors:errors
log:log
dnstap:dnstap
local:local
dns64:dns64
acl:acl
any:any
chaos:chaos
loadbalance:loadbalance
cache:cache
rewrite:rewrite
header:header
dnssec:dnssec
autopath:autopath
minimal:minimal
template:template
transfer:transfer
hosts:hosts
route53:route53
azure:azure
clouddns:clouddns
k8s_external:k8s_external
kubernetes:kubernetes
file:file
auto:auto
secondary:secondary
loop:loop
forward:forward
grpc:grpc
erratic:erratic
whoami:whoami
on:github.com/coredns/caddy/onevent
sign:sign
view:view

# Response Rate Limiting (DNS amplification protection)
rrl:rrl

# Custom RQLite plugin
rqlite:github.com/DeBrosOfficial/network/pkg/coredns/rqlite
EOF

# Copy RQLite plugin to CoreDNS
echo "Copying RQLite plugin..."
mkdir -p plugin/rqlite
cp -r "$PROJECT_ROOT/pkg/coredns/rqlite/"* plugin/rqlite/

# Update go.mod to include our dependencies
echo "Updating dependencies..."
go get github.com/rqlite/rqlite-go@latest
go get "github.com/coredns/coredns@v${COREDNS_VERSION}"
go mod tidy

# Build CoreDNS
echo "Building CoreDNS binary..."
make

# Copy binary to project (bin/ may not exist yet on a fresh checkout)
echo "Copying binary to project..."
mkdir -p "$(dirname "$OUTPUT_BIN")"
cp coredns "$OUTPUT_BIN"
chmod +x "$OUTPUT_BIN"

echo ""
echo "✅ CoreDNS built successfully!"
echo "Binary location: $PROJECT_ROOT/bin/coredns-custom"
echo ""
echo "To deploy:"
echo " 1. Copy binary to /usr/local/bin/coredns on each nameserver node"
echo " 2. Copy configs/coredns/Corefile to /etc/coredns/Corefile"
echo " 3. Start CoreDNS: sudo systemctl start coredns"
echo ""
|
|
||||||
@ -1,379 +0,0 @@
|
|||||||
#!/bin/bash
|
|
||||||
|
|
||||||
# Production Cluster Health Check Script
|
|
||||||
# Tests RQLite, IPFS, and IPFS Cluster connectivity and replication
|
|
||||||
|
|
||||||
# Note: We don't use 'set -e' here because we want to continue testing even if individual checks fail
|
|
||||||
|
|
||||||
# Colors for output (escape sequences, expanded later by `echo -e`)
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m' # No Color

# Node IPs - each can be overridden from the environment; the defaults below
# apply when the variable is unset or empty
: "${BOOTSTRAP:=51.83.128.181}"
: "${NODE1:=57.128.223.92}"
: "${NODE2:=185.185.83.89}"

ALL_NODES=($BOOTSTRAP $NODE1 $NODE2)

# Counters, updated by print_pass / print_fail / print_warn
PASSED=0 FAILED=0 WARNINGS=0
|
|
||||||
|
|
||||||
# Helper functions
|
|
||||||
# Print a blank line followed by the section title $1 framed by two rulers,
# all in blue.
print_header() {
    local ruler="${BLUE}========================================${NC}"

    printf '%b\n' "" "$ruler" "${BLUE}$1${NC}" "$ruler"
}
|
|
||||||
|
|
||||||
# Announce the check described by $1 (yellow, prefixed with an arrow).
print_test() {
    printf '%b\n' "${YELLOW}▶ $1${NC}"
}
|
|
||||||
|
|
||||||
# Report a passed check: print $1 in green and increment the PASSED counter.
print_pass() {
    printf '%b\n' "${GREEN}✓ $1${NC}"
    : $(( PASSED += 1 ))
}
|
|
||||||
|
|
||||||
# Report a failed check: print $1 in red and increment the FAILED counter.
print_fail() {
    printf '%b\n' "${RED}✗ $1${NC}"
    : $(( FAILED += 1 ))
}
|
|
||||||
|
|
||||||
# Report a warning: print $1 in yellow and increment the WARNINGS counter.
print_warn() {
    printf '%b\n' "${YELLOW}⚠ $1${NC}"
    : $(( WARNINGS += 1 ))
}
|
|
||||||
|
|
||||||
# Print the informational detail line $1, indented; no counter is touched.
print_info() {
    printf '%b\n' " $1"
}
|
|
||||||
|
|
||||||
# Test functions
|
|
||||||
# Check the RQLite /status endpoint on every node in ALL_NODES.
#
# For each node it verifies the raft state, the peer count and the number of
# members in the latest raft configuration; afterwards it verifies that the
# cluster has exactly one leader and that commit indices agree.
# Expected sizes are derived from ALL_NODES instead of being hard-coded, so
# the check keeps working when nodes are added or removed.
# Results are reported through print_pass / print_fail / print_warn.
test_rqlite_status() {
    print_header "1. RQLITE CLUSTER STATUS"

    local expected_nodes=${#ALL_NODES[@]}
    local expected_peers=$((expected_nodes - 1))
    local leader_count=0
    local follower_count=0
    local commit_indices=()
    local node response state num_peers commit_index last_contact config node_count
    local first all_same idx

    for node in "${ALL_NODES[@]}"; do
        print_test "Testing RQLite on $node"

        if ! response=$(curl -s --max-time 5 "http://$node:5001/status" 2>/dev/null); then
            print_fail "Cannot connect to RQLite on $node:5001"
            continue
        fi

        # A reachable port that does not answer with JSON (proxy error page,
        # empty body, ...) is a failure in its own right.
        if ! echo "$response" | jq -e . > /dev/null 2>&1; then
            print_fail "Invalid status response from RQLite on $node:5001"
            continue
        fi

        state=$(echo "$response" | jq -r '.store.raft.state // "unknown"')
        num_peers=$(echo "$response" | jq -r '.store.raft.num_peers // 0')
        commit_index=$(echo "$response" | jq -r '.store.raft.commit_index // 0')
        last_contact=$(echo "$response" | jq -r '.store.raft.last_contact // "N/A"')
        config=$(echo "$response" | jq -r '.store.raft.latest_configuration // "[]"')
        # Cluster members are counted by occurrences of "Address" in the
        # configuration text.
        node_count=$(echo "$config" | grep -o "Address" | wc -l | tr -d ' ')

        commit_indices+=("$commit_index")

        print_info "State: $state | Peers: $num_peers | Commit Index: $commit_index | Cluster Nodes: $node_count"

        # Check state
        if [ "$state" = "Leader" ]; then
            leader_count=$((leader_count + 1))
            print_pass "Node $node is the Leader"
        elif [ "$state" = "Follower" ]; then
            follower_count=$((follower_count + 1))
            # Check last contact
            if [ "$last_contact" != "N/A" ] && [ "$last_contact" != "0" ]; then
                print_pass "Node $node is a Follower (last contact: $last_contact)"
            else
                print_warn "Node $node is Follower but last_contact is $last_contact"
            fi
        else
            print_fail "Node $node has unexpected state: $state"
        fi

        # Check peer count
        if [ "$num_peers" = "$expected_peers" ]; then
            print_pass "Node $node has correct peer count: $expected_peers"
        else
            print_fail "Node $node has incorrect peer count: $num_peers (expected $expected_peers)"
        fi

        # Check cluster configuration
        if [ "$node_count" = "$expected_nodes" ]; then
            print_pass "Node $node sees all $expected_nodes cluster members"
        else
            print_fail "Node $node only sees $node_count cluster members (expected $expected_nodes)"
        fi

        echo ""
    done

    # Check for exactly 1 leader (leaders are counted, so a split cluster with
    # several leaders is reported as invalid)
    if [ "$leader_count" -eq 1 ] && [ "$follower_count" -eq "$expected_peers" ]; then
        print_pass "Cluster has 1 Leader and $expected_peers Followers ✓"
    else
        print_fail "Invalid cluster state (Leaders: $leader_count, Followers: $follower_count)"
    fi

    # Check commit index sync (only when every node reported one)
    if [ "${#commit_indices[@]}" -eq "$expected_nodes" ] && [ "$expected_nodes" -gt 0 ]; then
        first="${commit_indices[0]}"
        all_same=true
        for idx in "${commit_indices[@]}"; do
            if [ "$idx" != "$first" ]; then
                all_same=false
                break
            fi
        done

        if [ "$all_same" = true ]; then
            print_pass "All nodes have synced commit index: $first"
        else
            print_warn "Commit indices differ: ${commit_indices[*]} (might be normal if writes are happening)"
        fi
    fi
}
|
|
||||||
|
|
||||||
# Write a uniquely tagged row through $BOOTSTRAP and confirm that it can be
# read back from every node in ALL_NODES.
#
# The probe table test_cluster_health is created on demand. The inserted row
# is deleted again at the end (best effort), so repeated health checks do not
# make the table grow without bound.
# Results are reported through print_pass / print_fail.
test_rqlite_replication() {
    print_header "2. RQLITE REPLICATION TEST"

    local response error test_value node row_count retrieved_value

    print_test "Creating test table and inserting data on leader ($BOOTSTRAP)"

    # Create table
    if ! response=$(curl -s --max-time 5 -XPOST "http://$BOOTSTRAP:5001/db/execute" \
        -H "Content-Type: application/json" \
        -d '[["CREATE TABLE IF NOT EXISTS test_cluster_health (id INTEGER PRIMARY KEY AUTOINCREMENT, timestamp TEXT, node TEXT, value TEXT)"]]' 2>/dev/null); then
        print_fail "Failed to create table"
        return
    fi

    if echo "$response" | jq -e '.results[0].error' >/dev/null 2>&1; then
        error=$(echo "$response" | jq -r '.results[0].error')
        if [[ "$error" != "table test_cluster_health already exists" ]]; then
            print_fail "Table creation error: $error"
            return
        fi
    fi
    print_pass "Table exists"

    # Insert test data; the value is unique per second so it can be found again
    test_value="test_$(date +%s)"
    if ! response=$(curl -s --max-time 5 -XPOST "http://$BOOTSTRAP:5001/db/execute" \
        -H "Content-Type: application/json" \
        -d "[[\"INSERT INTO test_cluster_health (timestamp, node, value) VALUES (datetime('now'), 'bootstrap', '$test_value')\"]]" 2>/dev/null); then
        print_fail "Failed to insert data"
        return
    fi

    if echo "$response" | jq -e '.results[0].error' >/dev/null 2>&1; then
        error=$(echo "$response" | jq -r '.results[0].error')
        print_fail "Insert error: $error"
        return
    fi
    print_pass "Data inserted: $test_value"

    # Wait for replication
    print_info "Waiting 2 seconds for replication..."
    sleep 2

    # Query from all nodes
    for node in "${ALL_NODES[@]}"; do
        print_test "Reading from $node"

        if ! response=$(curl -s --max-time 5 -XPOST "http://$node:5001/db/query?level=weak" \
            -H "Content-Type: application/json" \
            -d "[\"SELECT * FROM test_cluster_health WHERE value = '$test_value' LIMIT 1\"]" 2>/dev/null); then
            print_fail "Failed to query from $node"
            continue
        fi

        if echo "$response" | jq -e '.results[0].error' >/dev/null 2>&1; then
            error=$(echo "$response" | jq -r '.results[0].error')
            print_fail "Query error on $node: $error"
            continue
        fi

        row_count=$(echo "$response" | jq -r '.results[0].values | length // 0')
        if [ "$row_count" = "1" ]; then
            # Column 3 is `value` (id, timestamp, node, value)
            retrieved_value=$(echo "$response" | jq -r '.results[0].values[0][3] // ""')
            if [ "$retrieved_value" = "$test_value" ]; then
                print_pass "Data replicated correctly to $node"
            else
                print_fail "Data mismatch on $node (got: $retrieved_value, expected: $test_value)"
            fi
        else
            print_fail "Expected 1 row from $node, got $row_count"
        fi
    done

    # Remove the probe row again. Best effort: a failed cleanup must not turn
    # an otherwise healthy result into a failure.
    curl -s --max-time 5 -XPOST "http://$BOOTSTRAP:5001/db/execute" \
        -H "Content-Type: application/json" \
        -d "[[\"DELETE FROM test_cluster_health WHERE value = '$test_value'\"]]" >/dev/null 2>&1 || true
}
|
|
||||||
|
|
||||||
# Query the IPFS HTTP API (/api/v0/id on port 4501) of every node in
# ALL_NODES and report whether a peer ID comes back.
test_ipfs_status() {
    print_header "3. IPFS DAEMON STATUS"

    for node in "${ALL_NODES[@]}"; do
        print_test "Testing IPFS on $node"

        response=$(curl -s --max-time 5 -X POST http://$node:4501/api/v0/id 2>/dev/null)
        if [ $? -ne 0 ]; then
            print_fail "Cannot connect to IPFS on $node:4501"
            continue
        fi

        local peer_id=$(jq -r '.ID // "unknown"' <<< "$response")
        local addr_count=$(jq -r '.Addresses | length // 0' <<< "$response")
        local agent=$(jq -r '.AgentVersion // "unknown"' <<< "$response")

        # Without a peer ID the daemon did not answer the way it should
        if [ "$peer_id" = "unknown" ]; then
            print_fail "IPFS not responding correctly on $node"
            continue
        fi

        print_pass "IPFS running on $node (ID: ${peer_id:0:12}...)"
        print_info "Agent: $agent | Addresses: $addr_count"
    done
}
|
|
||||||
|
|
||||||
# Check how many IPFS swarm peers each node in ALL_NODES is connected to.
#
# Every node is expected to be connected to all other nodes, i.e. to
# (number of nodes - 1) peers; the figure is derived from ALL_NODES instead of
# being hard-coded. Fewer or more peers than expected is a warning, no peers
# at all is a failure.
test_ipfs_swarm() {
    print_header "4. IPFS SWARM CONNECTIVITY"

    local expected_peers=$(( ${#ALL_NODES[@]} - 1 ))
    local node response peer_count

    for node in "${ALL_NODES[@]}"; do
        print_test "Checking IPFS swarm peers on $node"

        if ! response=$(curl -s --max-time 5 -X POST "http://$node:4501/api/v0/swarm/peers" 2>/dev/null); then
            print_fail "Failed to get swarm peers from $node"
            continue
        fi

        peer_count=$(echo "$response" | jq -r '.Peers | length // 0' 2>/dev/null)
        # A reply jq cannot parse yields an empty value; treat it as "no peers"
        # rather than feeding a non-number to the numeric comparison below.
        [[ "$peer_count" =~ ^[0-9]+$ ]] || peer_count=0

        if [ "$peer_count" -eq "$expected_peers" ]; then
            print_pass "Node $node connected to $expected_peers IPFS peers"
        elif [ "$peer_count" -gt 0 ]; then
            print_warn "Node $node connected to $peer_count IPFS peers (expected $expected_peers)"
        else
            print_fail "Node $node has no IPFS swarm peers"
        fi
    done
}
|
|
||||||
|
|
||||||
# Query the IPFS Cluster REST API (/id on port 9094) of every node in
# ALL_NODES and check that each one reports the full set of cluster peers.
#
# The expected peer count is the number of entries in ALL_NODES rather than a
# hard-coded figure.
test_ipfs_cluster_status() {
    print_header "5. IPFS CLUSTER STATUS"

    local expected_peers=${#ALL_NODES[@]}
    local node response cluster_id cluster_peers version

    for node in "${ALL_NODES[@]}"; do
        print_test "Testing IPFS Cluster on $node"

        if ! response=$(curl -s --max-time 5 "http://$node:9094/id" 2>/dev/null); then
            print_fail "Cannot connect to IPFS Cluster on $node:9094"
            continue
        fi

        cluster_id=$(echo "$response" | jq -r '.id // "unknown"')
        cluster_peers=$(echo "$response" | jq -r '.cluster_peers | length // 0')
        version=$(echo "$response" | jq -r '.version // "unknown"')

        if [ "$cluster_id" != "unknown" ]; then
            print_pass "IPFS Cluster running on $node (ID: ${cluster_id:0:12}...)"
            print_info "Version: $version | Cluster Peers: $cluster_peers"

            if [ "$cluster_peers" = "$expected_peers" ]; then
                print_pass "Node $node sees all $expected_peers cluster peers"
            else
                print_warn "Node $node sees $cluster_peers cluster peers (expected $expected_peers)"
            fi
        else
            print_fail "IPFS Cluster not responding correctly on $node"
        fi
    done
}
|
|
||||||
|
|
||||||
# Compare the number of pins reported by the IPFS Cluster REST API (/pins on
# port 9094) across all nodes in ALL_NODES.
#
# The reply is counted with `jq -s`, which handles all three shapes the
# endpoint can produce: a single JSON array, a stream of one JSON object per
# pin, and an empty body when there are no pins.
# NOTE(review): which shape is returned depends on the IPFS Cluster version -
# confirm against the deployed release.
test_ipfs_cluster_pins() {
    print_header "6. IPFS CLUSTER PIN CONSISTENCY"

    local expected_nodes=${#ALL_NODES[@]}
    local pin_counts=()
    local node response pin_count first all_same count

    for node in "${ALL_NODES[@]}"; do
        print_test "Checking pins on $node"

        if ! response=$(curl -s --max-time 5 "http://$node:9094/pins" 2>/dev/null); then
            print_fail "Failed to get pins from $node"
            pin_counts+=(0)
            continue
        fi

        # Slurp the whole reply: one array => its length, otherwise the number
        # of streamed documents (0 for an empty body).
        pin_count=$(echo "$response" | jq -s 'if length == 1 and (.[0] | type) == "array" then (.[0] | length) else length end' 2>/dev/null)
        if ! [[ "$pin_count" =~ ^[0-9]+$ ]]; then
            print_fail "Invalid pin list from $node"
            pin_counts+=(0)
            continue
        fi

        pin_counts+=("$pin_count")
        print_pass "Node $node has $pin_count pins"
    done

    # Check if all nodes have same pin count
    if [ "${#pin_counts[@]}" -eq "$expected_nodes" ] && [ "$expected_nodes" -gt 0 ]; then
        first="${pin_counts[0]}"
        all_same=true
        for count in "${pin_counts[@]}"; do
            if [ "$count" != "$first" ]; then
                all_same=false
                break
            fi
        done

        if [ "$all_same" = true ]; then
            print_pass "All nodes have consistent pin count: $first"
        else
            print_warn "Pin counts differ: ${pin_counts[*]} (might be syncing)"
        fi
    fi
}
|
|
||||||
|
|
||||||
# Print the pass / warning / failure totals and terminate the script:
# exit 0 when nothing failed, 1 for one or two failures, 2 for more.
print_summary() {
    local verdict status

    print_header "TEST SUMMARY"

    echo ""
    echo -e "${GREEN}Passed: $PASSED${NC}"
    echo -e "${YELLOW}Warnings: $WARNINGS${NC}"
    echo -e "${RED}Failed: $FAILED${NC}"
    echo ""

    # Pick the closing message and the exit status first, then emit both once
    if [ "$FAILED" -eq 0 ]; then
        verdict="${GREEN}🎉 All critical tests passed! Cluster is healthy.${NC}"
        status=0
    elif [ "$FAILED" -le 2 ]; then
        verdict="${YELLOW}⚠️ Some tests failed. Review the output above.${NC}"
        status=1
    else
        verdict="${RED}❌ Multiple failures detected. Cluster needs attention.${NC}"
        status=2
    fi

    echo -e "$verdict"
    exit "$status"
}
|
|
||||||
|
|
||||||
# Main execution: print the banner and the nodes under test, run every check
# in order, then print the summary (which also sets the exit status).
main() {
    local check

    printf '%b\n' "" \
        "${BLUE}╔════════════════════════════════════════════╗${NC}" \
        "${BLUE}║ DEBROS Production Cluster Health Check ║${NC}" \
        "${BLUE}╚════════════════════════════════════════════╝${NC}" \
        ""

    cat <<EOF
Testing cluster:
 Bootstrap: $BOOTSTRAP
 Node 1: $NODE1
 Node 2: $NODE2
EOF

    for check in \
        test_rqlite_status \
        test_rqlite_replication \
        test_ipfs_status \
        test_ipfs_swarm \
        test_ipfs_cluster_status \
        test_ipfs_cluster_pins
    do
        "$check"
    done

    print_summary
}

# Run main
main
|
|
||||||
|
|
||||||
Loading…
x
Reference in New Issue
Block a user