diff --git a/CHANGELOG.md b/CHANGELOG.md index e883492..23a85ae 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,6 +13,21 @@ The format is based on [Keep a Changelog][keepachangelog] and adheres to [Semant ### Deprecated ### Fixed +## [0.69.16] - 2025-11-16 + +### Added + +### Changed +- Improved the `make stop` command to ensure a more robust and graceful shutdown of development services. +- Enhanced the `make kill` command and underlying scripts for more reliable force termination of stray development processes. +- Increased the graceful shutdown timeout for development processes from 500ms to 2 seconds before resorting to force kill. + +### Deprecated + +### Removed + +### Fixed + ## [0.69.15] - 2025-11-16 ### Added diff --git a/Makefile b/Makefile index 7033f09..7660e69 100644 --- a/Makefile +++ b/Makefile @@ -19,7 +19,7 @@ test-e2e: .PHONY: build clean test run-node run-node2 run-node3 run-example deps tidy fmt vet lint clear-ports install-hooks kill -VERSION := 0.69.15 +VERSION := 0.69.16 COMMIT ?= $(shell git rev-parse --short HEAD 2>/dev/null || echo unknown) DATE ?= $(shell date -u +%Y-%m-%dT%H:%M:%SZ) LDFLAGS := -X 'main.version=$(VERSION)' -X 'main.commit=$(COMMIT)' -X 'main.date=$(DATE)' @@ -85,13 +85,19 @@ run-gateway: dev: build @./bin/dbn dev up -# Kill all processes (graceful shutdown + force kill stray processes) +# Graceful shutdown of all dev services +stop: + @if [ -f ./bin/dbn ]; then \ + ./bin/dbn dev down; \ + else \ + echo "⚠️ dbn binary not found, using force kill instead..."; \ + bash scripts/dev-kill-all.sh; \ + fi + +# Force kill all processes (immediate termination) kill: @bash scripts/dev-kill-all.sh -stop: - @./bin/dbn dev down - # Help help: @echo "Available targets:" @@ -107,7 +113,8 @@ help: @echo " - Validates cluster health (IPFS peers, RQLite, LibP2P)" @echo " - Stops all services if health checks fail" @echo " - Includes comprehensive logging" - @echo " make kill - Stop all development services" + @echo " make stop - 
Gracefully stop all development services" + @echo " make kill - Force kill all development services (use if stop fails)" @echo "" @echo "Development Management (via dbn):" @echo " ./bin/dbn dev status - Show status of all dev services" diff --git a/pkg/environments/development/runner.go b/pkg/environments/development/runner.go index a2272ef..d4e665a 100644 --- a/pkg/environments/development/runner.go +++ b/pkg/environments/development/runner.go @@ -1027,34 +1027,56 @@ func (pm *ProcessManager) stopProcess(name string) error { return nil } + // Check if process exists before trying to kill + if !checkProcessRunning(pid) { + os.Remove(pidPath) + fmt.Fprintf(pm.logWriter, "✓ %s (not running)\n", name) + return nil + } + proc, err := os.FindProcess(pid) if err != nil { os.Remove(pidPath) return nil } - // Try graceful shutdown first + // Try graceful shutdown first (SIGINT) proc.Signal(os.Interrupt) - // Wait a bit for graceful shutdown - time.Sleep(500 * time.Millisecond) + // Wait up to 2 seconds for graceful shutdown + gracefulShutdown := false + for i := 0; i < 20; i++ { + time.Sleep(100 * time.Millisecond) + if !checkProcessRunning(pid) { + gracefulShutdown = true + break + } + } - // Check if process is still running - if checkProcessRunning(pid) { - // Force kill if still running + // Force kill if still running after graceful attempt + if !gracefulShutdown && checkProcessRunning(pid) { proc.Signal(os.Kill) time.Sleep(200 * time.Millisecond) - // Also kill any child processes (platform-specific) + // Kill any child processes (platform-specific) if runtime.GOOS != "windows" { - // Use pkill to kill children on Unix-like systems exec.Command("pkill", "-9", "-P", fmt.Sprintf("%d", pid)).Run() } + + // Final force kill attempt if somehow still alive + if checkProcessRunning(pid) { + exec.Command("kill", "-9", fmt.Sprintf("%d", pid)).Run() + time.Sleep(100 * time.Millisecond) + } } os.Remove(pidPath) - fmt.Fprintf(pm.logWriter, "✓ %s stopped\n", name) + if 
gracefulShutdown { + fmt.Fprintf(pm.logWriter, "✓ %s stopped gracefully\n", name) + } else { + fmt.Fprintf(pm.logWriter, "✓ %s stopped (forced)\n", name) + } return nil } diff --git a/scripts/dev-kill-all.sh b/scripts/dev-kill-all.sh index 6247798..7513d56 100755 --- a/scripts/dev-kill-all.sh +++ b/scripts/dev-kill-all.sh @@ -1,7 +1,7 @@ #!/bin/bash set -euo pipefail -echo "Force killing all processes on dev ports..." +echo "Force killing all debros development processes..." # Define all dev ports (5 nodes topology: bootstrap, bootstrap2, node2, node3, node4) PORTS=( @@ -32,16 +32,13 @@ PORTS=( killed_count=0 killed_pids=() -# Kill all processes using these ports (LISTEN, ESTABLISHED, or any state) +# Method 1: Kill all processes using these ports for port in "${PORTS[@]}"; do - # Get all PIDs using this port in ANY TCP state pids=$(lsof -nP -iTCP:"$port" -t 2>/dev/null || true) if [[ -n "$pids" ]]; then - echo "Killing processes on port $port: $pids" + echo " Killing processes on port $port: $pids" for pid in $pids; do - # Kill the process and all its children kill -9 "$pid" 2>/dev/null || true - # Also kill any children of this process pkill -9 -P "$pid" 2>/dev/null || true killed_pids+=("$pid") done @@ -49,18 +46,26 @@ for port in "${PORTS[@]}"; do fi done -# Also kill processes by command name patterns (in case they're orphaned) -# This catches processes that might be using debros ports but not showing up in lsof -COMMANDS=("node" "ipfs" "ipfs-cluster-service" "rqlited" "olric-server" "gateway") -for cmd in "${COMMANDS[@]}"; do - # Find all processes with this command name - all_pids=$(pgrep -f "^.*$cmd.*" 2>/dev/null || true) +# Method 2: Kill processes by specific patterns (ONLY debros-related) +# Be very specific to avoid killing unrelated processes +SPECIFIC_PATTERNS=( + "ipfs daemon" + "ipfs-cluster-service daemon" + "olric-server" + "bin/node" + "bin/gateway" + "anyone-client" +) + +for pattern in "${SPECIFIC_PATTERNS[@]}"; do + # Use exact pattern 
matching to avoid false positives + all_pids=$(pgrep -f "$pattern" 2>/dev/null || true) if [[ -n "$all_pids" ]]; then for pid in $all_pids; do - # Check if this process is using any of our dev ports - port_match=$(lsof -nP -p "$pid" -iTCP 2>/dev/null | grep -E ":(400[1-4]|401[1-1]|410[1-4]|411[1-1]|450[1-4]|451[1-1]|500[1-4]|501[1-1]|600[1-1]|700[1-4]|701[1-1]|750[1-4]|751[1-1]|332[02]|9050|909[4-9]|910[4-9]|911[4-9]|912[4-9]|913[4-9]|909[6-9]|910[6-9]|911[6-9]|912[6-9]|913[6-9])" || true) - if [[ -n "$port_match" ]]; then - echo "Killing orphaned $cmd process (PID: $pid) using dev ports" + # Double-check the command line to avoid killing wrong processes + cmdline=$(ps -p "$pid" -o command= 2>/dev/null || true) + if [[ "$cmdline" == *"$pattern"* ]]; then + echo " Killing $pattern process (PID: $pid)" kill -9 "$pid" 2>/dev/null || true pkill -9 -P "$pid" 2>/dev/null || true killed_pids+=("$pid") @@ -69,26 +74,40 @@ for cmd in "${COMMANDS[@]}"; do fi done -# Clean up PID files +# Method 3: Kill processes using PID files PIDS_DIR="$HOME/.debros/.pids" if [[ -d "$PIDS_DIR" ]]; then - rm -f "$PIDS_DIR"/*.pid || true + for pidfile in "$PIDS_DIR"/*.pid; do + if [[ -f "$pidfile" ]]; then + pid=$(cat "$pidfile" 2>/dev/null || true) + if [[ -n "$pid" ]] && ps -p "$pid" > /dev/null 2>&1; then + name=$(basename "$pidfile" .pid) + echo " Killing $name (PID: $pid from pidfile)" + kill -9 "$pid" 2>/dev/null || true + pkill -9 -P "$pid" 2>/dev/null || true + killed_pids+=("$pid") + fi + fi + done + # Clean up all PID files + rm -f "$PIDS_DIR"/*.pid 2>/dev/null || true fi # Remove duplicates and report if [[ ${#killed_pids[@]} -gt 0 ]]; then unique_pids=($(printf '%s\n' "${killed_pids[@]}" | sort -u)) - echo "✓ Killed ${#unique_pids[@]} unique process(es) on $killed_count port(s)" + echo "✓ Killed ${#unique_pids[@]} unique process(es)" else - echo "✓ No processes found on dev ports" + echo "✓ No debros processes found running" fi # Final verification: check if any ports are still 
in use still_in_use=0 +busy_ports=() for port in "${PORTS[@]}"; do pids=$(lsof -nP -iTCP:"$port" -t 2>/dev/null || true) if [[ -n "$pids" ]]; then - echo "⚠️ Warning: Port $port still in use by: $pids" + busy_ports+=("$port") still_in_use=$((still_in_use + 1)) fi done @@ -96,6 +115,7 @@ done if [[ $still_in_use -eq 0 ]]; then echo "✓ All dev ports are now free" else - echo "⚠️ $still_in_use port(s) still in use - you may need to manually kill processes" + echo "⚠️ Warning: $still_in_use port(s) still in use: ${busy_ports[*]}" + echo " Run 'lsof -nP -iTCP:<port>' to identify the processes" fi