mirror of
https://github.com/DeBrosOfficial/orama.git
synced 2026-03-17 10:46:58 +00:00
144 lines
4.0 KiB
Go
144 lines
4.0 KiB
Go
package lifecycle
|
|
|
|
import (
|
|
"encoding/json"
|
|
"fmt"
|
|
"io"
|
|
"net/http"
|
|
"os"
|
|
"os/exec"
|
|
"time"
|
|
|
|
"github.com/DeBrosOfficial/network/pkg/cli/utils"
|
|
)
|
|
|
|
// HandlePostUpgrade brings the node back online after an upgrade:
|
|
// 1. Resets failed + unmasks + enables all services
|
|
// 2. Starts services in dependency order
|
|
// 3. Waits for global RQLite to be ready
|
|
// 4. Waits for each namespace RQLite to be ready
|
|
// 5. Removes maintenance flag
|
|
func HandlePostUpgrade() {
|
|
if os.Geteuid() != 0 {
|
|
fmt.Fprintf(os.Stderr, "Error: post-upgrade must be run as root (use sudo)\n")
|
|
os.Exit(1)
|
|
}
|
|
|
|
fmt.Printf("Post-upgrade: bringing node back online...\n")
|
|
|
|
// 1. Get all services
|
|
services := utils.GetProductionServices()
|
|
if len(services) == 0 {
|
|
fmt.Printf(" Warning: no Orama services found\n")
|
|
return
|
|
}
|
|
|
|
// Reset failed state
|
|
resetArgs := []string{"reset-failed"}
|
|
resetArgs = append(resetArgs, services...)
|
|
exec.Command("systemctl", resetArgs...).Run()
|
|
|
|
// Unmask and enable all services
|
|
for _, svc := range services {
|
|
masked, err := utils.IsServiceMasked(svc)
|
|
if err == nil && masked {
|
|
exec.Command("systemctl", "unmask", svc).Run()
|
|
}
|
|
enabled, err := utils.IsServiceEnabled(svc)
|
|
if err == nil && !enabled {
|
|
exec.Command("systemctl", "enable", svc).Run()
|
|
}
|
|
}
|
|
fmt.Printf(" Services reset and enabled\n")
|
|
|
|
// 2. Start services in dependency order
|
|
fmt.Printf(" Starting services...\n")
|
|
utils.StartServicesOrdered(services, "start")
|
|
fmt.Printf(" Services started\n")
|
|
|
|
// 3. Wait for global RQLite (port 5001) to be ready
|
|
fmt.Printf(" Waiting for global RQLite (port 5001)...\n")
|
|
if err := waitForRQLiteReady(5001, 120*time.Second); err != nil {
|
|
fmt.Printf(" Warning: global RQLite not ready: %v\n", err)
|
|
} else {
|
|
fmt.Printf(" Global RQLite ready\n")
|
|
}
|
|
|
|
// 4. Wait for each namespace RQLite with a global timeout of 5 minutes
|
|
nsPorts := getNamespaceRQLitePorts()
|
|
if len(nsPorts) > 0 {
|
|
fmt.Printf(" Waiting for %d namespace RQLite instances...\n", len(nsPorts))
|
|
globalDeadline := time.Now().Add(5 * time.Minute)
|
|
|
|
healthy := 0
|
|
failed := 0
|
|
for ns, port := range nsPorts {
|
|
remaining := time.Until(globalDeadline)
|
|
if remaining <= 0 {
|
|
fmt.Printf(" Warning: global timeout reached, skipping remaining namespaces\n")
|
|
failed += len(nsPorts) - healthy - failed
|
|
break
|
|
}
|
|
timeout := 90 * time.Second
|
|
if remaining < timeout {
|
|
timeout = remaining
|
|
}
|
|
fmt.Printf(" Waiting for namespace '%s' (port %d)...\n", ns, port)
|
|
if err := waitForRQLiteReady(port, timeout); err != nil {
|
|
fmt.Printf(" Warning: namespace '%s' RQLite not ready: %v\n", ns, err)
|
|
failed++
|
|
} else {
|
|
fmt.Printf(" Namespace '%s' ready\n", ns)
|
|
healthy++
|
|
}
|
|
}
|
|
fmt.Printf(" Namespace RQLite: %d healthy, %d failed\n", healthy, failed)
|
|
}
|
|
|
|
// 5. Remove maintenance flag
|
|
if err := os.Remove(maintenanceFlagPath); err != nil && !os.IsNotExist(err) {
|
|
fmt.Printf(" Warning: failed to remove maintenance flag: %v\n", err)
|
|
} else {
|
|
fmt.Printf(" Maintenance flag removed\n")
|
|
}
|
|
|
|
fmt.Printf("Post-upgrade complete. Node is back online.\n")
|
|
}
|
|
|
|
// waitForRQLiteReady polls an RQLite instance's /status endpoint until it
// reports Leader or Follower state, or the timeout expires.
//
// The endpoint queried is http://localhost:<port>/status. Request errors,
// unreadable bodies and responses that are not the expected JSON are treated
// as "not ready yet" and retried after a pause of up to two seconds.
//
// Both the per-request timeout and the pause between polls are capped at the
// time left before the deadline, so the call does not run noticeably longer
// than timeout. A non-positive timeout returns the error without polling.
//
// It returns nil once the instance is ready, or an error naming the timeout
// and the port otherwise.
func waitForRQLiteReady(port int, timeout time.Duration) error {
	const (
		pollInterval   = 2 * time.Second
		requestTimeout = 2 * time.Second
	)

	deadline := time.Now().Add(timeout)
	client := &http.Client{}
	url := fmt.Sprintf("http://localhost:%d/status", port)

	// ready performs one poll and reports whether the raft state is usable.
	ready := func() bool {
		resp, err := client.Get(url)
		if err != nil {
			return false
		}
		defer resp.Body.Close()

		body, err := io.ReadAll(resp.Body)
		if err != nil {
			return false
		}

		var status struct {
			Store struct {
				Raft struct {
					State string `json:"state"`
				} `json:"raft"`
			} `json:"store"`
		}
		if err := json.Unmarshal(body, &status); err != nil {
			return false
		}

		switch status.Store.Raft.State {
		case "Leader", "Follower":
			return true
		}
		return false
	}

	for {
		remaining := time.Until(deadline)
		if remaining <= 0 {
			break
		}

		// Cap the request at the time left; remaining > 0 here, so the
		// client timeout is never set to 0 (which would mean "no timeout").
		client.Timeout = requestTimeout
		if remaining < requestTimeout {
			client.Timeout = remaining
		}

		if ready() {
			return nil
		}

		// Pause before the next poll, but never past the deadline.
		pause := pollInterval
		if left := time.Until(deadline); left < pause {
			pause = left
		}
		if pause > 0 {
			time.Sleep(pause)
		}
	}

	return fmt.Errorf("timeout after %s waiting for RQLite on port %d", timeout, port)
}
|