From f7db698273342842ee7c422258b9760e2acf58a4 Mon Sep 17 00:00:00 2001 From: anonpenguin23 Date: Tue, 10 Feb 2026 19:32:08 +0200 Subject: [PATCH] Bug fix on production orchestrator on start and stop --- pkg/cli/production/lifecycle/restart.go | 10 +--- pkg/cli/production/lifecycle/start.go | 15 ++---- pkg/cli/production/upgrade/orchestrator.go | 20 +++---- pkg/cli/utils/systemd.go | 62 ++++++++++++++++++++++ 4 files changed, 78 insertions(+), 29 deletions(-) diff --git a/pkg/cli/production/lifecycle/restart.go b/pkg/cli/production/lifecycle/restart.go index 6daed86..9145a10 100644 --- a/pkg/cli/production/lifecycle/restart.go +++ b/pkg/cli/production/lifecycle/restart.go @@ -53,15 +53,9 @@ func HandleRestart() { os.Exit(1) } - // Start all services + // Start all services in dependency order (namespace: rqlite → olric → gateway) fmt.Printf(" Starting services...\n") - for _, svc := range services { - if err := exec.Command("systemctl", "start", svc).Run(); err != nil { - fmt.Printf(" ⚠️ Failed to start %s: %v\n", svc, err) - } else { - fmt.Printf(" ✓ Started %s\n", svc) - } - } + utils.StartServicesOrdered(services, "start") fmt.Printf("\n✅ All services restarted\n") } diff --git a/pkg/cli/production/lifecycle/start.go b/pkg/cli/production/lifecycle/start.go index ce36de6..823859e 100644 --- a/pkg/cli/production/lifecycle/start.go +++ b/pkg/cli/production/lifecycle/start.go @@ -81,9 +81,8 @@ func HandleStart() { os.Exit(1) } - // Enable and start inactive services + // Re-enable inactive services first (in case they were disabled by 'orama prod stop') for _, svc := range inactive { - // Re-enable the service first (in case it was disabled by 'orama prod stop') enabled, err := utils.IsServiceEnabled(svc) if err == nil && !enabled { if err := exec.Command("systemctl", "enable", svc).Run(); err != nil { @@ -92,18 +91,12 @@ func HandleStart() { fmt.Printf(" ✓ Enabled %s (will auto-start on boot)\n", svc) } } - - // Start the service - if err := exec.Command("systemctl", "start", svc).Run(); err != nil { - fmt.Printf(" ⚠️ Failed to start %s: %v\n", svc, err) - } else { - fmt.Printf(" ✓ Started %s\n", svc) - } } + // Start services in dependency order (namespace: rqlite → olric → gateway) + utils.StartServicesOrdered(inactive, "start") + // Give services more time to fully initialize before verification - // Some services may need more time to start up, especially if they're - // waiting for dependencies or initializing databases fmt.Printf(" ⏳ Waiting for services to initialize...\n") time.Sleep(5 * time.Second) diff --git a/pkg/cli/production/upgrade/orchestrator.go b/pkg/cli/production/upgrade/orchestrator.go index c145fa0..3a75dc5 100644 --- a/pkg/cli/production/upgrade/orchestrator.go +++ b/pkg/cli/production/upgrade/orchestrator.go @@ -694,24 +694,24 @@ func (o *Orchestrator) restartServices() error { } } - // Start any remaining services not in priority list (includes namespace services) + // Restart remaining services (namespace + any others) in dependency order. + // Namespace services are restarted: rqlite → olric (+ wait) → gateway. + // Without ordering, the gateway starts before Olric is accepting connections, + // the Olric client initialization fails, and the cache stays permanently unavailable. + var remaining []string for _, svc := range services { - found := false + isPriority := false for _, priority := range priorityOrder { if svc == priority { - found = true + isPriority = true break } } - if !found { - fmt.Printf(" Starting %s...\n", svc) - if err := exec.Command("systemctl", "restart", svc).Run(); err != nil { - fmt.Printf(" ⚠️ Failed to restart %s: %v\n", svc, err) - } else { - fmt.Printf(" ✓ Started %s\n", svc) - } + if !isPriority { + remaining = append(remaining, svc) } } + utils.StartServicesOrdered(remaining, "restart") fmt.Printf(" ✓ All services restarted\n") diff --git a/pkg/cli/utils/systemd.go b/pkg/cli/utils/systemd.go index c807bf9..068825f 100644 --- a/pkg/cli/utils/systemd.go +++ b/pkg/cli/utils/systemd.go @@ -9,6 +9,7 @@ import ( "path/filepath" "strings" "syscall" + "time" ) var ErrServiceNotFound = errors.New("service not found") @@ -285,3 +286,64 @@ func identifyPortProcess(port int) string { return "unknown process" } +// NamespaceServiceOrder defines the dependency order for namespace services. +// RQLite must start first (database), then Olric (cache), then Gateway (depends on both). +var NamespaceServiceOrder = []string{"rqlite", "olric", "gateway"} + +// StartServicesOrdered starts services respecting namespace dependency order. +// Namespace services are started in order: rqlite → olric (+ wait) → gateway. +// Non-namespace services are started after. +// The action parameter is the systemctl command (e.g., "start" or "restart"). +func StartServicesOrdered(services []string, action string) { + // Separate namespace services by type, and collect non-namespace services + nsServices := make(map[string][]string) // svcType → []svcName + var other []string + + for _, svc := range services { + matched := false + for _, svcType := range NamespaceServiceOrder { + prefix := "debros-namespace-" + svcType + "@" + if strings.HasPrefix(svc, prefix) { + nsServices[svcType] = append(nsServices[svcType], svc) + matched = true + break + } + } + if !matched { + other = append(other, svc) + } + } + + // Start namespace services in dependency order + for _, svcType := range NamespaceServiceOrder { + svcs := nsServices[svcType] + for _, svc := range svcs { + fmt.Printf(" %s%sing %s...\n", strings.ToUpper(action[:1]), action[1:], svc) + if err := exec.Command("systemctl", action, svc).Run(); err != nil { + fmt.Printf(" ⚠️ Failed to %s %s: %v\n", action, svc, err) + } else { + fmt.Printf(" ✓ %s\n", svc) + } + } + + // After starting all Olric instances for all namespaces, wait for them + // to bind their HTTP ports and form memberlist clusters before starting + // gateways. Without this, gateways start before Olric is ready and the + // Olric client initialization fails permanently. + if svcType == "olric" && len(svcs) > 0 { + fmt.Printf(" Waiting for namespace Olric instances to become ready...\n") + time.Sleep(5 * time.Second) + } + } + + // Start any remaining non-namespace services + for _, svc := range other { + fmt.Printf(" %s%sing %s...\n", strings.ToUpper(action[:1]), action[1:], svc) + if err := exec.Command("systemctl", action, svc).Run(); err != nil { + fmt.Printf(" ⚠️ Failed to %s %s: %v\n", action, svc, err) + } else { + fmt.Printf(" ✓ %s\n", svc) + } + } +} +