Bug fix for the production orchestrator on start and stop

This commit is contained in:
anonpenguin23 2026-02-10 19:32:08 +02:00
parent a78e09d2b9
commit f7db698273
4 changed files with 78 additions and 29 deletions

View File

@ -53,15 +53,9 @@ func HandleRestart() {
os.Exit(1)
}
// Start all services
// Start all services in dependency order (namespace: rqlite → olric → gateway)
fmt.Printf(" Starting services...\n")
for _, svc := range services {
if err := exec.Command("systemctl", "start", svc).Run(); err != nil {
fmt.Printf(" ⚠️ Failed to start %s: %v\n", svc, err)
} else {
fmt.Printf(" ✓ Started %s\n", svc)
}
}
utils.StartServicesOrdered(services, "start")
fmt.Printf("\n✅ All services restarted\n")
}

View File

@ -81,9 +81,8 @@ func HandleStart() {
os.Exit(1)
}
// Enable and start inactive services
// Re-enable inactive services first (in case they were disabled by 'orama prod stop')
for _, svc := range inactive {
// Re-enable the service first (in case it was disabled by 'orama prod stop')
enabled, err := utils.IsServiceEnabled(svc)
if err == nil && !enabled {
if err := exec.Command("systemctl", "enable", svc).Run(); err != nil {
@ -92,18 +91,12 @@ func HandleStart() {
fmt.Printf(" ✓ Enabled %s (will auto-start on boot)\n", svc)
}
}
// Start the service
if err := exec.Command("systemctl", "start", svc).Run(); err != nil {
fmt.Printf(" ⚠️ Failed to start %s: %v\n", svc, err)
} else {
fmt.Printf(" ✓ Started %s\n", svc)
}
}
// Start services in dependency order (namespace: rqlite → olric → gateway)
utils.StartServicesOrdered(inactive, "start")
// Give services more time to fully initialize before verification
// Some services may need more time to start up, especially if they're
// waiting for dependencies or initializing databases
fmt.Printf(" ⏳ Waiting for services to initialize...\n")
time.Sleep(5 * time.Second)

View File

@ -694,24 +694,24 @@ func (o *Orchestrator) restartServices() error {
}
}
// Start any remaining services not in priority list (includes namespace services)
// Restart remaining services (namespace + any others) in dependency order.
// Namespace services are restarted: rqlite → olric (+ wait) → gateway.
// Without ordering, the gateway starts before Olric is accepting connections,
// the Olric client initialization fails, and the cache stays permanently unavailable.
var remaining []string
for _, svc := range services {
found := false
isPriority := false
for _, priority := range priorityOrder {
if svc == priority {
found = true
isPriority = true
break
}
}
if !found {
fmt.Printf(" Starting %s...\n", svc)
if err := exec.Command("systemctl", "restart", svc).Run(); err != nil {
fmt.Printf(" ⚠️ Failed to restart %s: %v\n", svc, err)
} else {
fmt.Printf(" ✓ Started %s\n", svc)
}
if !isPriority {
remaining = append(remaining, svc)
}
}
utils.StartServicesOrdered(remaining, "restart")
fmt.Printf(" ✓ All services restarted\n")

View File

@ -9,6 +9,7 @@ import (
"path/filepath"
"strings"
"syscall"
"time"
)
var ErrServiceNotFound = errors.New("service not found")
@ -285,3 +286,64 @@ func identifyPortProcess(port int) string {
return "unknown process"
}
// NamespaceServiceOrder lists the namespace service types in the order in
// which they must be started: RQLite (database) first, then Olric (cache),
// and finally the Gateway, which depends on both of the others.
var NamespaceServiceOrder = []string{
	"rqlite",
	"olric",
	"gateway",
}
// StartServicesOrdered runs a systemctl action on the given services while
// respecting the namespace dependency order.
//
// Services whose name starts with "debros-namespace-<type>@" are grouped by
// the types listed in NamespaceServiceOrder and processed one type at a time,
// in that order (rqlite → olric → gateway). After the Olric group has been
// processed, and only if it was non-empty, the function sleeps for a fixed
// 5 seconds so the Olric instances can come up before the gateways are
// handled. Services that match no namespace prefix are processed last, in
// their input order.
//
// The action parameter is the systemctl command (e.g., "start" or "restart").
// An empty action is reported and ignored instead of panicking.
//
// A failing systemctl invocation is printed as a warning and does not stop
// the remaining services from being processed. Nothing is returned.
func StartServicesOrdered(services []string, action string) {
	// Guard: slicing action[:1] below would panic on an empty string.
	if action == "" {
		fmt.Printf(" ⚠️ No systemctl action given; skipping %d service(s)\n", len(services))
		return
	}

	// Fixed grace period granted to Olric before gateways are handled.
	const olricReadyWait = 5 * time.Second

	// Capitalized progress verb, e.g. "start" → "Starting".
	progress := strings.ToUpper(action[:1]) + action[1:] + "ing"

	// run applies the action to one unit and reports the outcome; errors are
	// printed rather than returned so one bad unit does not block the rest.
	run := func(svc string) {
		fmt.Printf(" %s %s...\n", progress, svc)
		if err := exec.Command("systemctl", action, svc).Run(); err != nil {
			fmt.Printf(" ⚠️ Failed to %s %s: %v\n", action, svc, err)
			return
		}
		fmt.Printf(" ✓ %s\n", svc)
	}

	// Separate namespace services by type, and collect non-namespace services.
	nsServices := make(map[string][]string, len(NamespaceServiceOrder)) // svcType → []svcName
	var other []string
	for _, svc := range services {
		matched := false
		for _, svcType := range NamespaceServiceOrder {
			if strings.HasPrefix(svc, "debros-namespace-"+svcType+"@") {
				nsServices[svcType] = append(nsServices[svcType], svc)
				matched = true
				break
			}
		}
		if !matched {
			other = append(other, svc)
		}
	}

	// Process namespace services in dependency order.
	for _, svcType := range NamespaceServiceOrder {
		svcs := nsServices[svcType]
		for _, svc := range svcs {
			run(svc)
		}

		// After all Olric instances for all namespaces have been handled, wait
		// for them to bind their HTTP ports and form memberlist clusters before
		// the gateways are handled. Without this, gateways start before Olric
		// is ready and the Olric client initialization fails permanently.
		if svcType == "olric" && len(svcs) > 0 {
			fmt.Printf(" Waiting for namespace Olric instances to become ready...\n")
			time.Sleep(olricReadyWait)
		}
	}

	// Process any remaining non-namespace services.
	for _, svc := range other {
		run(svc)
	}
}