diff --git a/Makefile b/Makefile index b252c78..d81e857 100644 --- a/Makefile +++ b/Makefile @@ -63,7 +63,7 @@ test-e2e-quick: .PHONY: build clean test deps tidy fmt vet lint install-hooks redeploy-devnet redeploy-testnet release health -VERSION := 0.108.0 +VERSION := 0.109.0 COMMIT ?= $(shell git rev-parse --short HEAD 2>/dev/null || echo unknown) DATE ?= $(shell date -u +%Y-%m-%dT%H:%M:%SZ) LDFLAGS := -X 'main.version=$(VERSION)' -X 'main.commit=$(COMMIT)' -X 'main.date=$(DATE)' diff --git a/pkg/cli/utils/systemd.go b/pkg/cli/utils/systemd.go index 354b9d4..a8b363d 100644 --- a/pkg/cli/utils/systemd.go +++ b/pkg/cli/utils/systemd.go @@ -1,6 +1,7 @@ package utils import ( + "bufio" "errors" "fmt" "net" @@ -12,6 +13,7 @@ import ( "time" "github.com/DeBrosOfficial/network/pkg/constants" + "gopkg.in/yaml.v3" ) var ErrServiceNotFound = errors.New("service not found") @@ -325,13 +327,25 @@ func StartServicesOrdered(services []string, action string) { } } - // After starting all Olric instances for all namespaces, wait for them - // to bind their HTTP ports and form memberlist clusters before starting - // gateways. Without this, gateways start before Olric is ready and the - // Olric client initialization fails permanently. + // After starting all Olric instances, wait for each one's memberlist + // port to accept TCP connections before starting gateways. Without this, + // gateways start before Olric is ready and the Olric client initialization + // fails permanently (no retry). if svcType == "olric" && len(svcs) > 0 { fmt.Printf(" Waiting for namespace Olric instances to become ready...\n") - time.Sleep(5 * time.Second) + for _, svc := range svcs { + ns := strings.TrimPrefix(svc, "orama-namespace-olric@") + port := getOlricMemberlistPort(ns) + if port <= 0 { + fmt.Printf(" ⚠️ Could not determine Olric memberlist port for namespace %s\n", ns) + continue + } + if err := waitForTCPPort(port, 30*time.Second); err != nil { + fmt.Printf(" ⚠️ Olric memberlist port %d not ready for namespace %s: %v\n", port, ns, err) + } else { + fmt.Printf(" ✓ Olric ready for namespace %s (port %d)\n", ns, port) + } + } } } @@ -345,3 +359,62 @@ func StartServicesOrdered(services []string, action string) { } } } + +// getOlricMemberlistPort reads a namespace's Olric config and returns the +// memberlist bind port. Returns 0 if the config cannot be read or parsed. +func getOlricMemberlistPort(namespace string) int { + envFile := filepath.Join("/opt/orama/.orama/data/namespaces", namespace, "olric.env") + f, err := os.Open(envFile) + if err != nil { + return 0 + } + defer f.Close() + + // Read OLRIC_SERVER_CONFIG path from env file + var configPath string + scanner := bufio.NewScanner(f) + for scanner.Scan() { + line := strings.TrimSpace(scanner.Text()) + if strings.HasPrefix(line, "OLRIC_SERVER_CONFIG=") { + configPath = strings.TrimPrefix(line, "OLRIC_SERVER_CONFIG=") + break + } + } + if configPath == "" { + return 0 + } + + // Parse the YAML config to extract memberlist.bindPort + configData, err := os.ReadFile(configPath) + if err != nil { + return 0 + } + + var cfg struct { + Memberlist struct { + BindPort int `yaml:"bindPort"` + } `yaml:"memberlist"` + } + if err := yaml.Unmarshal(configData, &cfg); err != nil { + return 0 + } + + return cfg.Memberlist.BindPort +} + +// waitForTCPPort polls a TCP port until it accepts connections or the timeout expires. +func waitForTCPPort(port int, timeout time.Duration) error { + addr := fmt.Sprintf("localhost:%d", port) + deadline := time.Now().Add(timeout) + + for time.Now().Before(deadline) { + conn, err := net.DialTimeout("tcp", addr, 2*time.Second) + if err == nil { + conn.Close() + return nil + } + time.Sleep(1 * time.Second) + } + + return fmt.Errorf("port %d did not become ready within %s", port, timeout) +} diff --git a/pkg/cli/utils/systemd_test.go b/pkg/cli/utils/systemd_test.go new file mode 100644 index 0000000..ea074f0 --- /dev/null +++ b/pkg/cli/utils/systemd_test.go @@ -0,0 +1,119 @@ +package utils + +import ( + "net" + "testing" + "time" + + "gopkg.in/yaml.v3" +) + +func TestWaitForTCPPort_Success(t *testing.T) { + // Start a TCP listener on a random port + ln, err := net.Listen("tcp", "localhost:0") + if err != nil { + t.Fatalf("failed to start listener: %v", err) + } + defer ln.Close() + + port := ln.Addr().(*net.TCPAddr).Port + + err = waitForTCPPort(port, 5*time.Second) + if err != nil { + t.Errorf("expected success, got error: %v", err) + } +} + +func TestWaitForTCPPort_Timeout(t *testing.T) { + // Use a port that nothing is listening on + ln, err := net.Listen("tcp", "localhost:0") + if err != nil { + t.Fatalf("failed to get free port: %v", err) + } + port := ln.Addr().(*net.TCPAddr).Port + ln.Close() // Close immediately so nothing is listening + + err = waitForTCPPort(port, 3*time.Second) + if err == nil { + t.Error("expected timeout error, got nil") + } +} + +func TestWaitForTCPPort_DelayedStart(t *testing.T) { + // Get a free port + ln, err := net.Listen("tcp", "localhost:0") + if err != nil { + t.Fatalf("failed to get free port: %v", err) + } + port := ln.Addr().(*net.TCPAddr).Port + ln.Close() + + // Start listening after a delay + go func() { + time.Sleep(2 * time.Second) + newLn, err := net.Listen("tcp", ln.Addr().String()) + if err != nil { + return + } + defer newLn.Close() + // Keep it open long enough for the test + time.Sleep(10 * time.Second) + }() + + err = waitForTCPPort(port, 10*time.Second) + if err != nil { + t.Errorf("expected success after delayed start, got error: %v", err) + } +} + +func TestOlricConfigYAMLParsing(t *testing.T) { + // Verify that the YAML parsing struct matches the format + // generated by pkg/namespace/systemd_spawner.go + configContent := `server: + bindAddr: 10.0.0.1 + bindPort: 10002 +memberlist: + environment: lan + bindAddr: 10.0.0.1 + bindPort: 10003 + peers: + - 10.0.0.2:10003 +partitionCount: 12 +` + + var cfg struct { + Memberlist struct { + BindPort int `yaml:"bindPort"` + } `yaml:"memberlist"` + } + + if err := yaml.Unmarshal([]byte(configContent), &cfg); err != nil { + t.Fatalf("failed to parse Olric config YAML: %v", err) + } + + if cfg.Memberlist.BindPort != 10003 { + t.Errorf("expected memberlist port 10003, got %d", cfg.Memberlist.BindPort) + } +} + +func TestOlricConfigYAMLParsing_MissingMemberlist(t *testing.T) { + // Config without memberlist section should return zero port + configContent := `server: + bindAddr: 10.0.0.1 + bindPort: 10002 +` + + var cfg struct { + Memberlist struct { + BindPort int `yaml:"bindPort"` + } `yaml:"memberlist"` + } + + if err := yaml.Unmarshal([]byte(configContent), &cfg); err != nil { + t.Fatalf("unexpected parse error: %v", err) + } + + if cfg.Memberlist.BindPort != 0 { + t.Errorf("expected port 0 for missing memberlist, got %d", cfg.Memberlist.BindPort) + } +}