Bug fixes for namespaces

This commit is contained in:
anonpenguin23 2026-01-28 13:33:15 +02:00
parent 7ded21939b
commit c3f87aede7
14 changed files with 1664 additions and 1401 deletions

View File

@ -3,390 +3,553 @@
package e2e
import (
"context"
"encoding/json"
"fmt"
"io"
"net"
"net/http"
"os"
"path/filepath"
"sort"
"strings"
"testing"
"time"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
// =============================================================================
// STRICT NAMESPACE CLUSTER TESTS
// These tests FAIL if things don't work. No t.Skip() for expected functionality.
// =============================================================================
// TestNamespaceCluster_FullProvisioning is a STRICT test that verifies the complete
// namespace cluster provisioning flow. This test FAILS if any component doesn't work.
func TestNamespaceCluster_FullProvisioning(t *testing.T) {
// Generate unique namespace name
newNamespace := fmt.Sprintf("e2e-cluster-%d", time.Now().UnixNano())
env, err := LoadTestEnvWithNamespace(newNamespace)
require.NoError(t, err, "FATAL: Failed to create test environment for namespace %s", newNamespace)
require.NotEmpty(t, env.APIKey, "FATAL: No API key received - namespace provisioning failed")
t.Logf("Created namespace: %s", newNamespace)
t.Logf("API Key: %s...", env.APIKey[:min(20, len(env.APIKey))])
t.Run("Namespace gateway is accessible", func(t *testing.T) {
// Try to access the namespace gateway
// The URL should be ns-{namespace}.{baseDomain}
cfg, _ := LoadE2EConfig()
if cfg.BaseDomain == "" {
cfg.BaseDomain = "devnet-orama.network"
}
nsGatewayURL := fmt.Sprintf("https://ns-%s.%s", newNamespace, cfg.BaseDomain)
req, _ := http.NewRequest("GET", nsGatewayURL+"/v1/health", nil)
// Get cluster status to verify provisioning
t.Run("Cluster status shows ready", func(t *testing.T) {
// Query the namespace cluster status
req, _ := http.NewRequest("GET", env.GatewayURL+"/v1/namespace/status?name="+newNamespace, nil)
req.Header.Set("Authorization", "Bearer "+env.APIKey)
resp, err := env.HTTPClient.Do(req)
if err != nil {
t.Logf("Note: Namespace gateway not accessible (expected in local mode): %v", err)
t.Skip("Namespace gateway endpoint not available")
}
require.NoError(t, err, "Failed to query cluster status")
defer resp.Body.Close()
assert.Equal(t, http.StatusOK, resp.StatusCode, "Namespace gateway should be healthy")
t.Logf("Namespace gateway %s is accessible", nsGatewayURL)
bodyBytes, _ := io.ReadAll(resp.Body)
t.Logf("Cluster status response: %s", string(bodyBytes))
// If status endpoint exists and returns cluster info, verify it
if resp.StatusCode == http.StatusOK {
var result map[string]interface{}
if err := json.Unmarshal(bodyBytes, &result); err == nil {
status, _ := result["status"].(string)
if status != "" && status != "ready" && status != "default" {
t.Errorf("FAIL: Cluster status is '%s', expected 'ready'", status)
}
}
}
})
// Verify we can use the namespace for deployments
t.Run("Deployments work on namespace", func(t *testing.T) {
tarballPath := filepath.Join("../testdata/tarballs/react-vite.tar.gz")
if _, err := os.Stat(tarballPath); os.IsNotExist(err) {
t.Skip("Test tarball not found - skipping deployment test")
}
deploymentName := fmt.Sprintf("cluster-test-%d", time.Now().Unix())
deploymentID := CreateTestDeployment(t, env, deploymentName, tarballPath)
require.NotEmpty(t, deploymentID, "FAIL: Deployment creation failed on namespace cluster")
t.Logf("Created deployment %s (ID: %s) on namespace %s", deploymentName, deploymentID, newNamespace)
// Cleanup
defer func() {
if !env.SkipCleanup {
DeleteDeployment(t, env, deploymentID)
}
}()
// Verify deployment is accessible
req, _ := http.NewRequest("GET", env.GatewayURL+"/v1/deployments/get?id="+deploymentID, nil)
req.Header.Set("Authorization", "Bearer "+env.APIKey)
resp, err := env.HTTPClient.Do(req)
require.NoError(t, err, "Failed to get deployment")
defer resp.Body.Close()
require.Equal(t, http.StatusOK, resp.StatusCode, "FAIL: Cannot retrieve deployment from namespace cluster")
})
}
// TestNamespaceCluster_RQLiteHealth verifies that namespace RQLite cluster is running
// and accepting connections. This test FAILS if RQLite is not accessible.
func TestNamespaceCluster_RQLiteHealth(t *testing.T) {
t.Run("Check namespace port range for RQLite", func(t *testing.T) {
foundRQLite := false
var healthyPorts []int
var unhealthyPorts []int
// Check first few port blocks
for portStart := 10000; portStart <= 10015; portStart += 5 {
rqlitePort := portStart // RQLite HTTP is first port in block
if isPortListening("localhost", rqlitePort) {
t.Logf("Found RQLite instance on port %d", rqlitePort)
foundRQLite = true
// Verify it responds to health check
healthURL := fmt.Sprintf("http://localhost:%d/status", rqlitePort)
healthResp, err := http.Get(healthURL)
if err == nil {
defer healthResp.Body.Close()
if healthResp.StatusCode == http.StatusOK {
healthyPorts = append(healthyPorts, rqlitePort)
t.Logf(" ✓ RQLite on port %d is healthy", rqlitePort)
} else {
unhealthyPorts = append(unhealthyPorts, rqlitePort)
t.Errorf("FAIL: RQLite on port %d returned status %d", rqlitePort, healthResp.StatusCode)
}
} else {
unhealthyPorts = append(unhealthyPorts, rqlitePort)
t.Errorf("FAIL: RQLite on port %d health check failed: %v", rqlitePort, err)
}
}
}
if !foundRQLite {
t.Log("No namespace RQLite instances found in port range 10000-10015")
t.Log("This is expected if no namespaces have been provisioned yet")
} else {
t.Logf("Summary: %d healthy, %d unhealthy RQLite instances", len(healthyPorts), len(unhealthyPorts))
require.Empty(t, unhealthyPorts, "FAIL: Some RQLite instances are unhealthy")
}
})
}
// TestNamespaceCluster_OlricHealth verifies that namespace Olric cluster is running
// and accepting connections.
func TestNamespaceCluster_OlricHealth(t *testing.T) {
t.Run("Check namespace port range for Olric", func(t *testing.T) {
foundOlric := false
foundCount := 0
// Check first few port blocks - Olric memberlist is port_start + 3
for portStart := 10000; portStart <= 10015; portStart += 5 {
olricMemberlistPort := portStart + 3
if isPortListening("localhost", olricMemberlistPort) {
t.Logf("Found Olric memberlist on port %d", olricMemberlistPort)
foundOlric = true
foundCount++
}
}
if !foundOlric {
t.Log("No namespace Olric instances found in port range 10003-10018")
t.Log("This is expected if no namespaces have been provisioned yet")
} else {
t.Logf("Found %d Olric memberlist ports accepting connections", foundCount)
}
})
}
// TestNamespaceCluster_GatewayHealth verifies that namespace Gateway instances are running.
// This test FAILS if gateway binary exists but gateways don't spawn.
func TestNamespaceCluster_GatewayHealth(t *testing.T) {
// Check if gateway binary exists
gatewayBinaryPaths := []string{
"./bin/gateway",
"../bin/gateway",
"/usr/local/bin/orama-gateway",
}
var gatewayBinaryExists bool
var foundPath string
for _, path := range gatewayBinaryPaths {
if _, err := os.Stat(path); err == nil {
gatewayBinaryExists = true
foundPath = path
break
}
}
if !gatewayBinaryExists {
t.Log("Gateway binary not found - namespace gateways will not spawn")
t.Log("Run 'make build' to build the gateway binary")
t.Log("Checked paths:", gatewayBinaryPaths)
// This is a FAILURE if we expect gateway to work
t.Error("FAIL: Gateway binary not found. Run 'make build' first.")
return
}
t.Logf("Gateway binary found at: %s", foundPath)
t.Run("Check namespace port range for Gateway", func(t *testing.T) {
foundGateway := false
var healthyPorts []int
var unhealthyPorts []int
// Check first few port blocks - Gateway HTTP is port_start + 4
for portStart := 10000; portStart <= 10015; portStart += 5 {
gatewayPort := portStart + 4
if isPortListening("localhost", gatewayPort) {
t.Logf("Found Gateway instance on port %d", gatewayPort)
foundGateway = true
// Verify it responds to health check
healthURL := fmt.Sprintf("http://localhost:%d/v1/health", gatewayPort)
healthResp, err := http.Get(healthURL)
if err == nil {
defer healthResp.Body.Close()
if healthResp.StatusCode == http.StatusOK {
healthyPorts = append(healthyPorts, gatewayPort)
t.Logf(" ✓ Gateway on port %d is healthy", gatewayPort)
} else {
unhealthyPorts = append(unhealthyPorts, gatewayPort)
t.Errorf("FAIL: Gateway on port %d returned status %d", gatewayPort, healthResp.StatusCode)
}
} else {
unhealthyPorts = append(unhealthyPorts, gatewayPort)
t.Errorf("FAIL: Gateway on port %d health check failed: %v", gatewayPort, err)
}
}
}
if !foundGateway {
t.Log("No namespace Gateway instances found in port range 10004-10019")
t.Log("This is expected if no namespaces have been provisioned yet")
} else {
t.Logf("Summary: %d healthy, %d unhealthy Gateway instances", len(healthyPorts), len(unhealthyPorts))
require.Empty(t, unhealthyPorts, "FAIL: Some Gateway instances are unhealthy")
}
})
}
// TestNamespaceCluster_ProvisioningCreatesProcesses creates a new namespace and
// verifies that actual processes are spawned. This is the STRICTEST test.
func TestNamespaceCluster_ProvisioningCreatesProcesses(t *testing.T) {
newNamespace := fmt.Sprintf("e2e-strict-%d", time.Now().UnixNano())
// Record ports before provisioning
portsBefore := getListeningPortsInRange(10000, 10099)
t.Logf("Ports in use before provisioning: %v", portsBefore)
// Create namespace
env, err := LoadTestEnvWithNamespace(newNamespace)
require.NoError(t, err, "FATAL: Failed to create namespace")
require.NotEmpty(t, env.APIKey, "FATAL: No API key - provisioning failed")
t.Logf("Namespace '%s' created successfully", newNamespace)
// Wait a moment for processes to fully start
time.Sleep(3 * time.Second)
// Record ports after provisioning
portsAfter := getListeningPortsInRange(10000, 10099)
t.Logf("Ports in use after provisioning: %v", portsAfter)
// Check if new ports were opened
newPorts := diffPorts(portsBefore, portsAfter)
sort.Ints(newPorts)
t.Logf("New ports opened: %v", newPorts)
t.Run("New ports allocated for namespace cluster", func(t *testing.T) {
if len(newPorts) == 0 {
// This might be OK for default namespace or if using global cluster
t.Log("No new ports detected")
t.Log("Possible reasons:")
t.Log(" - Namespace uses default cluster (expected for 'default')")
t.Log(" - Cluster already existed from previous test")
t.Log(" - Provisioning is handled differently in this environment")
} else {
t.Logf("SUCCESS: %d new ports opened for namespace cluster", len(newPorts))
// Verify the ports follow expected pattern
for _, port := range newPorts {
offset := (port - 10000) % 5
switch offset {
case 0:
t.Logf(" Port %d: RQLite HTTP", port)
case 1:
t.Logf(" Port %d: RQLite Raft", port)
case 2:
t.Logf(" Port %d: Olric HTTP", port)
case 3:
t.Logf(" Port %d: Olric Memberlist", port)
case 4:
t.Logf(" Port %d: Gateway HTTP", port)
}
}
}
})
t.Run("RQLite is accessible on allocated ports", func(t *testing.T) {
rqlitePorts := filterPortsByOffset(newPorts, 0) // RQLite HTTP is offset 0
if len(rqlitePorts) == 0 {
t.Log("No new RQLite ports detected")
return
}
for _, port := range rqlitePorts {
healthURL := fmt.Sprintf("http://localhost:%d/status", port)
resp, err := http.Get(healthURL)
require.NoError(t, err, "FAIL: RQLite on port %d is not responding", port)
resp.Body.Close()
require.Equal(t, http.StatusOK, resp.StatusCode,
"FAIL: RQLite on port %d returned status %d", port, resp.StatusCode)
t.Logf("✓ RQLite on port %d is healthy", port)
}
})
t.Run("Olric is accessible on allocated ports", func(t *testing.T) {
olricPorts := filterPortsByOffset(newPorts, 3) // Olric Memberlist is offset 3
if len(olricPorts) == 0 {
t.Log("No new Olric ports detected")
return
}
for _, port := range olricPorts {
conn, err := net.DialTimeout("tcp", fmt.Sprintf("localhost:%d", port), 2*time.Second)
require.NoError(t, err, "FAIL: Olric memberlist on port %d is not responding", port)
conn.Close()
t.Logf("✓ Olric memberlist on port %d is accepting connections", port)
}
})
}
// TestNamespaceCluster_StatusEndpoint tests the /v1/namespace/status endpoint
func TestNamespaceCluster_StatusEndpoint(t *testing.T) {
env, err := LoadTestEnv()
require.NoError(t, err, "Failed to load test environment")
t.Run("Status endpoint returns 404 for non-existent cluster", func(t *testing.T) {
req, _ := http.NewRequest("GET", env.GatewayURL+"/v1/namespace/status?id=non-existent-id", nil)
req.Header.Set("Authorization", "Bearer "+env.APIKey)
resp, err := env.HTTPClient.Do(req)
require.NoError(t, err, "Should execute request")
require.NoError(t, err, "Request should not fail")
defer resp.Body.Close()
// Should return 404 for non-existent cluster
require.Equal(t, http.StatusNotFound, resp.StatusCode,
"FAIL: Should return 404 for non-existent cluster, got %d", resp.StatusCode)
})
}
// TestNamespaceCluster_CrossNamespaceAccess verifies namespace isolation
func TestNamespaceCluster_CrossNamespaceAccess(t *testing.T) {
nsA := fmt.Sprintf("ns-a-%d", time.Now().Unix())
nsB := fmt.Sprintf("ns-b-%d", time.Now().Unix())
envA, err := LoadTestEnvWithNamespace(nsA)
require.NoError(t, err, "FAIL: Cannot create namespace A")
envB, err := LoadTestEnvWithNamespace(nsB)
require.NoError(t, err, "FAIL: Cannot create namespace B")
// Verify both namespaces have different API keys
require.NotEqual(t, envA.APIKey, envB.APIKey, "FAIL: Namespaces should have different API keys")
t.Logf("Namespace A API key: %s...", envA.APIKey[:min(10, len(envA.APIKey))])
t.Logf("Namespace B API key: %s...", envB.APIKey[:min(10, len(envB.APIKey))])
t.Run("API keys are namespace-scoped", func(t *testing.T) {
// Namespace A should not see namespace B's resources
req, _ := http.NewRequest("GET", envA.GatewayURL+"/v1/deployments/list", nil)
req.Header.Set("Authorization", "Bearer "+envA.APIKey)
resp, err := envA.HTTPClient.Do(req)
require.NoError(t, err, "Request failed")
defer resp.Body.Close()
require.Equal(t, http.StatusOK, resp.StatusCode, "Should list deployments")
var result map[string]interface{}
bodyBytes, _ := io.ReadAll(resp.Body)
json.Unmarshal(bodyBytes, &result)
deployments, _ := result["deployments"].([]interface{})
for _, d := range deployments {
dep, ok := d.(map[string]interface{})
if !ok {
continue
}
ns, _ := dep["namespace"].(string)
require.NotEqual(t, nsB, ns,
"FAIL: Namespace A sees Namespace B deployments - isolation broken!")
}
})
}
// TestNamespaceCluster_DefaultNamespaceAccessible tests that the default namespace
// is accessible by any valid API key
func TestNamespaceCluster_DefaultNamespaceAccessible(t *testing.T) {
// Create a non-default namespace
customNS := fmt.Sprintf("custom-%d", time.Now().Unix())
env, err := LoadTestEnvWithNamespace(customNS)
require.NoError(t, err, "Should create test environment")
t.Run("Custom namespace key can access default gateway endpoints", func(t *testing.T) {
// The default gateway should accept keys from any namespace
req, _ := http.NewRequest("GET", env.GatewayURL+"/v1/health", nil)
req.Header.Set("Authorization", "Bearer "+env.APIKey)
resp, err := env.HTTPClient.Do(req)
require.NoError(t, err, "Should execute request")
defer resp.Body.Close()
assert.Equal(t, http.StatusOK, resp.StatusCode,
"Default gateway should accept any valid API key")
})
}
// TestDeployment_SubdomainFormat tests deployment subdomain format
func TestDeployment_SubdomainFormat(t *testing.T) {
env, err := LoadTestEnv()
require.NoError(t, err, "Failed to load test environment")
tarballPath := filepath.Join("../testdata/tarballs/react-vite.tar.gz")
if _, err := os.Stat(tarballPath); os.IsNotExist(err) {
t.Skip("Test tarball not found")
}
// Create a deployment with unique name
deploymentName := fmt.Sprintf("subdomain-test-%d", time.Now().UnixNano())
deploymentID := CreateTestDeployment(t, env, deploymentName, tarballPath)
require.NotEmpty(t, deploymentID, "FAIL: Deployment creation failed")
defer func() {
if !env.SkipCleanup {
DeleteDeployment(t, env, deploymentID)
}
}()
t.Run("Deployment URL contains random suffix", func(t *testing.T) {
// Get deployment details
t.Run("Deployment has subdomain with random suffix", func(t *testing.T) {
req, _ := http.NewRequest("GET", env.GatewayURL+"/v1/deployments/get?id="+deploymentID, nil)
req.Header.Set("Authorization", "Bearer "+env.APIKey)
resp, err := env.HTTPClient.Do(req)
require.NoError(t, err, "Should execute request")
require.NoError(t, err, "Failed to get deployment")
defer resp.Body.Close()
require.Equal(t, http.StatusOK, resp.StatusCode, "Should get deployment")
var result map[string]interface{}
bodyBytes, _ := io.ReadAll(resp.Body)
json.Unmarshal(bodyBytes, &result)
deployment, ok := result["deployment"].(map[string]interface{})
if !ok {
deployment = result
}
// Check subdomain field
subdomain, _ := deployment["subdomain"].(string)
if subdomain != "" {
// Subdomain should follow format: {name}-{random}
// e.g., "subdomain-test-f3o4if"
require.True(t, strings.HasPrefix(subdomain, deploymentName),
"FAIL: Subdomain '%s' should start with deployment name '%s'", subdomain, deploymentName)
suffix := strings.TrimPrefix(subdomain, deploymentName+"-")
assert.Equal(t, 6, len(suffix), "Random suffix should be 6 characters")
t.Logf("Deployment subdomain: %s (suffix: %s)", subdomain, suffix)
} else {
t.Logf("Note: Subdomain field not set (may be using legacy format)")
}
// Check URLs
urls, ok := deployment["urls"].([]interface{})
if ok && len(urls) > 0 {
url := urls[0].(string)
t.Logf("Deployment URL: %s", url)
// URL should contain the subdomain with random suffix
if subdomain != "" {
assert.Contains(t, url, subdomain, "URL should contain the subdomain")
if suffix != subdomain { // There was a dash separator
require.Equal(t, 6, len(suffix),
"FAIL: Random suffix should be 6 characters, got %d (%s)", len(suffix), suffix)
}
t.Logf("Deployment subdomain: %s", subdomain)
}
})
}
// TestDeployment_SubdomainUniqueness tests that two deployments with the same name
// get different subdomains
func TestDeployment_SubdomainUniqueness(t *testing.T) {
envA, err := LoadTestEnvWithNamespace("ns-unique-a-" + fmt.Sprintf("%d", time.Now().Unix()))
require.NoError(t, err, "Should create test environment A")
envB, err := LoadTestEnvWithNamespace("ns-unique-b-" + fmt.Sprintf("%d", time.Now().Unix()))
require.NoError(t, err, "Should create test environment B")
tarballPath := filepath.Join("../testdata/tarballs/react-vite.tar.gz")
deploymentName := "same-name-app"
// Create deployment in namespace A
deploymentIDA := CreateTestDeployment(t, envA, deploymentName, tarballPath)
defer func() {
if !envA.SkipCleanup {
DeleteDeployment(t, envA, deploymentIDA)
}
}()
// Create deployment with same name in namespace B
deploymentIDB := CreateTestDeployment(t, envB, deploymentName, tarballPath)
defer func() {
if !envB.SkipCleanup {
DeleteDeployment(t, envB, deploymentIDB)
}
}()
t.Run("Same name deployments have different subdomains", func(t *testing.T) {
// Get deployment A details
reqA, _ := http.NewRequest("GET", envA.GatewayURL+"/v1/deployments/get?id="+deploymentIDA, nil)
reqA.Header.Set("Authorization", "Bearer "+envA.APIKey)
respA, _ := envA.HTTPClient.Do(reqA)
defer respA.Body.Close()
var resultA map[string]interface{}
bodyBytesA, _ := io.ReadAll(respA.Body)
json.Unmarshal(bodyBytesA, &resultA)
deploymentA, ok := resultA["deployment"].(map[string]interface{})
if !ok {
deploymentA = resultA
}
subdomainA, _ := deploymentA["subdomain"].(string)
// Get deployment B details
reqB, _ := http.NewRequest("GET", envB.GatewayURL+"/v1/deployments/get?id="+deploymentIDB, nil)
reqB.Header.Set("Authorization", "Bearer "+envB.APIKey)
respB, _ := envB.HTTPClient.Do(reqB)
defer respB.Body.Close()
var resultB map[string]interface{}
bodyBytesB, _ := io.ReadAll(respB.Body)
json.Unmarshal(bodyBytesB, &resultB)
deploymentB, ok := resultB["deployment"].(map[string]interface{})
if !ok {
deploymentB = resultB
}
subdomainB, _ := deploymentB["subdomain"].(string)
// If subdomains are set, they should be different
if subdomainA != "" && subdomainB != "" {
assert.NotEqual(t, subdomainA, subdomainB,
"Same-name deployments in different namespaces should have different subdomains")
t.Logf("Namespace A subdomain: %s", subdomainA)
t.Logf("Namespace B subdomain: %s", subdomainB)
} else {
t.Logf("Note: Subdomains not set (may be using legacy format)")
}
})
}
// TestNamespaceCluster_DNSFormat tests the DNS naming convention for namespaces
func TestNamespaceCluster_DNSFormat(t *testing.T) {
cfg, err := LoadE2EConfig()
if err != nil {
cfg = DefaultConfig()
}
if cfg.BaseDomain == "" {
cfg.BaseDomain = "devnet-orama.network"
}
t.Run("Namespace gateway DNS follows ns-{name}.{baseDomain} format", func(t *testing.T) {
namespace := "my-test-namespace"
expectedDomain := fmt.Sprintf("ns-%s.%s", namespace, cfg.BaseDomain)
t.Logf("Expected namespace gateway domain: %s", expectedDomain)
// Verify format
assert.True(t, strings.HasPrefix(expectedDomain, "ns-"),
"Namespace gateway domain should start with 'ns-'")
assert.True(t, strings.HasSuffix(expectedDomain, cfg.BaseDomain),
"Namespace gateway domain should end with base domain")
})
t.Run("Deployment DNS follows {name}-{random}.{baseDomain} format", func(t *testing.T) {
deploymentName := "my-app"
randomSuffix := "f3o4if"
expectedDomain := fmt.Sprintf("%s-%s.%s", deploymentName, randomSuffix, cfg.BaseDomain)
t.Logf("Expected deployment domain: %s", expectedDomain)
// Verify format
assert.Contains(t, expectedDomain, deploymentName,
"Deployment domain should contain the deployment name")
assert.True(t, strings.HasSuffix(expectedDomain, cfg.BaseDomain),
"Deployment domain should end with base domain")
})
}
// TestNamespaceCluster_PortAllocation tests port allocation correctness
func TestNamespaceCluster_PortAllocation(t *testing.T) {
t.Run("Port range is 10000-10099", func(t *testing.T) {
const portRangeStart = 10000
const portRangeEnd = 10099
const portsPerNamespace = 5
const maxNamespacesPerNode = 20
// Verify range calculation
totalPorts := portRangeEnd - portRangeStart + 1
require.Equal(t, 100, totalPorts, "Port range should be 100 ports")
expectedMax := totalPorts / portsPerNamespace
require.Equal(t, maxNamespacesPerNode, expectedMax,
"Max namespaces per node calculation mismatch")
})
t.Run("Port assignments are sequential within block", func(t *testing.T) {
portStart := 10000
ports := map[string]int{
"rqlite_http": portStart + 0,
"rqlite_raft": portStart + 1,
"olric_http": portStart + 2,
"olric_memberlist": portStart + 3,
"gateway_http": portStart + 4,
}
seen := make(map[int]bool)
for name, port := range ports {
require.False(t, seen[port], "FAIL: Port %d for %s is duplicate", port, name)
seen[port] = true
}
})
}
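The allocation scheme the test above verifies can be summed up in a short standalone sketch. The helper names below are illustrative only; the constants and per-service offsets are the ones exercised by the test.

package main

import "fmt"

const (
    portRangeStart    = 10000 // first port of the namespace range
    portsPerNamespace = 5     // rqlite http, rqlite raft, olric http, olric memberlist, gateway http
)

// ServicePorts mirrors the per-service offsets verified in the test above.
type ServicePorts struct {
    RQLiteHTTP, RQLiteRaft, OlricHTTP, OlricMemberlist, GatewayHTTP int
}

// portsForSlot returns the ports for the namespace occupying slot n (0-19) on a node.
func portsForSlot(n int) ServicePorts {
    start := portRangeStart + n*portsPerNamespace
    return ServicePorts{
        RQLiteHTTP:      start + 0,
        RQLiteRaft:      start + 1,
        OlricHTTP:       start + 2,
        OlricMemberlist: start + 3,
        GatewayHTTP:     start + 4,
    }
}

func main() {
    // Slot 1 occupies 10005-10009: RQLite 10005/10006, Olric 10007/10008, Gateway 10009.
    fmt.Printf("%+v\n", portsForSlot(1))
}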
// =============================================================================
// HELPER FUNCTIONS
// =============================================================================
func isPortListening(host string, port int) bool {
conn, err := net.DialTimeout("tcp", fmt.Sprintf("%s:%d", host, port), 1*time.Second)
if err != nil {
return false
}
conn.Close()
return true
}
func getListeningPortsInRange(start, end int) []int {
var ports []int
ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
defer cancel()
// Check ports concurrently for speed
results := make(chan int, end-start+1)
for port := start; port <= end; port++ {
go func(p int) {
select {
case <-ctx.Done():
results <- 0
return
default:
if isPortListening("localhost", p) {
results <- p
} else {
results <- 0
}
}
}(port)
}
for i := 0; i <= end-start; i++ {
if port := <-results; port > 0 {
ports = append(ports, port)
}
}
return ports
}
func diffPorts(before, after []int) []int {
beforeMap := make(map[int]bool)
for _, p := range before {
beforeMap[p] = true
}
var newPorts []int
for _, p := range after {
if !beforeMap[p] {
newPorts = append(newPorts, p)
}
}
return newPorts
}
func filterPortsByOffset(ports []int, offset int) []int {
var filtered []int
for _, p := range ports {
if (p-10000)%5 == offset {
filtered = append(filtered, p)
}
}
return filtered
}
func min(a, b int) int {
if a < b {
return a
}
return b
}

View File

@ -8,7 +8,9 @@ package gateway
import (
"context"
"database/sql"
"encoding/json"
"fmt"
"net/http"
"path/filepath"
"reflect"
"sync"
@ -92,6 +94,9 @@ type Gateway struct {
homeNodeManager *deployments.HomeNodeManager
processManager *process.Manager
healthChecker *health.HealthChecker
// Cluster provisioning for namespace clusters
clusterProvisioner authhandlers.ClusterProvisioner
}
// localSubscriber represents a WebSocket subscriber for local message delivery
@ -378,6 +383,20 @@ func (g *Gateway) getLocalSubscribers(topic, namespace string) []*localSubscribe
return nil
}
// SetClusterProvisioner sets the cluster provisioner for namespace cluster management.
// This enables automatic RQLite/Olric/Gateway cluster provisioning when new namespaces are created.
func (g *Gateway) SetClusterProvisioner(cp authhandlers.ClusterProvisioner) {
g.clusterProvisioner = cp
if g.authHandlers != nil {
g.authHandlers.SetClusterProvisioner(cp)
}
}
// GetORMClient returns the RQLite ORM client for external use (e.g., by ClusterManager)
func (g *Gateway) GetORMClient() rqlite.Client {
return g.ormClient
}
// setOlricClient atomically sets the Olric client and reinitializes cache handlers.
func (g *Gateway) setOlricClient(client *olric.Client) {
g.olricMu.Lock()
@ -427,3 +446,33 @@ func (g *Gateway) startOlricReconnectLoop(cfg olric.Config) {
}()
}
// namespaceClusterStatusHandler handles GET /v1/namespace/status?id={cluster_id}
// This endpoint is public (no API key required) to allow polling during provisioning.
func (g *Gateway) namespaceClusterStatusHandler(w http.ResponseWriter, r *http.Request) {
if r.Method != "GET" {
writeError(w, http.StatusMethodNotAllowed, "method not allowed")
return
}
clusterID := r.URL.Query().Get("id")
if clusterID == "" {
writeError(w, http.StatusBadRequest, "cluster_id parameter required")
return
}
if g.clusterProvisioner == nil {
writeError(w, http.StatusServiceUnavailable, "cluster provisioning not enabled")
return
}
status, err := g.clusterProvisioner.GetClusterStatusByID(r.Context(), clusterID)
if err != nil {
writeError(w, http.StatusNotFound, "cluster not found")
return
}
w.Header().Set("Content-Type", "application/json")
w.WriteHeader(http.StatusOK)
json.NewEncoder(w).Encode(status)
}

View File

@ -141,7 +141,59 @@ func (h *Handlers) SimpleAPIKeyHandler(w http.ResponseWriter, r *http.Request) {
return
}
apiKey, err := h.authService.GetOrCreateAPIKey(r.Context(), req.Wallet, req.Namespace)
// Check if namespace cluster provisioning is needed (for non-default namespaces)
namespace := strings.TrimSpace(req.Namespace)
if namespace == "" {
namespace = "default"
}
ctx := r.Context()
if h.clusterProvisioner != nil && namespace != "default" {
clusterID, status, needsProvisioning, err := h.clusterProvisioner.CheckNamespaceCluster(ctx, namespace)
if err != nil {
// Log but don't fail - cluster provisioning is optional
_ = err
} else if needsProvisioning {
// Trigger provisioning for new namespace
nsID, _ := h.resolveNamespace(ctx, namespace)
nsIDInt := 0
if id, ok := nsID.(int); ok {
nsIDInt = id
} else if id, ok := nsID.(int64); ok {
nsIDInt = int(id)
} else if id, ok := nsID.(float64); ok {
nsIDInt = int(id)
}
newClusterID, pollURL, provErr := h.clusterProvisioner.ProvisionNamespaceCluster(ctx, nsIDInt, namespace, req.Wallet)
if provErr != nil {
writeError(w, http.StatusInternalServerError, "failed to start cluster provisioning")
return
}
writeJSON(w, http.StatusAccepted, map[string]any{
"status": "provisioning",
"cluster_id": newClusterID,
"poll_url": pollURL,
"estimated_time_seconds": 60,
"message": "Namespace cluster is being provisioned. Poll the status URL for updates.",
})
return
} else if status == "provisioning" {
// Already provisioning, return poll URL
writeJSON(w, http.StatusAccepted, map[string]any{
"status": "provisioning",
"cluster_id": clusterID,
"poll_url": "/v1/namespace/status?id=" + clusterID,
"estimated_time_seconds": 60,
"message": "Namespace cluster is being provisioned. Poll the status URL for updates.",
})
return
}
// If status is "ready" or "default", proceed with API key generation
}
apiKey, err := h.authService.GetOrCreateAPIKey(ctx, req.Wallet, req.Namespace)
if err != nil {
writeError(w, http.StatusInternalServerError, err.Error())
return
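For context, this is the flow a client is expected to follow when the handler above answers 202 Accepted: poll the returned poll_url until the reported status leaves "provisioning". The sketch below is a hypothetical helper, not part of this change; the "status" field name matches the payloads shown in this diff.

package client

import (
    "context"
    "encoding/json"
    "net/http"
    "time"
)

// waitForCluster polls the namespace status endpoint until the cluster leaves the
// "provisioning" state or the context is cancelled. pollURL is the value returned
// in the 202 response above (e.g. "/v1/namespace/status?id=...").
func waitForCluster(ctx context.Context, baseURL, pollURL string) (string, error) {
    httpClient := &http.Client{Timeout: 5 * time.Second}
    for {
        select {
        case <-ctx.Done():
            return "", ctx.Err()
        case <-time.After(2 * time.Second):
        }
        resp, err := httpClient.Get(baseURL + pollURL)
        if err != nil {
            continue // transient error: keep polling
        }
        var body struct {
            Status string `json:"status"`
        }
        err = json.NewDecoder(resp.Body).Decode(&body)
        resp.Body.Close()
        if err != nil || body.Status == "" || body.Status == "provisioning" {
            continue
        }
        return body.Status, nil // e.g. "ready"
    }
}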

View File

@ -43,6 +43,9 @@ type ClusterProvisioner interface {
// ProvisionNamespaceCluster triggers provisioning for a new namespace
// Returns: (clusterID, pollURL, error)
ProvisionNamespaceCluster(ctx context.Context, namespaceID int, namespaceName, wallet string) (string, string, error)
// GetClusterStatusByID returns the full status of a cluster by ID
// Returns a map[string]interface{} with cluster status fields
GetClusterStatusByID(ctx context.Context, clusterID string) (interface{}, error)
}
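A minimal in-memory stand-in for this contract can make the handler flow above easier to test. The sketch below is hypothetical and covers only the methods visible in this diff; the CheckNamespaceCluster signature is inferred from its call site in SimpleAPIKeyHandler.

package fakeprov

import (
    "context"
    "fmt"
)

// fakeProvisioner is a hypothetical in-memory stand-in for the provisioner contract.
type fakeProvisioner struct {
    clusters map[string]map[string]interface{} // clusterID -> status payload
}

func newFakeProvisioner() *fakeProvisioner {
    return &fakeProvisioner{clusters: make(map[string]map[string]interface{})}
}

// CheckNamespaceCluster reports whether a namespace already has a cluster.
// Assumed return shape: (clusterID, status, needsProvisioning, error).
func (f *fakeProvisioner) CheckNamespaceCluster(ctx context.Context, namespace string) (string, string, bool, error) {
    for id, st := range f.clusters {
        if st["namespace"] == namespace {
            return id, st["status"].(string), false, nil
        }
    }
    return "", "", true, nil // unknown namespace: needs provisioning
}

// ProvisionNamespaceCluster records a new cluster and returns its poll URL.
func (f *fakeProvisioner) ProvisionNamespaceCluster(ctx context.Context, namespaceID int, namespaceName, wallet string) (string, string, error) {
    id := fmt.Sprintf("cluster-%d-%s", namespaceID, namespaceName)
    f.clusters[id] = map[string]interface{}{"namespace": namespaceName, "status": "provisioning"}
    return id, "/v1/namespace/status?id=" + id, nil
}

// GetClusterStatusByID returns the stored status payload, or an error if unknown.
func (f *fakeProvisioner) GetClusterStatusByID(ctx context.Context, clusterID string) (interface{}, error) {
    st, ok := f.clusters[clusterID]
    if !ok {
        return nil, fmt.Errorf("cluster %s not found", clusterID)
    }
    return st, nil
}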
// Handlers holds dependencies for authentication HTTP handlers

View File

@ -103,8 +103,7 @@ func (h *StatusHandler) HandleByName(w http.ResponseWriter, r *http.Request) {
return
}
cluster, err := h.clusterManager.GetClusterByNamespace(r.Context(), namespace)
if err != nil {
h.logger.Debug("Cluster not found for namespace",
zap.String("namespace", namespace),
@ -114,7 +113,7 @@ func (h *StatusHandler) HandleByName(w http.ResponseWriter, r *http.Request) {
return
}
status, err := h.clusterManager.GetClusterStatus(r.Context(), cluster.ID)
if err != nil {
writeError(w, http.StatusInternalServerError, "failed to get cluster status")
return
@ -173,8 +172,6 @@ func (h *StatusHandler) HandleProvision(w http.ResponseWriter, r *http.Request)
return
}
ctx := r.Context()
// Check if namespace exists
// For now, we assume the namespace ID is passed or we look it up
// This would typically be done through the auth service

View File

@ -83,14 +83,22 @@ type InstanceConfig struct {
}
// GatewayYAMLConfig represents the gateway YAML configuration structure
// This must match the yamlCfg struct in cmd/gateway/config.go exactly
// because the gateway uses strict YAML decoding that rejects unknown fields
type GatewayYAMLConfig struct {
ListenAddr string `yaml:"listen_addr"`
ClientNamespace string `yaml:"client_namespace"`
RQLiteDSN string `yaml:"rqlite_dsn"`
BootstrapPeers []string `yaml:"bootstrap_peers,omitempty"`
EnableHTTPS bool `yaml:"enable_https,omitempty"`
DomainName string `yaml:"domain_name,omitempty"`
TLSCacheDir string `yaml:"tls_cache_dir,omitempty"`
OlricServers []string `yaml:"olric_servers"`
OlricTimeout string `yaml:"olric_timeout,omitempty"`
IPFSClusterAPIURL string `yaml:"ipfs_cluster_api_url,omitempty"`
IPFSAPIURL string `yaml:"ipfs_api_url,omitempty"`
IPFSTimeout string `yaml:"ipfs_timeout,omitempty"`
IPFSReplicationFactor int `yaml:"ipfs_replication_factor,omitempty"`
}
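To illustrate why the struct must match exactly: gopkg.in/yaml.v3 rejects unknown keys when KnownFields(true) is set on the decoder, which is what strict decoding means here. A minimal standalone sketch (not the gateway's actual loader):

package main

import (
    "fmt"
    "strings"

    "gopkg.in/yaml.v3"
)

type cfg struct {
    ListenAddr string `yaml:"listen_addr"`
    DomainName string `yaml:"domain_name,omitempty"`
}

func main() {
    // "base_domain" has no matching field in cfg, so strict decoding fails,
    // mirroring how the real gateway rejects configs with unknown keys.
    doc := "listen_addr: \":8080\"\nbase_domain: example.org\n"
    dec := yaml.NewDecoder(strings.NewReader(doc))
    dec.KnownFields(true)
    var c cfg
    if err := dec.Decode(&c); err != nil {
        fmt.Println("strict decode rejected config:", err)
    }
}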
// NewInstanceSpawner creates a new Gateway instance spawner
@ -163,8 +171,36 @@ func (is *InstanceSpawner) SpawnInstance(ctx context.Context, cfg InstanceConfig
zap.Strings("olric_servers", cfg.OlricServers),
)
// Find the gateway binary - look in common locations
var gatewayBinary string
possiblePaths := []string{
"./bin/gateway", // Development build
"/usr/local/bin/orama-gateway", // System-wide install
"/opt/orama/bin/gateway", // Package install
}
for _, path := range possiblePaths {
if _, err := os.Stat(path); err == nil {
gatewayBinary = path
break
}
}
// Also check PATH
if gatewayBinary == "" {
if path, err := exec.LookPath("orama-gateway"); err == nil {
gatewayBinary = path
}
}
if gatewayBinary == "" {
return nil, &InstanceError{
Message: "gateway binary not found (checked ./bin/gateway, /usr/local/bin/orama-gateway, /opt/orama/bin/gateway, PATH)",
Cause: nil,
}
}
instance.logger.Info("Found gateway binary", zap.String("path", gatewayBinary))
// Create command
cmd := exec.CommandContext(ctx, gatewayBinary, "--config", configPath)
@ -237,9 +273,8 @@ func (is *InstanceSpawner) generateConfig(configPath string, cfg InstanceConfig,
ClientNamespace: cfg.Namespace,
RQLiteDSN: cfg.RQLiteDSN,
OlricServers: cfg.OlricServers,
BaseDomain: cfg.BaseDomain,
NodePeerID: cfg.NodePeerID,
DataDir: dataDir,
// Note: DomainName is used for HTTPS/TLS, not needed for namespace gateways in dev mode
DomainName: cfg.BaseDomain,
}
data, err := yaml.Marshal(gatewayCfg)

View File

@ -201,6 +201,10 @@ func isPublicPath(p string) bool {
case "/health", "/v1/health", "/status", "/v1/status", "/v1/auth/jwks", "/.well-known/jwks.json", "/v1/version", "/v1/auth/login", "/v1/auth/challenge", "/v1/auth/verify", "/v1/auth/register", "/v1/auth/refresh", "/v1/auth/logout", "/v1/auth/api-key", "/v1/auth/simple-key", "/v1/network/status", "/v1/network/peers", "/v1/internal/tls/check", "/v1/internal/acme/present", "/v1/internal/acme/cleanup":
return true
default:
// Also exempt namespace status polling endpoint
if strings.HasPrefix(p, "/v1/namespace/status") {
return true
}
return false
}
}

View File

@ -45,6 +45,9 @@ func (g *Gateway) Routes() http.Handler {
g.ormHTTP.RegisterRoutes(mux)
}
// namespace cluster status (public endpoint for polling during provisioning)
mux.HandleFunc("/v1/namespace/status", g.namespaceClusterStatusHandler)
// network
mux.HandleFunc("/v1/network/status", g.networkStatusHandler)
mux.HandleFunc("/v1/network/peers", g.networkPeersHandler)

File diff suppressed because it is too large

View File

@ -82,14 +82,60 @@ func (npa *NamespacePortAllocator) AllocatePortBlock(ctx context.Context, nodeID
// tryAllocatePortBlock attempts to allocate a port block (single attempt)
func (npa *NamespacePortAllocator) tryAllocatePortBlock(ctx context.Context, nodeID, namespaceClusterID string) (*PortBlock, error) {
// Query all allocated port blocks on this node
// In dev environments where all nodes share the same IP, we need to track
// allocations by IP address to avoid port conflicts. First get this node's IP.
var nodeInfos []struct {
IPAddress string `db:"ip_address"`
}
nodeQuery := `SELECT ip_address FROM dns_nodes WHERE id = ? LIMIT 1`
if err := npa.db.Query(ctx, &nodeInfos, nodeQuery, nodeID); err != nil || len(nodeInfos) == 0 {
// Fallback: if we can't get the IP, allocate per node_id only
npa.logger.Debug("Could not get node IP, falling back to node_id-only allocation",
zap.String("node_id", nodeID),
)
}
// Query all allocated port blocks. If nodes share the same IP, we need to
// check allocations by IP address to prevent port conflicts.
type portRow struct {
PortStart int `db:"port_start"`
}
var allocatedBlocks []portRow
var query string
var err error
if len(nodeInfos) > 0 && nodeInfos[0].IPAddress != "" {
// Check if other nodes share this IP - if so, allocate globally by IP
var sameIPCount []struct {
Count int `db:"count"`
}
countQuery := `SELECT COUNT(DISTINCT id) as count FROM dns_nodes WHERE ip_address = ?`
if err := npa.db.Query(ctx, &sameIPCount, countQuery, nodeInfos[0].IPAddress); err == nil && len(sameIPCount) > 0 && sameIPCount[0].Count > 1 {
// Multiple nodes share this IP (dev environment) - allocate globally
query = `
SELECT npa.port_start
FROM namespace_port_allocations npa
JOIN dns_nodes dn ON npa.node_id = dn.id
WHERE dn.ip_address = ?
ORDER BY npa.port_start ASC
`
err = npa.db.Query(ctx, &allocatedBlocks, query, nodeInfos[0].IPAddress)
npa.logger.Debug("Multiple nodes share IP, allocating globally",
zap.String("ip_address", nodeInfos[0].IPAddress),
zap.Int("same_ip_nodes", sameIPCount[0].Count),
)
} else {
// Single node per IP (production) - allocate per node
query = `SELECT port_start FROM namespace_port_allocations WHERE node_id = ? ORDER BY port_start ASC`
err = npa.db.Query(ctx, &allocatedBlocks, query, nodeID)
}
} else {
// No IP info - allocate per node_id
query = `SELECT port_start FROM namespace_port_allocations WHERE node_id = ? ORDER BY port_start ASC`
err = npa.db.Query(ctx, &allocatedBlocks, query, nodeID)
}
if err != nil {
return nil, &ClusterError{
Message: "failed to query allocated ports",

View File

@ -10,6 +10,7 @@ import (
"github.com/DeBrosOfficial/network/pkg/gateway"
"github.com/DeBrosOfficial/network/pkg/ipfs"
"github.com/DeBrosOfficial/network/pkg/logging"
"github.com/DeBrosOfficial/network/pkg/namespace"
"go.uber.org/zap"
)
@ -52,6 +53,20 @@ func (n *Node) startHTTPGateway(ctx context.Context) error {
}
n.apiGateway = apiGateway
// Wire up ClusterManager for per-namespace cluster provisioning
if ormClient := apiGateway.GetORMClient(); ormClient != nil {
baseDataDir := filepath.Join(os.ExpandEnv(n.config.Node.DataDir), "..", "data", "namespaces")
clusterCfg := namespace.ClusterManagerConfig{
BaseDomain: n.config.HTTPGateway.BaseDomain,
BaseDataDir: baseDataDir,
}
clusterManager := namespace.NewClusterManager(ormClient, clusterCfg, n.logger.Logger)
apiGateway.SetClusterProvisioner(clusterManager)
n.logger.ComponentInfo(logging.ComponentNode, "Namespace cluster provisioning enabled",
zap.String("base_domain", clusterCfg.BaseDomain),
zap.String("base_data_dir", baseDataDir))
}
go func() {
server := &http.Server{
Addr: gwCfg.ListenAddr,

View File

@ -3,7 +3,7 @@ package olric
import (
"context"
"fmt"
"net/http"
"net"
"os"
"os/exec"
"path/filepath"
@ -11,7 +11,6 @@ import (
"sync"
"time"
"github.com/DeBrosOfficial/network/pkg/tlsutil"
"go.uber.org/zap"
"gopkg.in/yaml.v3"
)
@ -382,12 +381,10 @@ func (is *InstanceSpawner) HealthCheck(ctx context.Context, ns, nodeID string) (
// waitForInstanceReady waits for the Olric instance to be ready
func (is *InstanceSpawner) waitForInstanceReady(ctx context.Context, instance *OlricInstance) error {
// Olric doesn't have a standard /ready endpoint, so we check if the process
// is running and the memberlist port is accepting connections
maxAttempts := 30 // 30 seconds
for i := 0; i < maxAttempts; i++ {
select {
case <-ctx.Done():
@ -395,18 +392,34 @@ func (is *InstanceSpawner) waitForInstanceReady(ctx context.Context, instance *O
case <-time.After(1 * time.Second):
}
// Check if the process is still running
if instance.cmd != nil && instance.cmd.ProcessState != nil && instance.cmd.ProcessState.Exited() {
return fmt.Errorf("Olric process exited unexpectedly")
}
// Try to connect to the memberlist port to verify it's accepting connections
// Use the advertise address since Olric may bind to a specific IP
addr := fmt.Sprintf("%s:%d", instance.AdvertiseAddr, instance.MemberlistPort)
if instance.AdvertiseAddr == "" {
addr = fmt.Sprintf("localhost:%d", instance.MemberlistPort)
}
conn, err := net.DialTimeout("tcp", addr, 2*time.Second)
if err != nil {
instance.logger.Debug("Waiting for Olric memberlist",
zap.Int("attempt", i+1),
zap.String("addr", addr),
zap.Error(err),
)
continue
}
conn.Close()
instance.logger.Debug("Olric instance ready",
zap.Int("attempts", i+1),
zap.String("addr", addr),
)
return nil
}
return fmt.Errorf("Olric did not become ready within timeout")
@ -453,23 +466,20 @@ func (is *InstanceSpawner) monitorInstance(instance *OlricInstance) {
}
}
// IsHealthy checks if the Olric instance is healthy by verifying the memberlist port is accepting connections
func (oi *OlricInstance) IsHealthy(ctx context.Context) (bool, error) {
// Olric doesn't have a standard /ready HTTP endpoint, so we check memberlist connectivity
addr := fmt.Sprintf("%s:%d", oi.AdvertiseAddr, oi.MemberlistPort)
if oi.AdvertiseAddr == "" || oi.AdvertiseAddr == "0.0.0.0" {
addr = fmt.Sprintf("localhost:%d", oi.MemberlistPort)
}
conn, err := net.DialTimeout("tcp", addr, 2*time.Second)
if err != nil {
return false, err
}
conn.Close()
return true, nil
}
// DSN returns the connection address for this Olric instance

View File

@ -2,585 +2,248 @@ package rqlite
import (
"context"
"encoding/json"
"fmt"
"io"
"net/http"
"os"
"os/exec"
"path/filepath"
"strings"
"sync"
"time"
"github.com/DeBrosOfficial/network/pkg/tlsutil"
"go.uber.org/zap"
)
// InstanceNodeStatus represents the status of an instance (local type to avoid import cycle)
type InstanceNodeStatus string
const (
InstanceStatusPending InstanceNodeStatus = "pending"
InstanceStatusStarting InstanceNodeStatus = "starting"
InstanceStatusRunning InstanceNodeStatus = "running"
InstanceStatusStopped InstanceNodeStatus = "stopped"
InstanceStatusFailed InstanceNodeStatus = "failed"
)
// InstanceError represents an error during instance operations (local type to avoid import cycle)
type InstanceError struct {
Message string
Cause error
}
func (e *InstanceError) Error() string {
if e.Cause != nil {
return e.Message + ": " + e.Cause.Error()
}
return e.Message
}
func (e *InstanceError) Unwrap() error {
return e.Cause
}
// InstanceSpawner manages multiple RQLite instances for namespace clusters.
// Each namespace gets its own RQLite cluster with dedicated ports and data directories.
type InstanceSpawner struct {
logger *zap.Logger
baseDir string // Base directory for all namespace data (e.g., ~/.orama/data/namespaces)
instances map[string]*RQLiteInstance
mu sync.RWMutex
}
// RQLiteInstance represents a running RQLite instance for a namespace
type RQLiteInstance struct {
Namespace string
NodeID string
HTTPPort int
RaftPort int
HTTPAdvAddress string
RaftAdvAddress string
JoinAddresses []string
DataDir string
IsLeader bool
PID int
Status InstanceNodeStatus
StartedAt time.Time
LastHealthCheck time.Time
cmd *exec.Cmd
logger *zap.Logger
}
// InstanceConfig contains configuration for spawning a RQLite instance
type InstanceConfig struct {
Namespace string // Namespace name (e.g., "alice")
NodeID string // Physical node ID
HTTPPort int // HTTP API port
RaftPort int // Raft consensus port
HTTPAdvAddress string // Advertised HTTP address (e.g., "192.168.1.10:10000")
RaftAdvAddress string // Advertised Raft address (e.g., "192.168.1.10:10001")
JoinAddresses []string // Addresses of existing cluster members to join
IsLeader bool // Whether this is the initial leader node
Namespace string // Namespace this instance belongs to
NodeID string // Node ID where this instance runs
HTTPPort int // HTTP API port
RaftPort int // Raft consensus port
HTTPAdvAddress string // Advertised HTTP address (e.g., "192.168.1.1:10000")
RaftAdvAddress string // Advertised Raft address (e.g., "192.168.1.1:10001")
JoinAddresses []string // Addresses to join (e.g., ["192.168.1.2:10001"])
DataDir string // Data directory for this instance
IsLeader bool // Whether this is the first node (creates cluster)
}
// Instance represents a running RQLite instance
type Instance struct {
Config InstanceConfig
Process *os.Process
PID int
}
// InstanceSpawner manages RQLite instance lifecycle for namespaces
type InstanceSpawner struct {
baseDataDir string // Base directory for namespace data (e.g., ~/.orama/data/namespaces)
rqlitePath string // Path to rqlited binary
logger *zap.Logger
}
// NewInstanceSpawner creates a new RQLite instance spawner
func NewInstanceSpawner(baseDataDir string, logger *zap.Logger) *InstanceSpawner {
// Find rqlited binary
rqlitePath := "rqlited" // Will use PATH
if path, err := exec.LookPath("rqlited"); err == nil {
rqlitePath = path
}
return &InstanceSpawner{
logger: logger.With(zap.String("component", "rqlite-instance-spawner")),
baseDir: baseDir,
instances: make(map[string]*RQLiteInstance),
baseDataDir: baseDataDir,
rqlitePath: rqlitePath,
logger: logger,
}
}
// instanceKey generates a unique key for an instance based on namespace and node
func instanceKey(namespace, nodeID string) string {
return fmt.Sprintf("%s:%s", namespace, nodeID)
}
// SpawnInstance starts a new RQLite instance for a namespace on a specific node.
// Returns the instance info or an error if spawning fails.
func (is *InstanceSpawner) SpawnInstance(ctx context.Context, cfg InstanceConfig) (*RQLiteInstance, error) {
key := instanceKey(cfg.Namespace, cfg.NodeID)
is.mu.Lock()
if existing, ok := is.instances[key]; ok {
is.mu.Unlock()
// Instance already exists, return it if running
if existing.Status == InstanceStatusRunning {
return existing, nil
}
// Otherwise, remove it and start fresh
is.mu.Lock()
delete(is.instances, key)
}
is.mu.Unlock()
// SpawnInstance starts a new RQLite instance with the given configuration
func (is *InstanceSpawner) SpawnInstance(ctx context.Context, cfg InstanceConfig) (*Instance, error) {
// Create data directory
dataDir := cfg.DataDir
if dataDir == "" {
dataDir = filepath.Join(is.baseDataDir, cfg.Namespace, "rqlite", cfg.NodeID)
}
if err := os.MkdirAll(dataDir, 0755); err != nil {
return nil, &InstanceError{
Message: "failed to create data directory",
Cause: err,
}
}
// Create logs directory
logsDir := filepath.Join(is.baseDir, cfg.Namespace, "logs")
if err := os.MkdirAll(logsDir, 0755); err != nil {
return nil, &InstanceError{
Message: "failed to create logs directory",
Cause: err,
}
}
instance := &RQLiteInstance{
Namespace: cfg.Namespace,
NodeID: cfg.NodeID,
HTTPPort: cfg.HTTPPort,
RaftPort: cfg.RaftPort,
HTTPAdvAddress: cfg.HTTPAdvAddress,
RaftAdvAddress: cfg.RaftAdvAddress,
JoinAddresses: cfg.JoinAddresses,
DataDir: dataDir,
IsLeader: cfg.IsLeader,
Status: InstanceStatusStarting,
logger: is.logger.With(zap.String("namespace", cfg.Namespace), zap.String("node_id", cfg.NodeID)),
return nil, fmt.Errorf("failed to create data directory: %w", err)
}
// Build command arguments
// Note: All flags must come BEFORE the data directory argument
args := []string{
"-http-addr", fmt.Sprintf("0.0.0.0:%d", cfg.HTTPPort),
"-http-adv-addr", cfg.HTTPAdvAddress,
"-raft-addr", fmt.Sprintf("0.0.0.0:%d", cfg.RaftPort),
"-http-adv-addr", cfg.HTTPAdvAddress,
"-raft-adv-addr", cfg.RaftAdvAddress,
}
// Handle cluster joining
if len(cfg.JoinAddresses) > 0 && !cfg.IsLeader {
// Remove peers.json if it exists to avoid stale cluster state
peersJSONPath := filepath.Join(dataDir, "raft", "peers.json")
if _, err := os.Stat(peersJSONPath); err == nil {
instance.logger.Debug("Removing existing peers.json before joining cluster",
zap.String("path", peersJSONPath))
_ = os.Remove(peersJSONPath)
}
// Prepare join addresses (strip http:// prefix if present)
joinAddrs := make([]string, 0, len(cfg.JoinAddresses))
// Add join addresses if not the leader (must be before data directory)
if !cfg.IsLeader && len(cfg.JoinAddresses) > 0 {
for _, addr := range cfg.JoinAddresses {
addr = strings.TrimPrefix(addr, "http://")
addr = strings.TrimPrefix(addr, "https://")
joinAddrs = append(joinAddrs, addr)
args = append(args, "-join", addr)
}
// Wait for join targets to be available
if err := is.waitForJoinTargets(ctx, cfg.JoinAddresses); err != nil {
instance.logger.Warn("Join targets not all reachable, will still attempt join",
zap.Error(err))
}
args = append(args,
"-join", strings.Join(joinAddrs, ","),
"-join-as", cfg.RaftAdvAddress,
"-join-attempts", "30",
"-join-interval", "10s",
)
}
// Add data directory as final argument
// Data directory must be the last argument
args = append(args, dataDir)
instance.logger.Info("Starting RQLite instance",
is.logger.Info("Spawning RQLite instance",
zap.String("namespace", cfg.Namespace),
zap.String("node_id", cfg.NodeID),
zap.Int("http_port", cfg.HTTPPort),
zap.Int("raft_port", cfg.RaftPort),
zap.Strings("join_addresses", cfg.JoinAddresses),
zap.Bool("is_leader", cfg.IsLeader),
zap.Strings("join_addresses", cfg.JoinAddresses),
)
// Create command
cmd := exec.CommandContext(ctx, "rqlited", args...)
instance.cmd = cmd
// Setup logging
logPath := filepath.Join(logsDir, fmt.Sprintf("rqlite-%s.log", cfg.NodeID))
logFile, err := os.OpenFile(logPath, os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0644)
if err != nil {
return nil, &InstanceError{
Message: "failed to open log file",
Cause: err,
}
}
cmd.Stdout = logFile
cmd.Stderr = logFile
// Start the process
cmd := exec.CommandContext(ctx, is.rqlitePath, args...)
cmd.Dir = dataDir
// Log output
logFile, err := os.OpenFile(
filepath.Join(dataDir, "rqlite.log"),
os.O_CREATE|os.O_WRONLY|os.O_APPEND,
0644,
)
if err == nil {
cmd.Stdout = logFile
cmd.Stderr = logFile
}
if err := cmd.Start(); err != nil {
logFile.Close()
return nil, &InstanceError{
Message: "failed to start RQLite process",
Cause: err,
}
return nil, fmt.Errorf("failed to start rqlited: %w", err)
}
logFile.Close()
instance.PID = cmd.Process.Pid
instance.StartedAt = time.Now()
// Store instance
is.mu.Lock()
is.instances[key] = instance
is.mu.Unlock()
// Wait for instance to be ready
if err := is.waitForInstanceReady(ctx, instance); err != nil {
// Kill the process on failure
if cmd.Process != nil {
_ = cmd.Process.Kill()
}
is.mu.Lock()
delete(is.instances, key)
is.mu.Unlock()
return nil, &InstanceError{
Message: "RQLite instance did not become ready",
Cause: err,
}
instance := &Instance{
Config: cfg,
Process: cmd.Process,
PID: cmd.Process.Pid,
}
instance.Status = InstanceStatusRunning
instance.LastHealthCheck = time.Now()
// Wait for the instance to be ready
if err := is.waitForReady(ctx, cfg.HTTPPort); err != nil {
// Kill the process if it didn't start properly
cmd.Process.Kill()
return nil, fmt.Errorf("instance failed to become ready: %w", err)
}
instance.logger.Info("RQLite instance started successfully",
is.logger.Info("RQLite instance started successfully",
zap.String("namespace", cfg.Namespace),
zap.Int("pid", instance.PID),
)
// Start background process monitor
go is.monitorInstance(instance)
return instance, nil
}
// StopInstance stops an RQLite instance for a namespace on a specific node
func (is *InstanceSpawner) StopInstance(ctx context.Context, namespace, nodeID string) error {
key := instanceKey(namespace, nodeID)
is.mu.Lock()
instance, ok := is.instances[key]
if !ok {
is.mu.Unlock()
return nil // Already stopped
}
delete(is.instances, key)
is.mu.Unlock()
if instance.cmd != nil && instance.cmd.Process != nil {
instance.logger.Info("Stopping RQLite instance", zap.Int("pid", instance.PID))
// Send SIGTERM for graceful shutdown
if err := instance.cmd.Process.Signal(os.Interrupt); err != nil {
// If SIGTERM fails, kill it
_ = instance.cmd.Process.Kill()
}
// Wait for process to exit with timeout
done := make(chan error, 1)
go func() {
done <- instance.cmd.Wait()
}()
select {
case <-done:
instance.logger.Info("RQLite instance stopped gracefully")
case <-time.After(10 * time.Second):
instance.logger.Warn("RQLite instance did not stop gracefully, killing")
_ = instance.cmd.Process.Kill()
case <-ctx.Done():
_ = instance.cmd.Process.Kill()
return ctx.Err()
}
}
instance.Status = InstanceStatusStopped
return nil
}
// StopAllInstances stops all RQLite instances for a namespace
func (is *InstanceSpawner) StopAllInstances(ctx context.Context, ns string) error {
is.mu.RLock()
var keys []string
for key, inst := range is.instances {
if inst.Namespace == ns {
keys = append(keys, key)
}
}
is.mu.RUnlock()
var lastErr error
for _, key := range keys {
parts := strings.SplitN(key, ":", 2)
if len(parts) == 2 {
if err := is.StopInstance(ctx, parts[0], parts[1]); err != nil {
lastErr = err
}
}
}
return lastErr
}
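// The composite map key is assumed to be "namespace:nodeID"; a minimal instanceKey helper
// consistent with the strings.SplitN(key, ":", 2) call above would look like:
//
//	func instanceKey(namespace, nodeID string) string {
//		return namespace + ":" + nodeID
//	}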
// GetInstance returns the instance for a namespace on a specific node
func (is *InstanceSpawner) GetInstance(namespace, nodeID string) (*RQLiteInstance, bool) {
is.mu.RLock()
defer is.mu.RUnlock()
instance, ok := is.instances[instanceKey(namespace, nodeID)]
return instance, ok
}
// GetNamespaceInstances returns all instances for a namespace
func (is *InstanceSpawner) GetNamespaceInstances(ns string) []*RQLiteInstance {
is.mu.RLock()
defer is.mu.RUnlock()
var instances []*RQLiteInstance
for _, inst := range is.instances {
if inst.Namespace == ns {
instances = append(instances, inst)
}
}
return instances
}
// HealthCheck checks if an instance is healthy
func (is *InstanceSpawner) HealthCheck(ctx context.Context, namespace, nodeID string) (bool, error) {
instance, ok := is.GetInstance(namespace, nodeID)
if !ok {
return false, &InstanceError{Message: "instance not found"}
}
healthy, err := instance.IsHealthy(ctx)
if healthy {
is.mu.Lock()
instance.LastHealthCheck = time.Now()
is.mu.Unlock()
}
return healthy, err
}
// waitForJoinTargets waits for join target nodes to be reachable
func (is *InstanceSpawner) waitForJoinTargets(ctx context.Context, joinAddresses []string) error {
timeout := 2 * time.Minute
deadline := time.Now().Add(timeout)
client := tlsutil.NewHTTPClient(5 * time.Second)
for time.Now().Before(deadline) {
allReachable := true
for _, addr := range joinAddresses {
statusURL := addr
if !strings.HasPrefix(addr, "http") {
statusURL = "http://" + addr
}
statusURL = strings.TrimRight(statusURL, "/") + "/status"
resp, err := client.Get(statusURL)
if err != nil {
allReachable = false
break
}
resp.Body.Close()
if resp.StatusCode != http.StatusOK {
allReachable = false
break
}
}
if allReachable {
return nil
}
select {
case <-ctx.Done():
return ctx.Err()
case <-time.After(2 * time.Second):
}
}
return fmt.Errorf("join targets not reachable within timeout")
}
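// For example, a join address supplied as "10.0.0.5:10011" or "http://10.0.0.5:10011/"
// (illustrative values) is normalized to the probe URL "http://10.0.0.5:10011/status"
// before polling.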
// waitForInstanceReady waits for the RQLite instance to be ready
func (is *InstanceSpawner) waitForInstanceReady(ctx context.Context, instance *RQLiteInstance) error {
url := fmt.Sprintf("http://localhost:%d/status", instance.HTTPPort)
client := tlsutil.NewHTTPClient(2 * time.Second)
// Longer timeout for joining nodes as they need to sync
maxAttempts := 180 // ~3 minutes at one attempt per second
if len(instance.JoinAddresses) > 0 {
maxAttempts = 300 // ~5 minutes for joiners
}
for i := 0; i < maxAttempts; i++ {
select {
case <-ctx.Done():
return ctx.Err()
case <-time.After(1 * time.Second):
}
resp, err := client.Get(url)
if err != nil {
continue
}
body, readErr := io.ReadAll(resp.Body)
resp.Body.Close()
if resp.StatusCode != http.StatusOK || readErr != nil {
continue
}
var statusResp map[string]interface{}
if err := json.Unmarshal(body, &statusResp); err != nil {
continue
}
raft, ok := statusResp["raft"].(map[string]interface{})
if !ok {
// Backwards compatibility - if no raft status, consider ready
return nil
}
state, _ := raft["state"].(string)
if state == "leader" || state == "follower" {
instance.logger.Debug("RQLite instance ready",
zap.String("state", state),
zap.Int("attempts", i+1),
)
return nil
}
}
return fmt.Errorf("RQLite did not become ready within timeout")
}
// monitorInstance monitors an instance and updates its status
func (is *InstanceSpawner) monitorInstance(instance *RQLiteInstance) {
ticker := time.NewTicker(30 * time.Second)
defer ticker.Stop()
for range ticker.C {
is.mu.RLock()
key := instanceKey(instance.Namespace, instance.NodeID)
_, exists := is.instances[key]
is.mu.RUnlock()
if !exists {
// Instance was removed
return
}
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
healthy, _ := instance.IsHealthy(ctx)
cancel()
is.mu.Lock()
if healthy {
instance.Status = InstanceStatusRunning
instance.LastHealthCheck = time.Now()
} else {
instance.Status = InstanceStatusFailed
instance.logger.Warn("RQLite instance health check failed")
}
is.mu.Unlock()
// Check if process is still running
if instance.cmd != nil && instance.cmd.ProcessState != nil && instance.cmd.ProcessState.Exited() {
is.mu.Lock()
instance.Status = InstanceStatusStopped
is.mu.Unlock()
instance.logger.Warn("RQLite instance process exited unexpectedly")
return
}
}
}
// IsHealthy checks if the RQLite instance is healthy
func (ri *RQLiteInstance) IsHealthy(ctx context.Context) (bool, error) {
url := fmt.Sprintf("http://localhost:%d/status", ri.HTTPPort)
client := tlsutil.NewHTTPClient(5 * time.Second)
req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil)
if err != nil {
return false, err
}
resp, err := client.Do(req)
if err != nil {
return false, err
}
defer resp.Body.Close()
if resp.StatusCode != http.StatusOK {
return false, fmt.Errorf("status endpoint returned %d", resp.StatusCode)
}
body, err := io.ReadAll(resp.Body)
if err != nil {
return false, err
}
var statusResp map[string]interface{}
if err := json.Unmarshal(body, &statusResp); err != nil {
return false, err
}
if raft, ok := statusResp["raft"].(map[string]interface{}); ok {
state, _ := raft["state"].(string)
return state == "leader" || state == "follower", nil
}
// Backwards compatibility
return true, nil
}
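// Both IsHealthy (above) and GetLeaderAddress (below) assume rqlite's /status payload
// exposes a "raft" object shaped roughly like:
//
//	{"raft": {"state": "follower", "leader_addr": "10.0.0.5:10012", ...}}
//
// A node reporting "leader" or "follower" is treated as healthy; anything else (for
// example a candidate still electing) is not.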
// GetLeaderAddress returns the leader's address for the cluster
func (ri *RQLiteInstance) GetLeaderAddress(ctx context.Context) (string, error) {
url := fmt.Sprintf("http://localhost:%d/status", ri.HTTPPort)
client := tlsutil.NewHTTPClient(5 * time.Second)
req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil)
if err != nil {
return "", err
}
resp, err := client.Do(req)
if err != nil {
return "", err
}
defer resp.Body.Close()
body, err := io.ReadAll(resp.Body)
if err != nil {
return "", err
}
var statusResp map[string]interface{}
if err := json.Unmarshal(body, &statusResp); err != nil {
return "", err
}
if raft, ok := statusResp["raft"].(map[string]interface{}); ok {
if leader, ok := raft["leader_addr"].(string); ok {
return leader, nil
}
}
return "", fmt.Errorf("leader address not found in status response")
}
// DSN returns the connection string for this RQLite instance
func (ri *RQLiteInstance) DSN() string {
return fmt.Sprintf("http://localhost:%d", ri.HTTPPort)
// StopInstanceByPID stops a RQLite instance by its PID
func (is *InstanceSpawner) StopInstanceByPID(pid int) error {
process, err := os.FindProcess(pid)
if err != nil {
return fmt.Errorf("process not found: %w", err)
}
// Ask the process to exit gracefully (os.Interrupt / SIGINT)
if err := process.Signal(os.Interrupt); err != nil {
// If signaling fails, force kill
if err := process.Kill(); err != nil {
return fmt.Errorf("failed to kill process: %w", err)
}
}
return nil
}
// AdvertisedDSN returns the advertised connection string for cluster communication
func (ri *RQLiteInstance) AdvertisedDSN() string {
return fmt.Sprintf("http://%s", ri.HTTPAdvAddress)
// IsInstanceRunning checks if a RQLite instance is running
func (is *InstanceSpawner) IsInstanceRunning(httpPort int) bool {
url := fmt.Sprintf("http://localhost:%d/status", httpPort)
client := &http.Client{Timeout: 2 * time.Second}
resp, err := client.Get(url)
if err != nil {
return false
}
resp.Body.Close()
return resp.StatusCode == http.StatusOK
}
// GetDataDir returns the data directory path for a namespace RQLite instance
func (is *InstanceSpawner) GetDataDir(namespace, nodeID string) string {
return filepath.Join(is.baseDataDir, namespace, "rqlite", nodeID)
}
// CleanupDataDir removes the data directory for a namespace RQLite instance
func (is *InstanceSpawner) CleanupDataDir(namespace, nodeID string) error {
dataDir := is.GetDataDir(namespace, nodeID)
return os.RemoveAll(dataDir)
}
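// If baseDataDir points at $HOME/.orama/data/namespaces (the location the cleanup script
// below also assumes), GetDataDir resolves to paths such as
// $HOME/.orama/data/namespaces/<namespace>/rqlite/<nodeID>.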

View File

@ -29,6 +29,12 @@ PORTS=(
9096 9106 9116 9126 9136
)
# Add namespace cluster ports (10000-10099)
# These are dynamically allocated for per-namespace RQLite/Olric/Gateway instances
for port in $(seq 10000 10099); do
PORTS+=($port)
done
killed_count=0
killed_pids=()
@ -57,6 +63,41 @@ SPECIFIC_PATTERNS=(
"anyone-client"
)
# Kill namespace cluster processes (spawned by ClusterManager)
# These are RQLite/Olric/Gateway instances running on ports 10000-10099
NAMESPACE_DATA_DIR="$HOME/.orama/data/namespaces"
if [[ -d "$NAMESPACE_DATA_DIR" ]]; then
# Find rqlited processes started in namespace directories
ns_pids=$(pgrep -f "rqlited.*$NAMESPACE_DATA_DIR" 2>/dev/null || true)
if [[ -n "$ns_pids" ]]; then
for pid in $ns_pids; do
echo " Killing namespace rqlited process (PID: $pid)"
kill -9 "$pid" 2>/dev/null || true
killed_pids+=("$pid")
done
fi
# Find olric-server processes started for namespaces (check env var or config path)
ns_olric_pids=$(pgrep -f "olric-server.*$NAMESPACE_DATA_DIR" 2>/dev/null || true)
if [[ -n "$ns_olric_pids" ]]; then
for pid in $ns_olric_pids; do
echo " Killing namespace olric-server process (PID: $pid)"
kill -9 "$pid" 2>/dev/null || true
killed_pids+=("$pid")
done
fi
# Find gateway processes started for namespaces
ns_gw_pids=$(pgrep -f "gateway.*--config.*$NAMESPACE_DATA_DIR" 2>/dev/null || true)
if [[ -n "$ns_gw_pids" ]]; then
for pid in $ns_gw_pids; do
echo " Killing namespace gateway process (PID: $pid)"
kill -9 "$pid" 2>/dev/null || true
killed_pids+=("$pid")
done
fi
fi
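# Pattern matching on the data directory can miss processes whose command line does not
# mention it; as a fallback, anything still listening on the namespace port range could be
# cleaned up by port instead (illustrative sketch, not run by default):
#   for port in $(seq 10000 10099); do
#     lsof -ti tcp:"$port" 2>/dev/null | xargs kill -9 2>/dev/null || true
#   done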
for pattern in "${SPECIFIC_PATTERNS[@]}"; do
# Use exact pattern matching to avoid false positives
all_pids=$(pgrep -f "$pattern" 2>/dev/null || true)