Mirror of https://github.com/DeBrosOfficial/network.git (synced 2026-01-30 06:53:03 +00:00)
bug fixing on namespaces
This commit is contained in: parent 7ded21939b, commit c3f87aede7
@ -3,390 +3,553 @@
package e2e

import (
	"context"
	"encoding/json"
	"fmt"
	"io"
	"net"
	"net/http"
	"os"
	"path/filepath"
	"sort"
	"strings"
	"testing"
	"time"

	"github.com/stretchr/testify/assert"
	"github.com/stretchr/testify/require"
)

// =============================================================================
// STRICT NAMESPACE CLUSTER TESTS
// These tests FAIL if things don't work. No t.Skip() for expected functionality.
// =============================================================================

// TestNamespaceCluster_Provisioning tests that creating a new namespace
// triggers cluster provisioning with a 202 Accepted response.
func TestNamespaceCluster_Provisioning(t *testing.T) {
	if !IsProductionMode() {
		t.Skip("Namespace cluster provisioning only applies in production mode")
	}

	// This test requires a completely new namespace to trigger provisioning.
	newNamespace := fmt.Sprintf("test-ns-%d", time.Now().UnixNano())

	env, err := LoadTestEnvWithNamespace(newNamespace)
	require.NoError(t, err, "Should create test environment")

	t.Run("New namespace triggers provisioning", func(t *testing.T) {
		// If we got here with an API key, provisioning either completed or was not required.
		// The LoadTestEnvWithNamespace function handles the provisioning flow.
		require.NotEmpty(t, env.APIKey, "Should have received API key after provisioning")
		t.Logf("Namespace %s provisioned successfully", newNamespace)
	})

	t.Run("Namespace gateway is accessible", func(t *testing.T) {
		// Try to access the namespace gateway.
		// The URL should be ns-{namespace}.{baseDomain}.
		cfg, _ := LoadE2EConfig()
		if cfg.BaseDomain == "" {
			cfg.BaseDomain = "devnet-orama.network"
		}

		nsGatewayURL := fmt.Sprintf("https://ns-%s.%s", newNamespace, cfg.BaseDomain)

		req, _ := http.NewRequest("GET", nsGatewayURL+"/v1/health", nil)

		resp, err := env.HTTPClient.Do(req)
		if err != nil {
			t.Logf("Note: Namespace gateway not accessible (expected in local mode): %v", err)
			t.Skip("Namespace gateway endpoint not available")
		}
		defer resp.Body.Close()

		assert.Equal(t, http.StatusOK, resp.StatusCode, "Namespace gateway should be healthy")
		t.Logf("Namespace gateway %s is accessible", nsGatewayURL)
	})
}

// TestNamespaceCluster_FullProvisioning is a STRICT test that verifies the complete
// namespace cluster provisioning flow. This test FAILS if any component doesn't work.
func TestNamespaceCluster_FullProvisioning(t *testing.T) {
	// Generate a unique namespace name.
	newNamespace := fmt.Sprintf("e2e-cluster-%d", time.Now().UnixNano())

	env, err := LoadTestEnvWithNamespace(newNamespace)
	require.NoError(t, err, "FATAL: Failed to create test environment for namespace %s", newNamespace)
	require.NotEmpty(t, env.APIKey, "FATAL: No API key received - namespace provisioning failed")

	t.Logf("Created namespace: %s", newNamespace)
	t.Logf("API Key: %s...", env.APIKey[:min(20, len(env.APIKey))])

	// Get cluster status to verify provisioning.
	t.Run("Cluster status shows ready", func(t *testing.T) {
		// Query the namespace cluster status.
		req, _ := http.NewRequest("GET", env.GatewayURL+"/v1/namespace/status?name="+newNamespace, nil)
		req.Header.Set("Authorization", "Bearer "+env.APIKey)

		resp, err := env.HTTPClient.Do(req)
		require.NoError(t, err, "Failed to query cluster status")
		defer resp.Body.Close()

		bodyBytes, _ := io.ReadAll(resp.Body)
		t.Logf("Cluster status response: %s", string(bodyBytes))

		// If the status endpoint exists and returns cluster info, verify it.
		if resp.StatusCode == http.StatusOK {
			var result map[string]interface{}
			if err := json.Unmarshal(bodyBytes, &result); err == nil {
				status, _ := result["status"].(string)
				if status != "" && status != "ready" && status != "default" {
					t.Errorf("FAIL: Cluster status is '%s', expected 'ready'", status)
				}
			}
		}
	})
}

// TestNamespaceCluster_StatusPolling tests the /v1/namespace/status endpoint
func TestNamespaceCluster_StatusPolling(t *testing.T) {
	env, err := LoadTestEnv()
	require.NoError(t, err, "Should load test environment")

	t.Run("Status endpoint returns valid response", func(t *testing.T) {
		// Test with a non-existent cluster ID (should return 404).
		req, _ := http.NewRequest("GET", env.GatewayURL+"/v1/namespace/status?id=non-existent-id", nil)
		req.Header.Set("Authorization", "Bearer "+env.APIKey)

		resp, err := env.HTTPClient.Do(req)
		require.NoError(t, err, "Should execute request")
		defer resp.Body.Close()

		// Should return 404 for a non-existent cluster.
		assert.Equal(t, http.StatusNotFound, resp.StatusCode, "Should return 404 for non-existent cluster")
	})
}
|
||||
|
||||
// TestNamespaceCluster_CrossGatewayAccess tests that API keys from one namespace
|
||||
// cannot access another namespace's dedicated gateway
|
||||
func TestNamespaceCluster_CrossGatewayAccess(t *testing.T) {
|
||||
if !IsProductionMode() {
|
||||
t.Skip("Cross-gateway access control only applies in production mode")
|
||||
}
|
||||
|
||||
// Create two namespaces
|
||||
nsA := fmt.Sprintf("ns-a-%d", time.Now().Unix())
|
||||
nsB := fmt.Sprintf("ns-b-%d", time.Now().Unix())
|
||||
|
||||
envA, err := LoadTestEnvWithNamespace(nsA)
|
||||
require.NoError(t, err, "Should create test environment for namespace A")
|
||||
|
||||
envB, err := LoadTestEnvWithNamespace(nsB)
|
||||
require.NoError(t, err, "Should create test environment for namespace B")
|
||||
|
||||
cfg, _ := LoadE2EConfig()
|
||||
if cfg.BaseDomain == "" {
|
||||
cfg.BaseDomain = "devnet-orama.network"
|
||||
}
|
||||
|
||||
t.Run("Namespace A key cannot access Namespace B gateway", func(t *testing.T) {
|
||||
// Try to use namespace A's key on namespace B's gateway
|
||||
nsBGatewayURL := fmt.Sprintf("https://ns-%s.%s", nsB, cfg.BaseDomain)
|
||||
|
||||
req, _ := http.NewRequest("GET", nsBGatewayURL+"/v1/deployments/list", nil)
|
||||
req.Header.Set("Authorization", "Bearer "+envA.APIKey) // Using A's key
|
||||
|
||||
resp, err := envA.HTTPClient.Do(req)
|
||||
if err != nil {
|
||||
t.Logf("Note: Gateway not accessible: %v", err)
|
||||
t.Skip("Namespace gateway endpoint not available")
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
assert.Equal(t, http.StatusForbidden, resp.StatusCode,
|
||||
"Should deny namespace A's key on namespace B's gateway")
|
||||
t.Logf("Cross-namespace access correctly denied (status: %d)", resp.StatusCode)
|
||||
})
|
||||
|
||||
t.Run("Namespace B key works on Namespace B gateway", func(t *testing.T) {
|
||||
nsBGatewayURL := fmt.Sprintf("https://ns-%s.%s", nsB, cfg.BaseDomain)
|
||||
|
||||
req, _ := http.NewRequest("GET", nsBGatewayURL+"/v1/deployments/list", nil)
|
||||
req.Header.Set("Authorization", "Bearer "+envB.APIKey) // Using B's key
|
||||
|
||||
resp, err := envB.HTTPClient.Do(req)
|
||||
if err != nil {
|
||||
t.Logf("Note: Gateway not accessible: %v", err)
|
||||
t.Skip("Namespace gateway endpoint not available")
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
assert.Equal(t, http.StatusOK, resp.StatusCode,
|
||||
"Should allow namespace B's key on namespace B's gateway")
|
||||
t.Logf("Same-namespace access correctly allowed")
|
||||
})
|
||||
}
|
||||
|
||||
// TestNamespaceCluster_DefaultNamespaceAccessible tests that the default namespace
|
||||
// is accessible by any valid API key
|
||||
func TestNamespaceCluster_DefaultNamespaceAccessible(t *testing.T) {
|
||||
// Create a non-default namespace
|
||||
customNS := fmt.Sprintf("custom-%d", time.Now().Unix())
|
||||
env, err := LoadTestEnvWithNamespace(customNS)
|
||||
require.NoError(t, err, "Should create test environment")
|
||||
|
||||
t.Run("Custom namespace key can access default gateway endpoints", func(t *testing.T) {
|
||||
// The default gateway should accept keys from any namespace
|
||||
req, _ := http.NewRequest("GET", env.GatewayURL+"/v1/health", nil)
|
||||
req.Header.Set("Authorization", "Bearer "+env.APIKey)
|
||||
|
||||
resp, err := env.HTTPClient.Do(req)
|
||||
require.NoError(t, err, "Should execute request")
|
||||
defer resp.Body.Close()
|
||||
|
||||
assert.Equal(t, http.StatusOK, resp.StatusCode,
|
||||
"Default gateway should accept any valid API key")
|
||||
})
|
||||
}
|
||||
|
||||
// TestDeployment_RandomSubdomain tests that deployments get a random subdomain suffix
func TestDeployment_RandomSubdomain(t *testing.T) {
	env, err := LoadTestEnv()
	require.NoError(t, err, "Should load test environment")

	// Verify we can use the namespace for deployments.
	t.Run("Deployments work on namespace", func(t *testing.T) {
		tarballPath := filepath.Join("../testdata/tarballs/react-vite.tar.gz")
		if _, err := os.Stat(tarballPath); os.IsNotExist(err) {
			t.Skip("Test tarball not found - skipping deployment test")
		}

		// Create a deployment with a unique name.
		deploymentName := fmt.Sprintf("subdomain-test-%d", time.Now().UnixNano())
		deploymentID := CreateTestDeployment(t, env, deploymentName, tarballPath)
		require.NotEmpty(t, deploymentID, "FAIL: Deployment creation failed on namespace cluster")

		t.Logf("Created deployment %s (ID: %s)", deploymentName, deploymentID)

		// Cleanup
		defer func() {
			if !env.SkipCleanup {
				DeleteDeployment(t, env, deploymentID)
			}
		}()

		// Verify the deployment is accessible.
		req, _ := http.NewRequest("GET", env.GatewayURL+"/v1/deployments/get?id="+deploymentID, nil)
		req.Header.Set("Authorization", "Bearer "+env.APIKey)

		resp, err := env.HTTPClient.Do(req)
		require.NoError(t, err, "Failed to get deployment")
		defer resp.Body.Close()

		require.Equal(t, http.StatusOK, resp.StatusCode, "FAIL: Cannot retrieve deployment from namespace cluster")
	})
}
|
||||
|
||||
// TestNamespaceCluster_RQLiteHealth verifies that namespace RQLite cluster is running
|
||||
// and accepting connections. This test FAILS if RQLite is not accessible.
|
||||
func TestNamespaceCluster_RQLiteHealth(t *testing.T) {
|
||||
t.Run("Check namespace port range for RQLite", func(t *testing.T) {
|
||||
foundRQLite := false
|
||||
var healthyPorts []int
|
||||
var unhealthyPorts []int
|
||||
|
||||
// Check first few port blocks
|
||||
for portStart := 10000; portStart <= 10015; portStart += 5 {
|
||||
rqlitePort := portStart // RQLite HTTP is first port in block
|
||||
if isPortListening("localhost", rqlitePort) {
|
||||
t.Logf("Found RQLite instance on port %d", rqlitePort)
|
||||
foundRQLite = true
|
||||
|
||||
// Verify it responds to health check
|
||||
healthURL := fmt.Sprintf("http://localhost:%d/status", rqlitePort)
|
||||
healthResp, err := http.Get(healthURL)
|
||||
if err == nil {
|
||||
defer healthResp.Body.Close()
|
||||
if healthResp.StatusCode == http.StatusOK {
|
||||
healthyPorts = append(healthyPorts, rqlitePort)
|
||||
t.Logf(" ✓ RQLite on port %d is healthy", rqlitePort)
|
||||
} else {
|
||||
unhealthyPorts = append(unhealthyPorts, rqlitePort)
|
||||
t.Errorf("FAIL: RQLite on port %d returned status %d", rqlitePort, healthResp.StatusCode)
|
||||
}
|
||||
} else {
|
||||
unhealthyPorts = append(unhealthyPorts, rqlitePort)
|
||||
t.Errorf("FAIL: RQLite on port %d health check failed: %v", rqlitePort, err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if !foundRQLite {
|
||||
t.Log("No namespace RQLite instances found in port range 10000-10015")
|
||||
t.Log("This is expected if no namespaces have been provisioned yet")
|
||||
} else {
|
||||
t.Logf("Summary: %d healthy, %d unhealthy RQLite instances", len(healthyPorts), len(unhealthyPorts))
|
||||
require.Empty(t, unhealthyPorts, "FAIL: Some RQLite instances are unhealthy")
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
// TestNamespaceCluster_OlricHealth verifies that namespace Olric cluster is running
|
||||
// and accepting connections.
|
||||
func TestNamespaceCluster_OlricHealth(t *testing.T) {
|
||||
t.Run("Check namespace port range for Olric", func(t *testing.T) {
|
||||
foundOlric := false
|
||||
foundCount := 0
|
||||
|
||||
// Check first few port blocks - Olric memberlist is port_start + 3
|
||||
for portStart := 10000; portStart <= 10015; portStart += 5 {
|
||||
olricMemberlistPort := portStart + 3
|
||||
if isPortListening("localhost", olricMemberlistPort) {
|
||||
t.Logf("Found Olric memberlist on port %d", olricMemberlistPort)
|
||||
foundOlric = true
|
||||
foundCount++
|
||||
}
|
||||
}
|
||||
|
||||
if !foundOlric {
|
||||
t.Log("No namespace Olric instances found in port range 10003-10018")
|
||||
t.Log("This is expected if no namespaces have been provisioned yet")
|
||||
} else {
|
||||
t.Logf("Found %d Olric memberlist ports accepting connections", foundCount)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
// TestNamespaceCluster_GatewayHealth verifies that namespace Gateway instances are running.
|
||||
// This test FAILS if gateway binary exists but gateways don't spawn.
|
||||
func TestNamespaceCluster_GatewayHealth(t *testing.T) {
|
||||
// Check if gateway binary exists
|
||||
gatewayBinaryPaths := []string{
|
||||
"./bin/gateway",
|
||||
"../bin/gateway",
|
||||
"/usr/local/bin/orama-gateway",
|
||||
}
|
||||
|
||||
var gatewayBinaryExists bool
|
||||
var foundPath string
|
||||
for _, path := range gatewayBinaryPaths {
|
||||
if _, err := os.Stat(path); err == nil {
|
||||
gatewayBinaryExists = true
|
||||
foundPath = path
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
if !gatewayBinaryExists {
|
||||
t.Log("Gateway binary not found - namespace gateways will not spawn")
|
||||
t.Log("Run 'make build' to build the gateway binary")
|
||||
t.Log("Checked paths:", gatewayBinaryPaths)
|
||||
// This is a FAILURE if we expect gateway to work
|
||||
t.Error("FAIL: Gateway binary not found. Run 'make build' first.")
|
||||
return
|
||||
}
|
||||
|
||||
t.Logf("Gateway binary found at: %s", foundPath)
|
||||
|
||||
t.Run("Check namespace port range for Gateway", func(t *testing.T) {
|
||||
foundGateway := false
|
||||
var healthyPorts []int
|
||||
var unhealthyPorts []int
|
||||
|
||||
// Check first few port blocks - Gateway HTTP is port_start + 4
|
||||
for portStart := 10000; portStart <= 10015; portStart += 5 {
|
||||
gatewayPort := portStart + 4
|
||||
if isPortListening("localhost", gatewayPort) {
|
||||
t.Logf("Found Gateway instance on port %d", gatewayPort)
|
||||
foundGateway = true
|
||||
|
||||
// Verify it responds to health check
|
||||
healthURL := fmt.Sprintf("http://localhost:%d/v1/health", gatewayPort)
|
||||
healthResp, err := http.Get(healthURL)
|
||||
if err == nil {
|
||||
defer healthResp.Body.Close()
|
||||
if healthResp.StatusCode == http.StatusOK {
|
||||
healthyPorts = append(healthyPorts, gatewayPort)
|
||||
t.Logf(" ✓ Gateway on port %d is healthy", gatewayPort)
|
||||
} else {
|
||||
unhealthyPorts = append(unhealthyPorts, gatewayPort)
|
||||
t.Errorf("FAIL: Gateway on port %d returned status %d", gatewayPort, healthResp.StatusCode)
|
||||
}
|
||||
} else {
|
||||
unhealthyPorts = append(unhealthyPorts, gatewayPort)
|
||||
t.Errorf("FAIL: Gateway on port %d health check failed: %v", gatewayPort, err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if !foundGateway {
|
||||
t.Log("No namespace Gateway instances found in port range 10004-10019")
|
||||
t.Log("This is expected if no namespaces have been provisioned yet")
|
||||
} else {
|
||||
t.Logf("Summary: %d healthy, %d unhealthy Gateway instances", len(healthyPorts), len(unhealthyPorts))
|
||||
require.Empty(t, unhealthyPorts, "FAIL: Some Gateway instances are unhealthy")
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
// TestNamespaceCluster_ProvisioningCreatesProcesses creates a new namespace and
|
||||
// verifies that actual processes are spawned. This is the STRICTEST test.
|
||||
func TestNamespaceCluster_ProvisioningCreatesProcesses(t *testing.T) {
|
||||
newNamespace := fmt.Sprintf("e2e-strict-%d", time.Now().UnixNano())
|
||||
|
||||
// Record ports before provisioning
|
||||
portsBefore := getListeningPortsInRange(10000, 10099)
|
||||
t.Logf("Ports in use before provisioning: %v", portsBefore)
|
||||
|
||||
// Create namespace
|
||||
env, err := LoadTestEnvWithNamespace(newNamespace)
|
||||
require.NoError(t, err, "FATAL: Failed to create namespace")
|
||||
require.NotEmpty(t, env.APIKey, "FATAL: No API key - provisioning failed")
|
||||
|
||||
t.Logf("Namespace '%s' created successfully", newNamespace)
|
||||
|
||||
// Wait a moment for processes to fully start
|
||||
time.Sleep(3 * time.Second)
|
||||
|
||||
// Record ports after provisioning
|
||||
portsAfter := getListeningPortsInRange(10000, 10099)
|
||||
t.Logf("Ports in use after provisioning: %v", portsAfter)
|
||||
|
||||
// Check if new ports were opened
|
||||
newPorts := diffPorts(portsBefore, portsAfter)
|
||||
sort.Ints(newPorts)
|
||||
t.Logf("New ports opened: %v", newPorts)
|
||||
|
||||
t.Run("New ports allocated for namespace cluster", func(t *testing.T) {
|
||||
if len(newPorts) == 0 {
|
||||
// This might be OK for default namespace or if using global cluster
|
||||
t.Log("No new ports detected")
|
||||
t.Log("Possible reasons:")
|
||||
t.Log(" - Namespace uses default cluster (expected for 'default')")
|
||||
t.Log(" - Cluster already existed from previous test")
|
||||
t.Log(" - Provisioning is handled differently in this environment")
|
||||
} else {
|
||||
t.Logf("SUCCESS: %d new ports opened for namespace cluster", len(newPorts))
|
||||
|
||||
// Verify the ports follow expected pattern
|
||||
for _, port := range newPorts {
|
||||
offset := (port - 10000) % 5
|
||||
switch offset {
|
||||
case 0:
|
||||
t.Logf(" Port %d: RQLite HTTP", port)
|
||||
case 1:
|
||||
t.Logf(" Port %d: RQLite Raft", port)
|
||||
case 2:
|
||||
t.Logf(" Port %d: Olric HTTP", port)
|
||||
case 3:
|
||||
t.Logf(" Port %d: Olric Memberlist", port)
|
||||
case 4:
|
||||
t.Logf(" Port %d: Gateway HTTP", port)
|
||||
}
|
||||
}
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("RQLite is accessible on allocated ports", func(t *testing.T) {
|
||||
rqlitePorts := filterPortsByOffset(newPorts, 0) // RQLite HTTP is offset 0
|
||||
if len(rqlitePorts) == 0 {
|
||||
t.Log("No new RQLite ports detected")
|
||||
return
|
||||
}
|
||||
|
||||
for _, port := range rqlitePorts {
|
||||
healthURL := fmt.Sprintf("http://localhost:%d/status", port)
|
||||
resp, err := http.Get(healthURL)
|
||||
require.NoError(t, err, "FAIL: RQLite on port %d is not responding", port)
|
||||
resp.Body.Close()
|
||||
require.Equal(t, http.StatusOK, resp.StatusCode,
|
||||
"FAIL: RQLite on port %d returned status %d", port, resp.StatusCode)
|
||||
t.Logf("✓ RQLite on port %d is healthy", port)
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("Olric is accessible on allocated ports", func(t *testing.T) {
|
||||
olricPorts := filterPortsByOffset(newPorts, 3) // Olric Memberlist is offset 3
|
||||
if len(olricPorts) == 0 {
|
||||
t.Log("No new Olric ports detected")
|
||||
return
|
||||
}
|
||||
|
||||
for _, port := range olricPorts {
|
||||
conn, err := net.DialTimeout("tcp", fmt.Sprintf("localhost:%d", port), 2*time.Second)
|
||||
require.NoError(t, err, "FAIL: Olric memberlist on port %d is not responding", port)
|
||||
conn.Close()
|
||||
t.Logf("✓ Olric memberlist on port %d is accepting connections", port)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
// TestNamespaceCluster_StatusEndpoint tests the /v1/namespace/status endpoint
|
||||
func TestNamespaceCluster_StatusEndpoint(t *testing.T) {
|
||||
env, err := LoadTestEnv()
|
||||
require.NoError(t, err, "Failed to load test environment")
|
||||
|
||||
t.Run("Status endpoint returns 404 for non-existent cluster", func(t *testing.T) {
|
||||
req, _ := http.NewRequest("GET", env.GatewayURL+"/v1/namespace/status?id=non-existent-id", nil)
|
||||
req.Header.Set("Authorization", "Bearer "+env.APIKey)
|
||||
|
||||
resp, err := env.HTTPClient.Do(req)
|
||||
require.NoError(t, err, "Request should not fail")
|
||||
defer resp.Body.Close()
|
||||
|
||||
require.Equal(t, http.StatusNotFound, resp.StatusCode,
|
||||
"FAIL: Should return 404 for non-existent cluster, got %d", resp.StatusCode)
|
||||
})
|
||||
}
|
||||
|
||||
// TestNamespaceCluster_CrossNamespaceAccess verifies namespace isolation
|
||||
func TestNamespaceCluster_CrossNamespaceAccess(t *testing.T) {
|
||||
nsA := fmt.Sprintf("ns-a-%d", time.Now().Unix())
|
||||
nsB := fmt.Sprintf("ns-b-%d", time.Now().Unix())
|
||||
|
||||
envA, err := LoadTestEnvWithNamespace(nsA)
|
||||
require.NoError(t, err, "FAIL: Cannot create namespace A")
|
||||
|
||||
envB, err := LoadTestEnvWithNamespace(nsB)
|
||||
require.NoError(t, err, "FAIL: Cannot create namespace B")
|
||||
|
||||
// Verify both namespaces have different API keys
|
||||
require.NotEqual(t, envA.APIKey, envB.APIKey, "FAIL: Namespaces should have different API keys")
|
||||
t.Logf("Namespace A API key: %s...", envA.APIKey[:min(10, len(envA.APIKey))])
|
||||
t.Logf("Namespace B API key: %s...", envB.APIKey[:min(10, len(envB.APIKey))])
|
||||
|
||||
t.Run("API keys are namespace-scoped", func(t *testing.T) {
|
||||
// Namespace A should not see namespace B's resources
|
||||
req, _ := http.NewRequest("GET", envA.GatewayURL+"/v1/deployments/list", nil)
|
||||
req.Header.Set("Authorization", "Bearer "+envA.APIKey)
|
||||
|
||||
resp, err := envA.HTTPClient.Do(req)
|
||||
require.NoError(t, err, "Request failed")
|
||||
defer resp.Body.Close()
|
||||
|
||||
require.Equal(t, http.StatusOK, resp.StatusCode, "Should list deployments")
|
||||
|
||||
var result map[string]interface{}
|
||||
bodyBytes, _ := io.ReadAll(resp.Body)
|
||||
json.Unmarshal(bodyBytes, &result)
|
||||
|
||||
deployments, _ := result["deployments"].([]interface{})
|
||||
for _, d := range deployments {
|
||||
dep, ok := d.(map[string]interface{})
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
ns, _ := dep["namespace"].(string)
|
||||
require.NotEqual(t, nsB, ns,
|
||||
"FAIL: Namespace A sees Namespace B deployments - isolation broken!")
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
// TestDeployment_SubdomainFormat tests deployment subdomain format
func TestDeployment_SubdomainFormat(t *testing.T) {
	env, err := LoadTestEnv()
	require.NoError(t, err, "Failed to load test environment")

	tarballPath := filepath.Join("../testdata/tarballs/react-vite.tar.gz")
	if _, err := os.Stat(tarballPath); os.IsNotExist(err) {
		t.Skip("Test tarball not found")
	}

	deploymentName := fmt.Sprintf("subdomain-test-%d", time.Now().UnixNano())
	deploymentID := CreateTestDeployment(t, env, deploymentName, tarballPath)
	require.NotEmpty(t, deploymentID, "FAIL: Deployment creation failed")

	defer func() {
		if !env.SkipCleanup {
			DeleteDeployment(t, env, deploymentID)
		}
	}()

	t.Run("Deployment has subdomain with random suffix", func(t *testing.T) {
		req, _ := http.NewRequest("GET", env.GatewayURL+"/v1/deployments/get?id="+deploymentID, nil)
		req.Header.Set("Authorization", "Bearer "+env.APIKey)

		resp, err := env.HTTPClient.Do(req)
		require.NoError(t, err, "Failed to get deployment")
		defer resp.Body.Close()

		require.Equal(t, http.StatusOK, resp.StatusCode, "Should get deployment")

		var result map[string]interface{}
		bodyBytes, _ := io.ReadAll(resp.Body)
		require.NoError(t, json.Unmarshal(bodyBytes, &result), "Should decode JSON")

		deployment, ok := result["deployment"].(map[string]interface{})
		if !ok {
			deployment = result
		}

		// Check the subdomain field.
		subdomain, _ := deployment["subdomain"].(string)
		if subdomain != "" {
			// The subdomain should follow the format {name}-{random}, e.g. "subdomain-test-f3o4if".
			require.True(t, strings.HasPrefix(subdomain, deploymentName+"-"),
				"FAIL: Subdomain '%s' should start with deployment name '%s' followed by a dash", subdomain, deploymentName)

			suffix := strings.TrimPrefix(subdomain, deploymentName+"-")
			require.Equal(t, 6, len(suffix),
				"FAIL: Random suffix should be 6 characters, got %d (%s)", len(suffix), suffix)

			t.Logf("Deployment subdomain: %s (suffix: %s)", subdomain, suffix)
		} else {
			t.Logf("Note: Subdomain field not set (may be using legacy format)")
		}

		// Check URLs.
		urls, ok := deployment["urls"].([]interface{})
		if ok && len(urls) > 0 {
			url := urls[0].(string)
			t.Logf("Deployment URL: %s", url)

			// The URL should contain the subdomain with the random suffix.
			if subdomain != "" {
				assert.Contains(t, url, subdomain, "URL should contain the subdomain")
			}
		}
	})
}
|
||||
|
||||
// TestDeployment_SubdomainUniqueness tests that two deployments with the same name
|
||||
// get different subdomains
|
||||
func TestDeployment_SubdomainUniqueness(t *testing.T) {
|
||||
envA, err := LoadTestEnvWithNamespace("ns-unique-a-" + fmt.Sprintf("%d", time.Now().Unix()))
|
||||
require.NoError(t, err, "Should create test environment A")
|
||||
|
||||
envB, err := LoadTestEnvWithNamespace("ns-unique-b-" + fmt.Sprintf("%d", time.Now().Unix()))
|
||||
require.NoError(t, err, "Should create test environment B")
|
||||
|
||||
tarballPath := filepath.Join("../testdata/tarballs/react-vite.tar.gz")
|
||||
deploymentName := "same-name-app"
|
||||
|
||||
// Create deployment in namespace A
|
||||
deploymentIDA := CreateTestDeployment(t, envA, deploymentName, tarballPath)
|
||||
defer func() {
|
||||
if !envA.SkipCleanup {
|
||||
DeleteDeployment(t, envA, deploymentIDA)
|
||||
}
|
||||
}()
|
||||
|
||||
// Create deployment with same name in namespace B
|
||||
deploymentIDB := CreateTestDeployment(t, envB, deploymentName, tarballPath)
|
||||
defer func() {
|
||||
if !envB.SkipCleanup {
|
||||
DeleteDeployment(t, envB, deploymentIDB)
|
||||
}
|
||||
}()
|
||||
|
||||
t.Run("Same name deployments have different subdomains", func(t *testing.T) {
|
||||
// Get deployment A details
|
||||
reqA, _ := http.NewRequest("GET", envA.GatewayURL+"/v1/deployments/get?id="+deploymentIDA, nil)
|
||||
reqA.Header.Set("Authorization", "Bearer "+envA.APIKey)
|
||||
respA, _ := envA.HTTPClient.Do(reqA)
|
||||
defer respA.Body.Close()
|
||||
|
||||
var resultA map[string]interface{}
|
||||
bodyBytesA, _ := io.ReadAll(respA.Body)
|
||||
json.Unmarshal(bodyBytesA, &resultA)
|
||||
|
||||
deploymentA, ok := resultA["deployment"].(map[string]interface{})
|
||||
if !ok {
|
||||
deploymentA = resultA
|
||||
}
|
||||
subdomainA, _ := deploymentA["subdomain"].(string)
|
||||
|
||||
// Get deployment B details
|
||||
reqB, _ := http.NewRequest("GET", envB.GatewayURL+"/v1/deployments/get?id="+deploymentIDB, nil)
|
||||
reqB.Header.Set("Authorization", "Bearer "+envB.APIKey)
|
||||
respB, _ := envB.HTTPClient.Do(reqB)
|
||||
defer respB.Body.Close()
|
||||
|
||||
var resultB map[string]interface{}
|
||||
bodyBytesB, _ := io.ReadAll(respB.Body)
|
||||
json.Unmarshal(bodyBytesB, &resultB)
|
||||
|
||||
deploymentB, ok := resultB["deployment"].(map[string]interface{})
|
||||
if !ok {
|
||||
deploymentB = resultB
|
||||
}
|
||||
subdomainB, _ := deploymentB["subdomain"].(string)
|
||||
|
||||
// If subdomains are set, they should be different
|
||||
if subdomainA != "" && subdomainB != "" {
|
||||
assert.NotEqual(t, subdomainA, subdomainB,
|
||||
"Same-name deployments in different namespaces should have different subdomains")
|
||||
|
||||
t.Logf("Namespace A subdomain: %s", subdomainA)
|
||||
t.Logf("Namespace B subdomain: %s", subdomainB)
|
||||
} else {
|
||||
t.Logf("Note: Subdomains not set (may be using legacy format)")
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
// TestNamespaceCluster_DNSFormat tests the DNS naming convention for namespaces
|
||||
func TestNamespaceCluster_DNSFormat(t *testing.T) {
|
||||
cfg, err := LoadE2EConfig()
|
||||
if err != nil {
|
||||
cfg = DefaultConfig()
|
||||
}
|
||||
|
||||
if cfg.BaseDomain == "" {
|
||||
cfg.BaseDomain = "devnet-orama.network"
|
||||
}
|
||||
|
||||
t.Run("Namespace gateway DNS follows ns-{name}.{baseDomain} format", func(t *testing.T) {
|
||||
namespace := "my-test-namespace"
|
||||
expectedDomain := fmt.Sprintf("ns-%s.%s", namespace, cfg.BaseDomain)
|
||||
|
||||
t.Logf("Expected namespace gateway domain: %s", expectedDomain)
|
||||
|
||||
// Verify format
|
||||
assert.True(t, strings.HasPrefix(expectedDomain, "ns-"),
|
||||
"Namespace gateway domain should start with 'ns-'")
|
||||
assert.True(t, strings.HasSuffix(expectedDomain, cfg.BaseDomain),
|
||||
"Namespace gateway domain should end with base domain")
|
||||
})
|
||||
|
||||
t.Run("Deployment DNS follows {name}-{random}.{baseDomain} format", func(t *testing.T) {
|
||||
deploymentName := "my-app"
|
||||
randomSuffix := "f3o4if"
|
||||
expectedDomain := fmt.Sprintf("%s-%s.%s", deploymentName, randomSuffix, cfg.BaseDomain)
|
||||
|
||||
t.Logf("Expected deployment domain: %s", expectedDomain)
|
||||
|
||||
// Verify format
|
||||
assert.Contains(t, expectedDomain, deploymentName,
|
||||
"Deployment domain should contain the deployment name")
|
||||
assert.True(t, strings.HasSuffix(expectedDomain, cfg.BaseDomain),
|
||||
"Deployment domain should end with base domain")
|
||||
})
|
||||
}
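For reference, a minimal sketch of the two naming conventions the test above checks. The helper names are hypothetical and not part of the e2e package; they only restate the ns-{name}.{baseDomain} and {name}-{random}.{baseDomain} formats using the fmt package already imported in this file.

// namespaceGatewayHost composes the dedicated gateway host for a namespace
// following the ns-{namespace}.{baseDomain} convention (illustrative helper).
func namespaceGatewayHost(namespace, baseDomain string) string {
	return fmt.Sprintf("ns-%s.%s", namespace, baseDomain)
}

// deploymentHost composes the public host for a deployment following the
// {name}-{random}.{baseDomain} convention with a 6-character random suffix
// (illustrative helper).
func deploymentHost(name, randomSuffix, baseDomain string) string {
	return fmt.Sprintf("%s-%s.%s", name, randomSuffix, baseDomain)
}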
|
||||
|
||||
// TestNamespaceCluster_PortAllocation tests the port allocation constraints and correctness
func TestNamespaceCluster_PortAllocation(t *testing.T) {
	t.Run("Port range is 10000-10099", func(t *testing.T) {
		// These constants are defined in pkg/namespace/types.go
		const (
			portRangeStart       = 10000
			portRangeEnd         = 10099
			portsPerNamespace    = 5
			maxNamespacesPerNode = 20
		)

		// Verify the range calculation.
		totalPorts := portRangeEnd - portRangeStart + 1
		require.Equal(t, 100, totalPorts, "Port range should be 100 ports")

		expectedMax := totalPorts / portsPerNamespace
		require.Equal(t, maxNamespacesPerNode, expectedMax,
			"Max namespaces per node should be total ports / ports per namespace")

		t.Logf("Port range: %d-%d (%d ports total)", portRangeStart, portRangeEnd, totalPorts)
		t.Logf("Ports per namespace: %d", portsPerNamespace)
		t.Logf("Max namespaces per node: %d", maxNamespacesPerNode)
	})

	t.Run("Port assignments are sequential within block", func(t *testing.T) {
		portStart := 10000
		ports := map[string]int{
			"rqlite_http":      portStart + 0,
			"rqlite_raft":      portStart + 1,
			"olric_http":       portStart + 2,
			"olric_memberlist": portStart + 3,
			"gateway_http":     portStart + 4,
		}

		// All ports should be unique within a block.
		seen := make(map[int]bool)
		for name, port := range ports {
			require.False(t, seen[port], "FAIL: Port %d for %s is duplicate", port, name)
			seen[port] = true
		}

		t.Logf("Port assignments for block starting at %d:", portStart)
		t.Logf("  RQLite HTTP:      %d", ports["rqlite_http"])
		t.Logf("  RQLite Raft:      %d", ports["rqlite_raft"])
		t.Logf("  Olric HTTP:       %d", ports["olric_http"])
		t.Logf("  Olric Memberlist: %d", ports["olric_memberlist"])
		t.Logf("  Gateway HTTP:     %d", ports["gateway_http"])
	})
}
|
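As a minimal sketch of the 5-port block convention assumed throughout these tests (offsets 0-4 map to RQLite HTTP, RQLite Raft, Olric HTTP, Olric memberlist, Gateway HTTP), the helpers below expand a block start into its service ports and reverse the mapping; the names are illustrative only and mirror the offset logic of filterPortsByOffset defined later in this file.

// portBlockLayout expands an allocated block start into the five service
// ports used per namespace (illustrative helper, not part of pkg/namespace).
func portBlockLayout(portStart int) map[string]int {
	return map[string]int{
		"rqlite_http":      portStart + 0,
		"rqlite_raft":      portStart + 1,
		"olric_http":       portStart + 2,
		"olric_memberlist": portStart + 3,
		"gateway_http":     portStart + 4,
	}
}

// serviceForPort classifies a port in the 10000-10099 range by its offset
// within a block, mirroring filterPortsByOffset (illustrative helper).
func serviceForPort(port int) string {
	switch (port - 10000) % 5 {
	case 0:
		return "rqlite_http"
	case 1:
		return "rqlite_raft"
	case 2:
		return "olric_http"
	case 3:
		return "olric_memberlist"
	default:
		return "gateway_http"
	}
}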
||||
|
||||
// =============================================================================
|
||||
// HELPER FUNCTIONS
|
||||
// =============================================================================
|
||||
|
||||
func isPortListening(host string, port int) bool {
|
||||
conn, err := net.DialTimeout("tcp", fmt.Sprintf("%s:%d", host, port), 1*time.Second)
|
||||
if err != nil {
|
||||
return false
|
||||
}
|
||||
conn.Close()
|
||||
return true
|
||||
}
|
||||
|
||||
func getListeningPortsInRange(start, end int) []int {
|
||||
var ports []int
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
|
||||
defer cancel()
|
||||
|
||||
// Check ports concurrently for speed
|
||||
results := make(chan int, end-start+1)
|
||||
for port := start; port <= end; port++ {
|
||||
go func(p int) {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
results <- 0
|
||||
return
|
||||
default:
|
||||
if isPortListening("localhost", p) {
|
||||
results <- p
|
||||
} else {
|
||||
results <- 0
|
||||
}
|
||||
}
|
||||
}(port)
|
||||
}
|
||||
|
||||
for i := 0; i <= end-start; i++ {
|
||||
if port := <-results; port > 0 {
|
||||
ports = append(ports, port)
|
||||
}
|
||||
}
|
||||
return ports
|
||||
}
|
||||
|
||||
func diffPorts(before, after []int) []int {
|
||||
beforeMap := make(map[int]bool)
|
||||
for _, p := range before {
|
||||
beforeMap[p] = true
|
||||
}
|
||||
|
||||
var newPorts []int
|
||||
for _, p := range after {
|
||||
if !beforeMap[p] {
|
||||
newPorts = append(newPorts, p)
|
||||
}
|
||||
}
|
||||
return newPorts
|
||||
}
|
||||
|
||||
func filterPortsByOffset(ports []int, offset int) []int {
|
||||
var filtered []int
|
||||
for _, p := range ports {
|
||||
if (p-10000)%5 == offset {
|
||||
filtered = append(filtered, p)
|
||||
}
|
||||
}
|
||||
return filtered
|
||||
}
|
||||
|
||||
func min(a, b int) int {
|
||||
if a < b {
|
||||
return a
|
||||
}
|
||||
return b
|
||||
}
|
||||
|
||||
@ -8,7 +8,9 @@ package gateway
|
||||
import (
|
||||
"context"
|
||||
"database/sql"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"net/http"
|
||||
"path/filepath"
|
||||
"reflect"
|
||||
"sync"
|
||||
@ -92,6 +94,9 @@ type Gateway struct {
|
||||
homeNodeManager *deployments.HomeNodeManager
|
||||
processManager *process.Manager
|
||||
healthChecker *health.HealthChecker
|
||||
|
||||
// Cluster provisioning for namespace clusters
|
||||
clusterProvisioner authhandlers.ClusterProvisioner
|
||||
}
|
||||
|
||||
// localSubscriber represents a WebSocket subscriber for local message delivery
|
||||
@ -378,6 +383,20 @@ func (g *Gateway) getLocalSubscribers(topic, namespace string) []*localSubscribe
|
||||
return nil
|
||||
}
|
||||
|
||||
// SetClusterProvisioner sets the cluster provisioner for namespace cluster management.
|
||||
// This enables automatic RQLite/Olric/Gateway cluster provisioning when new namespaces are created.
|
||||
func (g *Gateway) SetClusterProvisioner(cp authhandlers.ClusterProvisioner) {
|
||||
g.clusterProvisioner = cp
|
||||
if g.authHandlers != nil {
|
||||
g.authHandlers.SetClusterProvisioner(cp)
|
||||
}
|
||||
}
|
||||
|
||||
// GetORMClient returns the RQLite ORM client for external use (e.g., by ClusterManager)
|
||||
func (g *Gateway) GetORMClient() rqlite.Client {
|
||||
return g.ormClient
|
||||
}
|
||||
|
||||
// setOlricClient atomically sets the Olric client and reinitializes cache handlers.
|
||||
func (g *Gateway) setOlricClient(client *olric.Client) {
|
||||
g.olricMu.Lock()
|
||||
@ -427,3 +446,33 @@ func (g *Gateway) startOlricReconnectLoop(cfg olric.Config) {
|
||||
}()
|
||||
}
|
||||
|
||||
// namespaceClusterStatusHandler handles GET /v1/namespace/status?id={cluster_id}
|
||||
// This endpoint is public (no API key required) to allow polling during provisioning.
|
||||
func (g *Gateway) namespaceClusterStatusHandler(w http.ResponseWriter, r *http.Request) {
|
||||
if r.Method != "GET" {
|
||||
writeError(w, http.StatusMethodNotAllowed, "method not allowed")
|
||||
return
|
||||
}
|
||||
|
||||
clusterID := r.URL.Query().Get("id")
|
||||
if clusterID == "" {
|
||||
writeError(w, http.StatusBadRequest, "cluster_id parameter required")
|
||||
return
|
||||
}
|
||||
|
||||
if g.clusterProvisioner == nil {
|
||||
writeError(w, http.StatusServiceUnavailable, "cluster provisioning not enabled")
|
||||
return
|
||||
}
|
||||
|
||||
status, err := g.clusterProvisioner.GetClusterStatusByID(r.Context(), clusterID)
|
||||
if err != nil {
|
||||
writeError(w, http.StatusNotFound, "cluster not found")
|
||||
return
|
||||
}
|
||||
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
w.WriteHeader(http.StatusOK)
|
||||
json.NewEncoder(w).Encode(status)
|
||||
}
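For context, a minimal client-side sketch of how the public status endpoint above can be consumed during provisioning. It is not part of the gateway code; it assumes only the "status" field seen elsewhere in this diff, a caller-supplied gateway URL, and the context, net/http, encoding/json, and time packages.

// pollClusterStatus polls GET /v1/namespace/status?id={cluster_id} until the
// cluster reports "ready" or the context expires (illustrative client helper).
func pollClusterStatus(ctx context.Context, client *http.Client, gatewayURL, clusterID string) error {
	for {
		select {
		case <-ctx.Done():
			return ctx.Err()
		case <-time.After(2 * time.Second):
		}

		resp, err := client.Get(gatewayURL + "/v1/namespace/status?id=" + clusterID)
		if err != nil {
			continue // the gateway may still be starting
		}
		var body struct {
			Status string `json:"status"`
		}
		err = json.NewDecoder(resp.Body).Decode(&body)
		resp.Body.Close()
		if err == nil && body.Status == "ready" {
			return nil
		}
	}
}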
|
||||
|
||||
|
||||
@ -141,7 +141,59 @@ func (h *Handlers) SimpleAPIKeyHandler(w http.ResponseWriter, r *http.Request) {
|
||||
return
|
||||
}
|
||||
|
||||
// Check if namespace cluster provisioning is needed (for non-default namespaces)
|
||||
namespace := strings.TrimSpace(req.Namespace)
|
||||
if namespace == "" {
|
||||
namespace = "default"
|
||||
}
|
||||
|
||||
ctx := r.Context()
|
||||
if h.clusterProvisioner != nil && namespace != "default" {
|
||||
clusterID, status, needsProvisioning, err := h.clusterProvisioner.CheckNamespaceCluster(ctx, namespace)
|
||||
if err != nil {
|
||||
// Log but don't fail - cluster provisioning is optional
|
||||
_ = err
|
||||
} else if needsProvisioning {
|
||||
// Trigger provisioning for new namespace
|
||||
nsID, _ := h.resolveNamespace(ctx, namespace)
|
||||
nsIDInt := 0
|
||||
if id, ok := nsID.(int); ok {
|
||||
nsIDInt = id
|
||||
} else if id, ok := nsID.(int64); ok {
|
||||
nsIDInt = int(id)
|
||||
} else if id, ok := nsID.(float64); ok {
|
||||
nsIDInt = int(id)
|
||||
}
|
||||
|
||||
newClusterID, pollURL, provErr := h.clusterProvisioner.ProvisionNamespaceCluster(ctx, nsIDInt, namespace, req.Wallet)
|
||||
if provErr != nil {
|
||||
writeError(w, http.StatusInternalServerError, "failed to start cluster provisioning")
|
||||
return
|
||||
}
|
||||
|
||||
writeJSON(w, http.StatusAccepted, map[string]any{
|
||||
"status": "provisioning",
|
||||
"cluster_id": newClusterID,
|
||||
"poll_url": pollURL,
|
||||
"estimated_time_seconds": 60,
|
||||
"message": "Namespace cluster is being provisioned. Poll the status URL for updates.",
|
||||
})
|
||||
return
|
||||
} else if status == "provisioning" {
|
||||
// Already provisioning, return poll URL
|
||||
writeJSON(w, http.StatusAccepted, map[string]any{
|
||||
"status": "provisioning",
|
||||
"cluster_id": clusterID,
|
||||
"poll_url": "/v1/namespace/status?id=" + clusterID,
|
||||
"estimated_time_seconds": 60,
|
||||
"message": "Namespace cluster is being provisioned. Poll the status URL for updates.",
|
||||
})
|
||||
return
|
||||
}
|
||||
// If status is "ready" or "default", proceed with API key generation
|
||||
}
|
||||
|
||||
apiKey, err := h.authService.GetOrCreateAPIKey(ctx, req.Wallet, req.Namespace)
|
||||
if err != nil {
|
||||
writeError(w, http.StatusInternalServerError, err.Error())
|
||||
return
|
||||
|
||||
@ -43,6 +43,9 @@ type ClusterProvisioner interface {
|
||||
// ProvisionNamespaceCluster triggers provisioning for a new namespace
|
||||
// Returns: (clusterID, pollURL, error)
|
||||
ProvisionNamespaceCluster(ctx context.Context, namespaceID int, namespaceName, wallet string) (string, string, error)
|
||||
// GetClusterStatusByID returns the full status of a cluster by ID
|
||||
// Returns a map[string]interface{} with cluster status fields
|
||||
GetClusterStatusByID(ctx context.Context, clusterID string) (interface{}, error)
|
||||
}
|
||||
|
||||
// Handlers holds dependencies for authentication HTTP handlers
|
||||
|
||||
@ -103,8 +103,7 @@ func (h *StatusHandler) HandleByName(w http.ResponseWriter, r *http.Request) {
|
||||
return
|
||||
}
|
||||
|
||||
	cluster, err := h.clusterManager.GetClusterByNamespace(r.Context(), namespace)
|
||||
if err != nil {
|
||||
h.logger.Debug("Cluster not found for namespace",
|
||||
zap.String("namespace", namespace),
|
||||
@ -114,7 +113,7 @@ func (h *StatusHandler) HandleByName(w http.ResponseWriter, r *http.Request) {
|
||||
return
|
||||
}
|
||||
|
||||
	status, err := h.clusterManager.GetClusterStatus(r.Context(), cluster.ID)
|
||||
if err != nil {
|
||||
writeError(w, http.StatusInternalServerError, "failed to get cluster status")
|
||||
return
|
||||
@ -173,8 +172,6 @@ func (h *StatusHandler) HandleProvision(w http.ResponseWriter, r *http.Request)
|
||||
return
|
||||
}
|
||||
|
||||
|
||||
// Check if namespace exists
|
||||
// For now, we assume the namespace ID is passed or we look it up
|
||||
// This would typically be done through the auth service
|
||||
|
||||
@ -83,14 +83,22 @@ type InstanceConfig struct {
|
||||
}
|
||||
|
||||
// GatewayYAMLConfig represents the gateway YAML configuration structure
|
||||
// This must match the yamlCfg struct in cmd/gateway/config.go exactly
|
||||
// because the gateway uses strict YAML decoding that rejects unknown fields
|
||||
type GatewayYAMLConfig struct {
|
||||
ListenAddr string `yaml:"listen_addr"`
|
||||
ClientNamespace string `yaml:"client_namespace"`
|
||||
RQLiteDSN string `yaml:"rqlite_dsn"`
|
||||
BootstrapPeers []string `yaml:"bootstrap_peers,omitempty"`
|
||||
EnableHTTPS bool `yaml:"enable_https,omitempty"`
|
||||
DomainName string `yaml:"domain_name,omitempty"`
|
||||
TLSCacheDir string `yaml:"tls_cache_dir,omitempty"`
|
||||
OlricServers []string `yaml:"olric_servers"`
|
||||
BaseDomain string `yaml:"base_domain"`
|
||||
NodePeerID string `yaml:"node_peer_id"`
|
||||
DataDir string `yaml:"data_dir"`
|
||||
OlricTimeout string `yaml:"olric_timeout,omitempty"`
|
||||
IPFSClusterAPIURL string `yaml:"ipfs_cluster_api_url,omitempty"`
|
||||
IPFSAPIURL string `yaml:"ipfs_api_url,omitempty"`
|
||||
IPFSTimeout string `yaml:"ipfs_timeout,omitempty"`
|
||||
IPFSReplicationFactor int `yaml:"ipfs_replication_factor,omitempty"`
|
||||
}
|
||||
|
||||
// NewInstanceSpawner creates a new Gateway instance spawner
|
||||
@ -163,8 +171,36 @@ func (is *InstanceSpawner) SpawnInstance(ctx context.Context, cfg InstanceConfig
|
||||
zap.Strings("olric_servers", cfg.OlricServers),
|
||||
)
|
||||
|
||||
	// Find the gateway binary - look in common locations
	var gatewayBinary string
|
||||
possiblePaths := []string{
|
||||
"./bin/gateway", // Development build
|
||||
"/usr/local/bin/orama-gateway", // System-wide install
|
||||
"/opt/orama/bin/gateway", // Package install
|
||||
}
|
||||
|
||||
for _, path := range possiblePaths {
|
||||
if _, err := os.Stat(path); err == nil {
|
||||
gatewayBinary = path
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
// Also check PATH
|
||||
if gatewayBinary == "" {
|
||||
if path, err := exec.LookPath("orama-gateway"); err == nil {
|
||||
gatewayBinary = path
|
||||
}
|
||||
}
|
||||
|
||||
if gatewayBinary == "" {
|
||||
return nil, &InstanceError{
|
||||
Message: "gateway binary not found (checked ./bin/gateway, /usr/local/bin/orama-gateway, /opt/orama/bin/gateway, PATH)",
|
||||
Cause: nil,
|
||||
}
|
||||
}
|
||||
|
||||
instance.logger.Info("Found gateway binary", zap.String("path", gatewayBinary))
|
||||
|
||||
// Create command
|
||||
cmd := exec.CommandContext(ctx, gatewayBinary, "--config", configPath)
|
||||
@ -237,9 +273,8 @@ func (is *InstanceSpawner) generateConfig(configPath string, cfg InstanceConfig,
|
||||
ClientNamespace: cfg.Namespace,
|
||||
RQLiteDSN: cfg.RQLiteDSN,
|
||||
OlricServers: cfg.OlricServers,
|
||||
BaseDomain: cfg.BaseDomain,
|
||||
NodePeerID: cfg.NodePeerID,
|
||||
DataDir: dataDir,
|
||||
// Note: DomainName is used for HTTPS/TLS, not needed for namespace gateways in dev mode
|
||||
DomainName: cfg.BaseDomain,
|
||||
}
|
||||
|
||||
data, err := yaml.Marshal(gatewayCfg)
|
||||
|
||||
@ -201,6 +201,10 @@ func isPublicPath(p string) bool {
|
||||
case "/health", "/v1/health", "/status", "/v1/status", "/v1/auth/jwks", "/.well-known/jwks.json", "/v1/version", "/v1/auth/login", "/v1/auth/challenge", "/v1/auth/verify", "/v1/auth/register", "/v1/auth/refresh", "/v1/auth/logout", "/v1/auth/api-key", "/v1/auth/simple-key", "/v1/network/status", "/v1/network/peers", "/v1/internal/tls/check", "/v1/internal/acme/present", "/v1/internal/acme/cleanup":
|
||||
return true
|
||||
default:
|
||||
// Also exempt namespace status polling endpoint
|
||||
if strings.HasPrefix(p, "/v1/namespace/status") {
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
@ -45,6 +45,9 @@ func (g *Gateway) Routes() http.Handler {
|
||||
g.ormHTTP.RegisterRoutes(mux)
|
||||
}
|
||||
|
||||
// namespace cluster status (public endpoint for polling during provisioning)
|
||||
mux.HandleFunc("/v1/namespace/status", g.namespaceClusterStatusHandler)
|
||||
|
||||
// network
|
||||
mux.HandleFunc("/v1/network/status", g.networkStatusHandler)
|
||||
mux.HandleFunc("/v1/network/peers", g.networkPeersHandler)
|
||||
|
||||
File diff suppressed because it is too large
@ -82,14 +82,60 @@ func (npa *NamespacePortAllocator) AllocatePortBlock(ctx context.Context, nodeID
|
||||
|
||||
// tryAllocatePortBlock attempts to allocate a port block (single attempt)
|
||||
func (npa *NamespacePortAllocator) tryAllocatePortBlock(ctx context.Context, nodeID, namespaceClusterID string) (*PortBlock, error) {
|
||||
// Query all allocated port blocks on this node
|
||||
// In dev environments where all nodes share the same IP, we need to track
|
||||
// allocations by IP address to avoid port conflicts. First get this node's IP.
|
||||
var nodeInfos []struct {
|
||||
IPAddress string `db:"ip_address"`
|
||||
}
|
||||
nodeQuery := `SELECT ip_address FROM dns_nodes WHERE id = ? LIMIT 1`
|
||||
if err := npa.db.Query(ctx, &nodeInfos, nodeQuery, nodeID); err != nil || len(nodeInfos) == 0 {
|
||||
// Fallback: if we can't get the IP, allocate per node_id only
|
||||
npa.logger.Debug("Could not get node IP, falling back to node_id-only allocation",
|
||||
zap.String("node_id", nodeID),
|
||||
)
|
||||
}
|
||||
|
||||
// Query all allocated port blocks. If nodes share the same IP, we need to
|
||||
// check allocations by IP address to prevent port conflicts.
|
||||
type portRow struct {
|
||||
PortStart int `db:"port_start"`
|
||||
}
|
||||
|
||||
	var allocatedBlocks []portRow
	var query string
	var err error
|
||||
|
||||
if len(nodeInfos) > 0 && nodeInfos[0].IPAddress != "" {
|
||||
// Check if other nodes share this IP - if so, allocate globally by IP
|
||||
var sameIPCount []struct {
|
||||
Count int `db:"count"`
|
||||
}
|
||||
countQuery := `SELECT COUNT(DISTINCT id) as count FROM dns_nodes WHERE ip_address = ?`
|
||||
if err := npa.db.Query(ctx, &sameIPCount, countQuery, nodeInfos[0].IPAddress); err == nil && len(sameIPCount) > 0 && sameIPCount[0].Count > 1 {
|
||||
// Multiple nodes share this IP (dev environment) - allocate globally
|
||||
query = `
|
||||
SELECT npa.port_start
|
||||
FROM namespace_port_allocations npa
|
||||
JOIN dns_nodes dn ON npa.node_id = dn.id
|
||||
WHERE dn.ip_address = ?
|
||||
ORDER BY npa.port_start ASC
|
||||
`
|
||||
err = npa.db.Query(ctx, &allocatedBlocks, query, nodeInfos[0].IPAddress)
|
||||
npa.logger.Debug("Multiple nodes share IP, allocating globally",
|
||||
zap.String("ip_address", nodeInfos[0].IPAddress),
|
||||
zap.Int("same_ip_nodes", sameIPCount[0].Count),
|
||||
)
|
||||
} else {
|
||||
// Single node per IP (production) - allocate per node
|
||||
query = `SELECT port_start FROM namespace_port_allocations WHERE node_id = ? ORDER BY port_start ASC`
|
||||
err = npa.db.Query(ctx, &allocatedBlocks, query, nodeID)
|
||||
}
|
||||
} else {
|
||||
// No IP info - allocate per node_id
|
||||
query = `SELECT port_start FROM namespace_port_allocations WHERE node_id = ? ORDER BY port_start ASC`
|
||||
err = npa.db.Query(ctx, &allocatedBlocks, query, nodeID)
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
return nil, &ClusterError{
|
||||
Message: "failed to query allocated ports",
|
||||
|
||||
@ -10,6 +10,7 @@ import (
|
||||
"github.com/DeBrosOfficial/network/pkg/gateway"
|
||||
"github.com/DeBrosOfficial/network/pkg/ipfs"
|
||||
"github.com/DeBrosOfficial/network/pkg/logging"
|
||||
"github.com/DeBrosOfficial/network/pkg/namespace"
|
||||
"go.uber.org/zap"
|
||||
)
|
||||
|
||||
@ -52,6 +53,20 @@ func (n *Node) startHTTPGateway(ctx context.Context) error {
|
||||
}
|
||||
n.apiGateway = apiGateway
|
||||
|
||||
// Wire up ClusterManager for per-namespace cluster provisioning
|
||||
if ormClient := apiGateway.GetORMClient(); ormClient != nil {
|
||||
baseDataDir := filepath.Join(os.ExpandEnv(n.config.Node.DataDir), "..", "data", "namespaces")
|
||||
clusterCfg := namespace.ClusterManagerConfig{
|
||||
BaseDomain: n.config.HTTPGateway.BaseDomain,
|
||||
BaseDataDir: baseDataDir,
|
||||
}
|
||||
clusterManager := namespace.NewClusterManager(ormClient, clusterCfg, n.logger.Logger)
|
||||
apiGateway.SetClusterProvisioner(clusterManager)
|
||||
n.logger.ComponentInfo(logging.ComponentNode, "Namespace cluster provisioning enabled",
|
||||
zap.String("base_domain", clusterCfg.BaseDomain),
|
||||
zap.String("base_data_dir", baseDataDir))
|
||||
}
|
||||
|
||||
go func() {
|
||||
server := &http.Server{
|
||||
Addr: gwCfg.ListenAddr,
|
||||
|
||||
@ -3,7 +3,7 @@ package olric
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"net/http"
|
||||
"net"
|
||||
"os"
|
||||
"os/exec"
|
||||
"path/filepath"
|
||||
@ -11,7 +11,6 @@ import (
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/DeBrosOfficial/network/pkg/tlsutil"
|
||||
"go.uber.org/zap"
|
||||
"gopkg.in/yaml.v3"
|
||||
)
|
||||
@ -382,12 +381,10 @@ func (is *InstanceSpawner) HealthCheck(ctx context.Context, ns, nodeID string) (
|
||||
|
||||
// waitForInstanceReady waits for the Olric instance to be ready
func (is *InstanceSpawner) waitForInstanceReady(ctx context.Context, instance *OlricInstance) error {
	// Olric doesn't have a standard /ready endpoint, so we check if the process
	// is running and the memberlist port is accepting connections.

	maxAttempts := 30 // 30 seconds
	for i := 0; i < maxAttempts; i++ {
		select {
		case <-ctx.Done():
@ -395,19 +392,35 @@
		case <-time.After(1 * time.Second):
		}

		// Check if the process is still running.
		if instance.cmd != nil && instance.cmd.ProcessState != nil && instance.cmd.ProcessState.Exited() {
			return fmt.Errorf("Olric process exited unexpectedly")
		}

		// Try to connect to the memberlist port to verify it's accepting connections.
		// Use the advertise address since Olric may bind to a specific IP.
		addr := fmt.Sprintf("%s:%d", instance.AdvertiseAddr, instance.MemberlistPort)
		if instance.AdvertiseAddr == "" {
			addr = fmt.Sprintf("localhost:%d", instance.MemberlistPort)
		}

		conn, err := net.DialTimeout("tcp", addr, 2*time.Second)
		if err != nil {
			instance.logger.Debug("Waiting for Olric memberlist",
				zap.Int("attempt", i+1),
				zap.String("addr", addr),
				zap.Error(err),
			)
			continue
		}
		conn.Close()

		instance.logger.Debug("Olric instance ready",
			zap.Int("attempts", i+1),
			zap.String("addr", addr),
		)
		return nil
	}

	return fmt.Errorf("Olric did not become ready within timeout")
}
|
||||
@ -453,23 +466,20 @@ func (is *InstanceSpawner) monitorInstance(instance *OlricInstance) {
|
||||
}
|
||||
}
|
||||
|
||||
// IsHealthy checks if the Olric instance is healthy by verifying the memberlist port is accepting connections
func (oi *OlricInstance) IsHealthy(ctx context.Context) (bool, error) {
	// Olric doesn't have a standard /ready HTTP endpoint, so we check memberlist connectivity.
	addr := fmt.Sprintf("%s:%d", oi.AdvertiseAddr, oi.MemberlistPort)
	if oi.AdvertiseAddr == "" || oi.AdvertiseAddr == "0.0.0.0" {
		addr = fmt.Sprintf("localhost:%d", oi.MemberlistPort)
	}

	conn, err := net.DialTimeout("tcp", addr, 2*time.Second)
	if err != nil {
		return false, err
	}
	conn.Close()
	return true, nil
}
|
||||
|
||||
// DSN returns the connection address for this Olric instance
|
||||
|
||||
@ -2,585 +2,248 @@ package rqlite

import (
    "context"
    "encoding/json"
    "fmt"
    "io"
    "net/http"
    "os"
    "os/exec"
    "path/filepath"
    "strings"
    "sync"
    "time"

    "github.com/DeBrosOfficial/network/pkg/tlsutil"
    "go.uber.org/zap"
)

// InstanceNodeStatus represents the status of an instance (local type to avoid import cycle)
type InstanceNodeStatus string

const (
    InstanceStatusPending InstanceNodeStatus = "pending"
    InstanceStatusStarting InstanceNodeStatus = "starting"
    InstanceStatusRunning InstanceNodeStatus = "running"
    InstanceStatusStopped InstanceNodeStatus = "stopped"
    InstanceStatusFailed InstanceNodeStatus = "failed"
)

// InstanceError represents an error during instance operations (local type to avoid import cycle)
type InstanceError struct {
    Message string
    Cause error
}

func (e *InstanceError) Error() string {
    if e.Cause != nil {
        return e.Message + ": " + e.Cause.Error()
    }
    return e.Message
}

func (e *InstanceError) Unwrap() error {
    return e.Cause
}

// InstanceSpawner manages multiple RQLite instances for namespace clusters.
// Each namespace gets its own RQLite cluster with dedicated ports and data directories.
type InstanceSpawner struct {
    logger *zap.Logger
    baseDir string // Base directory for all namespace data (e.g., ~/.orama/data/namespaces)
    instances map[string]*RQLiteInstance
    mu sync.RWMutex
}

// RQLiteInstance represents a running RQLite instance for a namespace
type RQLiteInstance struct {
    Namespace string
    NodeID string
    HTTPPort int
    RaftPort int
    HTTPAdvAddress string
    RaftAdvAddress string
    JoinAddresses []string
    DataDir string
    IsLeader bool
    PID int
    Status InstanceNodeStatus
    StartedAt time.Time
    LastHealthCheck time.Time
    cmd *exec.Cmd
    logger *zap.Logger
}

// InstanceConfig holds configuration for spawning an RQLite instance
// InstanceConfig contains configuration for spawning a RQLite instance
type InstanceConfig struct {
    Namespace string // Namespace name (e.g., "alice")
    NodeID string // Physical node ID
    Namespace string // Namespace this instance belongs to
    NodeID string // Node ID where this instance runs
    HTTPPort int // HTTP API port
    RaftPort int // Raft consensus port
    HTTPAdvAddress string // Advertised HTTP address (e.g., "192.168.1.10:10000")
    RaftAdvAddress string // Advertised Raft address (e.g., "192.168.1.10:10001")
    JoinAddresses []string // Addresses of existing cluster members to join
    IsLeader bool // Whether this is the initial leader node
    HTTPAdvAddress string // Advertised HTTP address (e.g., "192.168.1.1:10000")
    RaftAdvAddress string // Advertised Raft address (e.g., "192.168.1.1:10001")
    JoinAddresses []string // Addresses to join (e.g., ["192.168.1.2:10001"])
    DataDir string // Data directory for this instance
    IsLeader bool // Whether this is the first node (creates cluster)
}

// Instance represents a running RQLite instance
type Instance struct {
    Config InstanceConfig
    Process *os.Process
    PID int
}

// InstanceSpawner manages RQLite instance lifecycle for namespaces
type InstanceSpawner struct {
    baseDataDir string // Base directory for namespace data (e.g., ~/.orama/data/namespaces)
    rqlitePath string // Path to rqlited binary
    logger *zap.Logger
}

// NewInstanceSpawner creates a new RQLite instance spawner
func NewInstanceSpawner(baseDir string, logger *zap.Logger) *InstanceSpawner {
func NewInstanceSpawner(baseDataDir string, logger *zap.Logger) *InstanceSpawner {
    // Find rqlited binary
    rqlitePath := "rqlited" // Will use PATH
    if path, err := exec.LookPath("rqlited"); err == nil {
        rqlitePath = path
    }

    return &InstanceSpawner{
        logger: logger.With(zap.String("component", "rqlite-instance-spawner")),
        baseDir: baseDir,
        instances: make(map[string]*RQLiteInstance),
        baseDataDir: baseDataDir,
        rqlitePath: rqlitePath,
        logger: logger,
    }
}

// instanceKey generates a unique key for an instance based on namespace and node
func instanceKey(namespace, nodeID string) string {
    return fmt.Sprintf("%s:%s", namespace, nodeID)
}
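
Both versions of InstanceConfig describe the same idea: the first node of a namespace cluster bootstraps with IsLeader set and no join addresses, while later nodes carry the Raft addresses of existing members. A small sketch of the two shapes of configuration, using a trimmed local mirror of the newer struct so it compiles on its own; the namespace, node names, ports, and IPs are illustrative:

```go
package main

import "fmt"

// Local mirror of the newer InstanceConfig shape from the diff above,
// trimmed to the fields used in this sketch.
type InstanceConfig struct {
	Namespace      string
	NodeID         string
	HTTPPort       int
	RaftPort       int
	HTTPAdvAddress string
	RaftAdvAddress string
	JoinAddresses  []string
	DataDir        string
	IsLeader       bool
}

func main() {
	// First node of the namespace cluster: bootstraps, so no join addresses.
	leader := InstanceConfig{
		Namespace:      "alice",
		NodeID:         "node-1",
		HTTPPort:       10000,
		RaftPort:       10001,
		HTTPAdvAddress: "192.168.1.1:10000",
		RaftAdvAddress: "192.168.1.1:10001",
		IsLeader:       true,
	}

	// Second node: joins the existing member's Raft address instead.
	follower := InstanceConfig{
		Namespace:      "alice",
		NodeID:         "node-2",
		HTTPPort:       10000,
		RaftPort:       10001,
		HTTPAdvAddress: "192.168.1.2:10000",
		RaftAdvAddress: "192.168.1.2:10001",
		JoinAddresses:  []string{"192.168.1.1:10001"},
	}

	fmt.Println(leader.NodeID, follower.JoinAddresses)
}
```
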
// SpawnInstance starts a new RQLite instance for a namespace on a specific node.
// Returns the instance info or an error if spawning fails.
func (is *InstanceSpawner) SpawnInstance(ctx context.Context, cfg InstanceConfig) (*RQLiteInstance, error) {
    key := instanceKey(cfg.Namespace, cfg.NodeID)

    is.mu.Lock()
    if existing, ok := is.instances[key]; ok {
        is.mu.Unlock()
        // Instance already exists, return it if running
        if existing.Status == InstanceStatusRunning {
            return existing, nil
        }
        // Otherwise, remove it and start fresh
        is.mu.Lock()
        delete(is.instances, key)
    }
    is.mu.Unlock()

// SpawnInstance starts a new RQLite instance with the given configuration
func (is *InstanceSpawner) SpawnInstance(ctx context.Context, cfg InstanceConfig) (*Instance, error) {
    // Create data directory
    dataDir := filepath.Join(is.baseDir, cfg.Namespace, "rqlite", cfg.NodeID)
    dataDir := cfg.DataDir
    if dataDir == "" {
        dataDir = filepath.Join(is.baseDataDir, cfg.Namespace, "rqlite", cfg.NodeID)
    }

    if err := os.MkdirAll(dataDir, 0755); err != nil {
        return nil, &InstanceError{
            Message: "failed to create data directory",
            Cause: err,
        }
    }

    // Create logs directory
    logsDir := filepath.Join(is.baseDir, cfg.Namespace, "logs")
    if err := os.MkdirAll(logsDir, 0755); err != nil {
        return nil, &InstanceError{
            Message: "failed to create logs directory",
            Cause: err,
        }
    }

    instance := &RQLiteInstance{
        Namespace: cfg.Namespace,
        NodeID: cfg.NodeID,
        HTTPPort: cfg.HTTPPort,
        RaftPort: cfg.RaftPort,
        HTTPAdvAddress: cfg.HTTPAdvAddress,
        RaftAdvAddress: cfg.RaftAdvAddress,
        JoinAddresses: cfg.JoinAddresses,
        DataDir: dataDir,
        IsLeader: cfg.IsLeader,
        Status: InstanceStatusStarting,
        logger: is.logger.With(zap.String("namespace", cfg.Namespace), zap.String("node_id", cfg.NodeID)),
        return nil, fmt.Errorf("failed to create data directory: %w", err)
    }

    // Build command arguments
    // Note: All flags must come BEFORE the data directory argument
    args := []string{
        "-http-addr", fmt.Sprintf("0.0.0.0:%d", cfg.HTTPPort),
        "-http-adv-addr", cfg.HTTPAdvAddress,
        "-raft-addr", fmt.Sprintf("0.0.0.0:%d", cfg.RaftPort),
        "-http-adv-addr", cfg.HTTPAdvAddress,
        "-raft-adv-addr", cfg.RaftAdvAddress,
    }

    // Handle cluster joining
    if len(cfg.JoinAddresses) > 0 && !cfg.IsLeader {
        // Remove peers.json if it exists to avoid stale cluster state
        peersJSONPath := filepath.Join(dataDir, "raft", "peers.json")
        if _, err := os.Stat(peersJSONPath); err == nil {
            instance.logger.Debug("Removing existing peers.json before joining cluster",
                zap.String("path", peersJSONPath))
            _ = os.Remove(peersJSONPath)
        }

        // Prepare join addresses (strip http:// prefix if present)
        joinAddrs := make([]string, 0, len(cfg.JoinAddresses))
    // Add join addresses if not the leader (must be before data directory)
    if !cfg.IsLeader && len(cfg.JoinAddresses) > 0 {
        for _, addr := range cfg.JoinAddresses {
            addr = strings.TrimPrefix(addr, "http://")
            addr = strings.TrimPrefix(addr, "https://")
            joinAddrs = append(joinAddrs, addr)
            args = append(args, "-join", addr)
        }
    }

        // Wait for join targets to be available
        if err := is.waitForJoinTargets(ctx, cfg.JoinAddresses); err != nil {
            instance.logger.Warn("Join targets not all reachable, will still attempt join",
                zap.Error(err))
        }

        args = append(args,
            "-join", strings.Join(joinAddrs, ","),
            "-join-as", cfg.RaftAdvAddress,
            "-join-attempts", "30",
            "-join-interval", "10s",
        )
    }

    // Add data directory as final argument
    // Data directory must be the last argument
    args = append(args, dataDir)

    instance.logger.Info("Starting RQLite instance",
    is.logger.Info("Spawning RQLite instance",
        zap.String("namespace", cfg.Namespace),
        zap.String("node_id", cfg.NodeID),
        zap.Int("http_port", cfg.HTTPPort),
        zap.Int("raft_port", cfg.RaftPort),
        zap.Strings("join_addresses", cfg.JoinAddresses),
        zap.Bool("is_leader", cfg.IsLeader),
        zap.Strings("join_addresses", cfg.JoinAddresses),
    )

    // Create command
    cmd := exec.CommandContext(ctx, "rqlited", args...)
    instance.cmd = cmd

    // Setup logging
    logPath := filepath.Join(logsDir, fmt.Sprintf("rqlite-%s.log", cfg.NodeID))
    logFile, err := os.OpenFile(logPath, os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0644)
    if err != nil {
        return nil, &InstanceError{
            Message: "failed to open log file",
            Cause: err,
        }
    }

    cmd.Stdout = logFile
    cmd.Stderr = logFile

    // Start the process
    cmd := exec.CommandContext(ctx, is.rqlitePath, args...)
    cmd.Dir = dataDir

    // Log output
    logFile, err := os.OpenFile(
        filepath.Join(dataDir, "rqlite.log"),
        os.O_CREATE|os.O_WRONLY|os.O_APPEND,
        0644,
    )
    if err == nil {
        cmd.Stdout = logFile
        cmd.Stderr = logFile
    }

    if err := cmd.Start(); err != nil {
        logFile.Close()
        return nil, &InstanceError{
            Message: "failed to start RQLite process",
            Cause: err,
        }
        return nil, fmt.Errorf("failed to start rqlited: %w", err)
    }

    logFile.Close()

    instance.PID = cmd.Process.Pid
    instance.StartedAt = time.Now()

    // Store instance
    is.mu.Lock()
    is.instances[key] = instance
    is.mu.Unlock()

    // Wait for instance to be ready
    if err := is.waitForInstanceReady(ctx, instance); err != nil {
        // Kill the process on failure
        if cmd.Process != nil {
            _ = cmd.Process.Kill()
        }
        is.mu.Lock()
        delete(is.instances, key)
        is.mu.Unlock()
        return nil, &InstanceError{
            Message: "RQLite instance did not become ready",
            Cause: err,
        }
    instance := &Instance{
        Config: cfg,
        Process: cmd.Process,
        PID: cmd.Process.Pid,
    }

    instance.Status = InstanceStatusRunning
    instance.LastHealthCheck = time.Now()
    // Wait for the instance to be ready
    if err := is.waitForReady(ctx, cfg.HTTPPort); err != nil {
        // Kill the process if it didn't start properly
        cmd.Process.Kill()
        return nil, fmt.Errorf("instance failed to become ready: %w", err)
    }

    instance.logger.Info("RQLite instance started successfully",
    is.logger.Info("RQLite instance started successfully",
        zap.String("namespace", cfg.Namespace),
        zap.Int("pid", instance.PID),
    )

    // Start background process monitor
    go is.monitorInstance(instance)

    return instance, nil
}
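
Both versions of SpawnInstance build the rqlited argument list the same way: all flags first, `-join` only for non-leader nodes, and the data directory as the final positional argument. A small sketch of how the resulting command line comes together; the flag names match the diff, while the ports, addresses, and directory are hypothetical:

```go
package main

import (
	"fmt"
	"strings"
)

func main() {
	// Flags must precede the positional data directory argument.
	args := []string{
		"-http-addr", "0.0.0.0:10000",
		"-http-adv-addr", "192.168.1.2:10000",
		"-raft-addr", "0.0.0.0:10001",
		"-raft-adv-addr", "192.168.1.2:10001",
	}

	// Non-leader nodes also pass the existing members' Raft addresses via -join.
	joinAddresses := []string{"192.168.1.1:10001"}
	isLeader := false
	if !isLeader && len(joinAddresses) > 0 {
		args = append(args, "-join", strings.Join(joinAddresses, ","))
	}

	// Data directory is always the last argument.
	args = append(args, "/home/orama/.orama/data/namespaces/alice/rqlite/node-2")

	fmt.Println("rqlited " + strings.Join(args, " "))
}
```
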
// StopInstance stops an RQLite instance for a namespace on a specific node
func (is *InstanceSpawner) StopInstance(ctx context.Context, namespace, nodeID string) error {
    key := instanceKey(namespace, nodeID)

    is.mu.Lock()
    instance, ok := is.instances[key]
    if !ok {
        is.mu.Unlock()
        return nil // Already stopped
    }
    delete(is.instances, key)
    is.mu.Unlock()

    if instance.cmd != nil && instance.cmd.Process != nil {
        instance.logger.Info("Stopping RQLite instance", zap.Int("pid", instance.PID))

        // Send SIGTERM for graceful shutdown
        if err := instance.cmd.Process.Signal(os.Interrupt); err != nil {
            // If SIGTERM fails, kill it
            _ = instance.cmd.Process.Kill()
        }

        // Wait for process to exit with timeout
        done := make(chan error, 1)
        go func() {
            done <- instance.cmd.Wait()
        }()

        select {
        case <-done:
            instance.logger.Info("RQLite instance stopped gracefully")
        case <-time.After(10 * time.Second):
            instance.logger.Warn("RQLite instance did not stop gracefully, killing")
            _ = instance.cmd.Process.Kill()
        case <-ctx.Done():
            _ = instance.cmd.Process.Kill()
            return ctx.Err()
        }
    }

    instance.Status = InstanceStatusStopped
    return nil
}

// StopAllInstances stops all RQLite instances for a namespace
func (is *InstanceSpawner) StopAllInstances(ctx context.Context, ns string) error {
    is.mu.RLock()
    var keys []string
    for key, inst := range is.instances {
        if inst.Namespace == ns {
            keys = append(keys, key)
        }
    }
    is.mu.RUnlock()

    var lastErr error
    for _, key := range keys {
        parts := strings.SplitN(key, ":", 2)
        if len(parts) == 2 {
            if err := is.StopInstance(ctx, parts[0], parts[1]); err != nil {
                lastErr = err
            }
        }
    }
    return lastErr
}

// GetInstance returns the instance for a namespace on a specific node
func (is *InstanceSpawner) GetInstance(namespace, nodeID string) (*RQLiteInstance, bool) {
    is.mu.RLock()
    defer is.mu.RUnlock()

    instance, ok := is.instances[instanceKey(namespace, nodeID)]
    return instance, ok
}

// GetNamespaceInstances returns all instances for a namespace
func (is *InstanceSpawner) GetNamespaceInstances(ns string) []*RQLiteInstance {
    is.mu.RLock()
    defer is.mu.RUnlock()

    var instances []*RQLiteInstance
    for _, inst := range is.instances {
        if inst.Namespace == ns {
            instances = append(instances, inst)
        }
    }
    return instances
}

// HealthCheck checks if an instance is healthy
func (is *InstanceSpawner) HealthCheck(ctx context.Context, namespace, nodeID string) (bool, error) {
    instance, ok := is.GetInstance(namespace, nodeID)
    if !ok {
        return false, &InstanceError{Message: "instance not found"}
    }

    healthy, err := instance.IsHealthy(ctx)
    if healthy {
        is.mu.Lock()
        instance.LastHealthCheck = time.Now()
        is.mu.Unlock()
    }
    return healthy, err
}

// waitForJoinTargets waits for join target nodes to be reachable
func (is *InstanceSpawner) waitForJoinTargets(ctx context.Context, joinAddresses []string) error {
    timeout := 2 * time.Minute
    deadline := time.Now().Add(timeout)
    client := tlsutil.NewHTTPClient(5 * time.Second)
// waitForReady waits for the RQLite instance to be ready to accept connections
func (is *InstanceSpawner) waitForReady(ctx context.Context, httpPort int) error {
    url := fmt.Sprintf("http://localhost:%d/status", httpPort)
    client := &http.Client{Timeout: 2 * time.Second}

    deadline := time.Now().Add(30 * time.Second)
    for time.Now().Before(deadline) {
        allReachable := true
        for _, addr := range joinAddresses {
            statusURL := addr
            if !strings.HasPrefix(addr, "http") {
                statusURL = "http://" + addr
            }
            statusURL = strings.TrimRight(statusURL, "/") + "/status"

            resp, err := client.Get(statusURL)
            if err != nil {
                allReachable = false
                break
            }
            resp.Body.Close()
            if resp.StatusCode != http.StatusOK {
                allReachable = false
                break
            }
        }

        if allReachable {
            return nil
        }

        select {
        case <-ctx.Done():
            return ctx.Err()
        case <-time.After(2 * time.Second):
        }
    }

    return fmt.Errorf("join targets not reachable within timeout")
}

// waitForInstanceReady waits for the RQLite instance to be ready
func (is *InstanceSpawner) waitForInstanceReady(ctx context.Context, instance *RQLiteInstance) error {
    url := fmt.Sprintf("http://localhost:%d/status", instance.HTTPPort)
    client := tlsutil.NewHTTPClient(2 * time.Second)

    // Longer timeout for joining nodes as they need to sync
    maxAttempts := 180 // 3 minutes
    if len(instance.JoinAddresses) > 0 {
        maxAttempts = 300 // 5 minutes for joiners
    }

    for i := 0; i < maxAttempts; i++ {
        select {
        case <-ctx.Done():
            return ctx.Err()
        case <-time.After(1 * time.Second):
        default:
        }

        resp, err := client.Get(url)
        if err != nil {
            continue
        }

        if err == nil {
            resp.Body.Close()
            if resp.StatusCode == http.StatusOK {
                body, _ := io.ReadAll(resp.Body)
                resp.Body.Close()
                return nil
            }
        }

        var statusResp map[string]interface{}
        if err := json.Unmarshal(body, &statusResp); err == nil {
            if raft, ok := statusResp["raft"].(map[string]interface{}); ok {
                state, _ := raft["state"].(string)
                if state == "leader" || state == "follower" {
                    instance.logger.Debug("RQLite instance ready",
                        zap.String("state", state),
                        zap.Int("attempts", i+1),
        time.Sleep(500 * time.Millisecond)
    }

    return fmt.Errorf("timeout waiting for RQLite to be ready on port %d", httpPort)
}
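
Both readiness loops above poll rqlited's /status endpoint until the node answers; the older variant additionally inspects the reported Raft state before declaring the node ready. A compact, standalone sketch of that polling pattern, assuming only the standard library; the helper name, port, and timing values are illustrative:

```go
package main

import (
	"encoding/json"
	"fmt"
	"net/http"
	"time"
)

// waitForRQLite polls /status until the node reports a Raft state of
// "leader" or "follower", or the deadline passes.
func waitForRQLite(httpPort int, timeout time.Duration) error {
	url := fmt.Sprintf("http://localhost:%d/status", httpPort)
	client := &http.Client{Timeout: 2 * time.Second}
	deadline := time.Now().Add(timeout)

	for time.Now().Before(deadline) {
		resp, err := client.Get(url)
		if err == nil && resp.StatusCode == http.StatusOK {
			var status map[string]interface{}
			decodeErr := json.NewDecoder(resp.Body).Decode(&status)
			resp.Body.Close()
			if decodeErr == nil {
				if raft, ok := status["raft"].(map[string]interface{}); ok {
					if s, _ := raft["state"].(string); s == "leader" || s == "follower" {
						return nil
					}
				}
			}
		} else if resp != nil {
			resp.Body.Close()
		}
		time.Sleep(500 * time.Millisecond)
	}
	return fmt.Errorf("rqlite on port %d not ready within %s", httpPort, timeout)
}

func main() {
	// Hypothetical port from the namespace range.
	fmt.Println(waitForRQLite(10000, 30*time.Second))
}
```
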
// StopInstance stops a running RQLite instance
func (is *InstanceSpawner) StopInstance(ctx context.Context, instance *Instance) error {
    if instance == nil || instance.Process == nil {
        return nil
    }

    is.logger.Info("Stopping RQLite instance",
        zap.String("namespace", instance.Config.Namespace),
        zap.Int("pid", instance.PID),
    )

    // Send SIGTERM for graceful shutdown
    if err := instance.Process.Signal(os.Interrupt); err != nil {
        // If SIGTERM fails, try SIGKILL
        if err := instance.Process.Kill(); err != nil {
            return fmt.Errorf("failed to kill process: %w", err)
        }
    }

    // Wait for process to exit
    done := make(chan error, 1)
    go func() {
        _, err := instance.Process.Wait()
        done <- err
    }()

    select {
    case <-ctx.Done():
        instance.Process.Kill()
        return ctx.Err()
    case err := <-done:
        if err != nil {
            is.logger.Warn("Process exited with error", zap.Error(err))
        }
    case <-time.After(10 * time.Second):
        instance.Process.Kill()
    }

    is.logger.Info("RQLite instance stopped",
        zap.String("namespace", instance.Config.Namespace),
    )

    return nil
}
    } else {
        // Backwards compatibility - if no raft status, consider ready

// StopInstanceByPID stops a RQLite instance by its PID
func (is *InstanceSpawner) StopInstanceByPID(pid int) error {
    process, err := os.FindProcess(pid)
    if err != nil {
        return fmt.Errorf("process not found: %w", err)
    }

    // Send SIGTERM
    if err := process.Signal(os.Interrupt); err != nil {
        // Try SIGKILL
        if err := process.Kill(); err != nil {
            return fmt.Errorf("failed to kill process: %w", err)
        }
    }

    return nil
}
    }

// IsInstanceRunning checks if a RQLite instance is running
func (is *InstanceSpawner) IsInstanceRunning(httpPort int) bool {
    url := fmt.Sprintf("http://localhost:%d/status", httpPort)
    client := &http.Client{Timeout: 2 * time.Second}

    resp, err := client.Get(url)
    if err != nil {
        return false
    }
    resp.Body.Close()
    return resp.StatusCode == http.StatusOK
}

    return fmt.Errorf("RQLite did not become ready within timeout")
// GetDataDir returns the data directory path for a namespace RQLite instance
func (is *InstanceSpawner) GetDataDir(namespace, nodeID string) string {
    return filepath.Join(is.baseDataDir, namespace, "rqlite", nodeID)
}

// monitorInstance monitors an instance and updates its status
func (is *InstanceSpawner) monitorInstance(instance *RQLiteInstance) {
    ticker := time.NewTicker(30 * time.Second)
    defer ticker.Stop()

    for range ticker.C {
        is.mu.RLock()
        key := instanceKey(instance.Namespace, instance.NodeID)
        _, exists := is.instances[key]
        is.mu.RUnlock()

        if !exists {
            // Instance was removed
            return
        }

        ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
        healthy, _ := instance.IsHealthy(ctx)
        cancel()

        is.mu.Lock()
        if healthy {
            instance.Status = InstanceStatusRunning
            instance.LastHealthCheck = time.Now()
        } else {
            instance.Status = InstanceStatusFailed
            instance.logger.Warn("RQLite instance health check failed")
        }
        is.mu.Unlock()

        // Check if process is still running
        if instance.cmd != nil && instance.cmd.ProcessState != nil && instance.cmd.ProcessState.Exited() {
            is.mu.Lock()
            instance.Status = InstanceStatusStopped
            is.mu.Unlock()
            instance.logger.Warn("RQLite instance process exited unexpectedly")
            return
        }
    }
}

// IsHealthy checks if the RQLite instance is healthy
func (ri *RQLiteInstance) IsHealthy(ctx context.Context) (bool, error) {
    url := fmt.Sprintf("http://localhost:%d/status", ri.HTTPPort)
    client := tlsutil.NewHTTPClient(5 * time.Second)

    req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil)
    if err != nil {
        return false, err
    }

    resp, err := client.Do(req)
    if err != nil {
        return false, err
    }
    defer resp.Body.Close()

    if resp.StatusCode != http.StatusOK {
        return false, fmt.Errorf("status endpoint returned %d", resp.StatusCode)
    }

    body, err := io.ReadAll(resp.Body)
    if err != nil {
        return false, err
    }

    var statusResp map[string]interface{}
    if err := json.Unmarshal(body, &statusResp); err != nil {
        return false, err
    }

    if raft, ok := statusResp["raft"].(map[string]interface{}); ok {
        state, _ := raft["state"].(string)
        return state == "leader" || state == "follower", nil
    }

    // Backwards compatibility
    return true, nil
}

// GetLeaderAddress returns the leader's address for the cluster
func (ri *RQLiteInstance) GetLeaderAddress(ctx context.Context) (string, error) {
    url := fmt.Sprintf("http://localhost:%d/status", ri.HTTPPort)
    client := tlsutil.NewHTTPClient(5 * time.Second)

    req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil)
    if err != nil {
        return "", err
    }

    resp, err := client.Do(req)
    if err != nil {
        return "", err
    }
    defer resp.Body.Close()

    body, err := io.ReadAll(resp.Body)
    if err != nil {
        return "", err
    }

    var statusResp map[string]interface{}
    if err := json.Unmarshal(body, &statusResp); err != nil {
        return "", err
    }

    if raft, ok := statusResp["raft"].(map[string]interface{}); ok {
        if leader, ok := raft["leader_addr"].(string); ok {
            return leader, nil
        }
    }

    return "", fmt.Errorf("leader address not found in status response")
}

// DSN returns the connection string for this RQLite instance
func (ri *RQLiteInstance) DSN() string {
    return fmt.Sprintf("http://localhost:%d", ri.HTTPPort)
}

// AdvertisedDSN returns the advertised connection string for cluster communication
func (ri *RQLiteInstance) AdvertisedDSN() string {
    return fmt.Sprintf("http://%s", ri.HTTPAdvAddress)
// CleanupDataDir removes the data directory for a namespace RQLite instance
func (is *InstanceSpawner) CleanupDataDir(namespace, nodeID string) error {
    dataDir := is.GetDataDir(namespace, nodeID)
    return os.RemoveAll(dataDir)
}
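
GetDataDir and CleanupDataDir pin down the on-disk layout: one directory per namespace under the base data directory, with a per-node rqlite subdirectory beneath it. A small sketch of how that path is composed; the base directory value matches the comments in the diff, while the namespace and node names are hypothetical:

```go
package main

import (
	"fmt"
	"os"
	"path/filepath"
)

// namespaceRQLiteDir mirrors the layout used by GetDataDir above:
// <base>/<namespace>/rqlite/<nodeID>
func namespaceRQLiteDir(base, namespace, nodeID string) string {
	return filepath.Join(base, namespace, "rqlite", nodeID)
}

func main() {
	home, _ := os.UserHomeDir()
	base := filepath.Join(home, ".orama", "data", "namespaces")

	// Hypothetical namespace and node names.
	fmt.Println(namespaceRQLiteDir(base, "alice", "node-1"))
	// e.g. ~/.orama/data/namespaces/alice/rqlite/node-1
}
```
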
@ -29,6 +29,12 @@ PORTS=(
    9096 9106 9116 9126 9136
)

# Add namespace cluster ports (10000-10099)
# These are dynamically allocated for per-namespace RQLite/Olric/Gateway instances
for port in $(seq 10000 10099); do
    PORTS+=($port)
done

killed_count=0
killed_pids=()

@ -57,6 +63,41 @@ SPECIFIC_PATTERNS=(
    "anyone-client"
)

# Kill namespace cluster processes (spawned by ClusterManager)
# These are RQLite/Olric/Gateway instances running on ports 10000-10099
NAMESPACE_DATA_DIR="$HOME/.orama/data/namespaces"
if [[ -d "$NAMESPACE_DATA_DIR" ]]; then
    # Find rqlited processes started in namespace directories
    ns_pids=$(pgrep -f "rqlited.*$NAMESPACE_DATA_DIR" 2>/dev/null || true)
    if [[ -n "$ns_pids" ]]; then
        for pid in $ns_pids; do
            echo " Killing namespace rqlited process (PID: $pid)"
            kill -9 "$pid" 2>/dev/null || true
            killed_pids+=("$pid")
        done
    fi

    # Find olric-server processes started for namespaces (check env var or config path)
    ns_olric_pids=$(pgrep -f "olric-server.*$NAMESPACE_DATA_DIR" 2>/dev/null || true)
    if [[ -n "$ns_olric_pids" ]]; then
        for pid in $ns_olric_pids; do
            echo " Killing namespace olric-server process (PID: $pid)"
            kill -9 "$pid" 2>/dev/null || true
            killed_pids+=("$pid")
        done
    fi

    # Find gateway processes started for namespaces
    ns_gw_pids=$(pgrep -f "gateway.*--config.*$NAMESPACE_DATA_DIR" 2>/dev/null || true)
    if [[ -n "$ns_gw_pids" ]]; then
        for pid in $ns_gw_pids; do
            echo " Killing namespace gateway process (PID: $pid)"
            kill -9 "$pid" 2>/dev/null || true
            killed_pids+=("$pid")
        done
    fi
fi

for pattern in "${SPECIFIC_PATTERNS[@]}"; do
    # Use exact pattern matching to avoid false positives
    all_pids=$(pgrep -f "$pattern" 2>/dev/null || true)