mirror of
https://github.com/DeBrosOfficial/network.git
synced 2026-01-30 11:33:04 +00:00
505 lines
14 KiB
Go
505 lines
14 KiB
Go
package gateway
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
"net/http"
|
|
"os"
|
|
"os/exec"
|
|
"path/filepath"
|
|
"strings"
|
|
"sync"
|
|
"time"
|
|
|
|
"github.com/DeBrosOfficial/network/pkg/tlsutil"
|
|
"go.uber.org/zap"
|
|
"gopkg.in/yaml.v3"
|
|
)
|
|
|
|
// InstanceNodeStatus represents the status of an instance (local type to avoid import cycle)
|
|
type InstanceNodeStatus string
|
|
|
|
const (
|
|
InstanceStatusPending InstanceNodeStatus = "pending"
|
|
InstanceStatusStarting InstanceNodeStatus = "starting"
|
|
InstanceStatusRunning InstanceNodeStatus = "running"
|
|
InstanceStatusStopped InstanceNodeStatus = "stopped"
|
|
InstanceStatusFailed InstanceNodeStatus = "failed"
|
|
)
|
|
|
|
// InstanceError represents an error during instance operations (local type to avoid import cycle)
|
|
type InstanceError struct {
|
|
Message string
|
|
Cause error
|
|
}
|
|
|
|
func (e *InstanceError) Error() string {
|
|
if e.Cause != nil {
|
|
return e.Message + ": " + e.Cause.Error()
|
|
}
|
|
return e.Message
|
|
}
|
|
|
|
func (e *InstanceError) Unwrap() error {
|
|
return e.Cause
|
|
}
|
|
|
|
// InstanceSpawner manages multiple Gateway instances for namespace clusters.
|
|
// Each namespace gets its own gateway instances that connect to its dedicated RQLite and Olric clusters.
|
|
type InstanceSpawner struct {
|
|
logger *zap.Logger
|
|
baseDir string // Base directory for all namespace data (e.g., ~/.orama/data/namespaces)
|
|
instances map[string]*GatewayInstance
|
|
mu sync.RWMutex
|
|
}
|
|
|
|
// GatewayInstance represents a running Gateway instance for a namespace
|
|
type GatewayInstance struct {
|
|
Namespace string
|
|
NodeID string
|
|
HTTPPort int
|
|
BaseDomain string
|
|
RQLiteDSN string // Connection to namespace RQLite
|
|
OlricServers []string // Connection to namespace Olric
|
|
ConfigPath string
|
|
PID int
|
|
Status InstanceNodeStatus
|
|
StartedAt time.Time
|
|
LastHealthCheck time.Time
|
|
cmd *exec.Cmd
|
|
logger *zap.Logger
|
|
}
|
|
|
|
// InstanceConfig holds configuration for spawning a Gateway instance
|
|
type InstanceConfig struct {
|
|
Namespace string // Namespace name (e.g., "alice")
|
|
NodeID string // Physical node ID
|
|
HTTPPort int // HTTP API port
|
|
BaseDomain string // Base domain (e.g., "devnet-orama.network")
|
|
RQLiteDSN string // RQLite connection DSN (e.g., "http://localhost:10000")
|
|
OlricServers []string // Olric server addresses
|
|
NodePeerID string // Physical node's peer ID for home node management
|
|
DataDir string // Data directory for deployments, SQLite, etc.
|
|
}
|
|
|
|
// GatewayYAMLConfig represents the gateway YAML configuration structure
|
|
// This must match the yamlCfg struct in cmd/gateway/config.go exactly
|
|
// because the gateway uses strict YAML decoding that rejects unknown fields
|
|
type GatewayYAMLConfig struct {
|
|
ListenAddr string `yaml:"listen_addr"`
|
|
ClientNamespace string `yaml:"client_namespace"`
|
|
RQLiteDSN string `yaml:"rqlite_dsn"`
|
|
BootstrapPeers []string `yaml:"bootstrap_peers,omitempty"`
|
|
EnableHTTPS bool `yaml:"enable_https,omitempty"`
|
|
DomainName string `yaml:"domain_name,omitempty"`
|
|
TLSCacheDir string `yaml:"tls_cache_dir,omitempty"`
|
|
OlricServers []string `yaml:"olric_servers"`
|
|
OlricTimeout string `yaml:"olric_timeout,omitempty"`
|
|
IPFSClusterAPIURL string `yaml:"ipfs_cluster_api_url,omitempty"`
|
|
IPFSAPIURL string `yaml:"ipfs_api_url,omitempty"`
|
|
IPFSTimeout string `yaml:"ipfs_timeout,omitempty"`
|
|
IPFSReplicationFactor int `yaml:"ipfs_replication_factor,omitempty"`
|
|
}
|
|
|
|
// NewInstanceSpawner creates a new Gateway instance spawner
|
|
func NewInstanceSpawner(baseDir string, logger *zap.Logger) *InstanceSpawner {
|
|
return &InstanceSpawner{
|
|
logger: logger.With(zap.String("component", "gateway-instance-spawner")),
|
|
baseDir: baseDir,
|
|
instances: make(map[string]*GatewayInstance),
|
|
}
|
|
}
|
|
|
|
// instanceKey generates a unique key for an instance based on namespace and node
|
|
func instanceKey(ns, nodeID string) string {
|
|
return fmt.Sprintf("%s:%s", ns, nodeID)
|
|
}
|
|
|
|
// SpawnInstance starts a new Gateway instance for a namespace on a specific node.
|
|
// Returns the instance info or an error if spawning fails.
|
|
func (is *InstanceSpawner) SpawnInstance(ctx context.Context, cfg InstanceConfig) (*GatewayInstance, error) {
|
|
key := instanceKey(cfg.Namespace, cfg.NodeID)
|
|
|
|
is.mu.Lock()
|
|
if existing, ok := is.instances[key]; ok {
|
|
is.mu.Unlock()
|
|
// Instance already exists, return it if running
|
|
if existing.Status == InstanceStatusRunning {
|
|
return existing, nil
|
|
}
|
|
// Otherwise, remove it and start fresh
|
|
is.mu.Lock()
|
|
delete(is.instances, key)
|
|
}
|
|
is.mu.Unlock()
|
|
|
|
// Create config and logs directories
|
|
configDir := filepath.Join(is.baseDir, cfg.Namespace, "configs")
|
|
logsDir := filepath.Join(is.baseDir, cfg.Namespace, "logs")
|
|
dataDir := filepath.Join(is.baseDir, cfg.Namespace, "data")
|
|
|
|
for _, dir := range []string{configDir, logsDir, dataDir} {
|
|
if err := os.MkdirAll(dir, 0755); err != nil {
|
|
return nil, &InstanceError{
|
|
Message: fmt.Sprintf("failed to create directory %s", dir),
|
|
Cause: err,
|
|
}
|
|
}
|
|
}
|
|
|
|
// Generate config file
|
|
configPath := filepath.Join(configDir, fmt.Sprintf("gateway-%s.yaml", cfg.NodeID))
|
|
if err := is.generateConfig(configPath, cfg, dataDir); err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
instance := &GatewayInstance{
|
|
Namespace: cfg.Namespace,
|
|
NodeID: cfg.NodeID,
|
|
HTTPPort: cfg.HTTPPort,
|
|
BaseDomain: cfg.BaseDomain,
|
|
RQLiteDSN: cfg.RQLiteDSN,
|
|
OlricServers: cfg.OlricServers,
|
|
ConfigPath: configPath,
|
|
Status: InstanceStatusStarting,
|
|
logger: is.logger.With(zap.String("namespace", cfg.Namespace), zap.String("node_id", cfg.NodeID)),
|
|
}
|
|
|
|
instance.logger.Info("Starting Gateway instance",
|
|
zap.Int("http_port", cfg.HTTPPort),
|
|
zap.String("rqlite_dsn", cfg.RQLiteDSN),
|
|
zap.Strings("olric_servers", cfg.OlricServers),
|
|
)
|
|
|
|
// Find the gateway binary - look in common locations
|
|
var gatewayBinary string
|
|
possiblePaths := []string{
|
|
"./bin/gateway", // Development build
|
|
"/usr/local/bin/orama-gateway", // System-wide install
|
|
"/opt/orama/bin/gateway", // Package install
|
|
}
|
|
|
|
for _, path := range possiblePaths {
|
|
if _, err := os.Stat(path); err == nil {
|
|
gatewayBinary = path
|
|
break
|
|
}
|
|
}
|
|
|
|
// Also check PATH
|
|
if gatewayBinary == "" {
|
|
if path, err := exec.LookPath("orama-gateway"); err == nil {
|
|
gatewayBinary = path
|
|
}
|
|
}
|
|
|
|
if gatewayBinary == "" {
|
|
return nil, &InstanceError{
|
|
Message: "gateway binary not found (checked ./bin/gateway, /usr/local/bin/orama-gateway, /opt/orama/bin/gateway, PATH)",
|
|
Cause: nil,
|
|
}
|
|
}
|
|
|
|
instance.logger.Info("Found gateway binary", zap.String("path", gatewayBinary))
|
|
|
|
// Create command
|
|
cmd := exec.CommandContext(ctx, gatewayBinary, "--config", configPath)
|
|
instance.cmd = cmd
|
|
|
|
// Setup logging
|
|
logPath := filepath.Join(logsDir, fmt.Sprintf("gateway-%s.log", cfg.NodeID))
|
|
logFile, err := os.OpenFile(logPath, os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0644)
|
|
if err != nil {
|
|
return nil, &InstanceError{
|
|
Message: "failed to open log file",
|
|
Cause: err,
|
|
}
|
|
}
|
|
|
|
cmd.Stdout = logFile
|
|
cmd.Stderr = logFile
|
|
|
|
// Start the process
|
|
if err := cmd.Start(); err != nil {
|
|
logFile.Close()
|
|
return nil, &InstanceError{
|
|
Message: "failed to start Gateway process",
|
|
Cause: err,
|
|
}
|
|
}
|
|
|
|
logFile.Close()
|
|
|
|
instance.PID = cmd.Process.Pid
|
|
instance.StartedAt = time.Now()
|
|
|
|
// Store instance
|
|
is.mu.Lock()
|
|
is.instances[key] = instance
|
|
is.mu.Unlock()
|
|
|
|
// Wait for instance to be ready
|
|
if err := is.waitForInstanceReady(ctx, instance); err != nil {
|
|
// Kill the process on failure
|
|
if cmd.Process != nil {
|
|
_ = cmd.Process.Kill()
|
|
}
|
|
is.mu.Lock()
|
|
delete(is.instances, key)
|
|
is.mu.Unlock()
|
|
return nil, &InstanceError{
|
|
Message: "Gateway instance did not become ready",
|
|
Cause: err,
|
|
}
|
|
}
|
|
|
|
instance.Status = InstanceStatusRunning
|
|
instance.LastHealthCheck = time.Now()
|
|
|
|
instance.logger.Info("Gateway instance started successfully",
|
|
zap.Int("pid", instance.PID),
|
|
)
|
|
|
|
// Start background process monitor
|
|
go is.monitorInstance(instance)
|
|
|
|
return instance, nil
|
|
}
|
|
|
|
// generateConfig generates the Gateway YAML configuration file
|
|
func (is *InstanceSpawner) generateConfig(configPath string, cfg InstanceConfig, dataDir string) error {
|
|
gatewayCfg := GatewayYAMLConfig{
|
|
ListenAddr: fmt.Sprintf(":%d", cfg.HTTPPort),
|
|
ClientNamespace: cfg.Namespace,
|
|
RQLiteDSN: cfg.RQLiteDSN,
|
|
OlricServers: cfg.OlricServers,
|
|
// Note: DomainName is used for HTTPS/TLS, not needed for namespace gateways in dev mode
|
|
DomainName: cfg.BaseDomain,
|
|
}
|
|
|
|
data, err := yaml.Marshal(gatewayCfg)
|
|
if err != nil {
|
|
return &InstanceError{
|
|
Message: "failed to marshal Gateway config",
|
|
Cause: err,
|
|
}
|
|
}
|
|
|
|
if err := os.WriteFile(configPath, data, 0644); err != nil {
|
|
return &InstanceError{
|
|
Message: "failed to write Gateway config",
|
|
Cause: err,
|
|
}
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
// StopInstance stops a Gateway instance for a namespace on a specific node
|
|
func (is *InstanceSpawner) StopInstance(ctx context.Context, ns, nodeID string) error {
|
|
key := instanceKey(ns, nodeID)
|
|
|
|
is.mu.Lock()
|
|
instance, ok := is.instances[key]
|
|
if !ok {
|
|
is.mu.Unlock()
|
|
return nil // Already stopped
|
|
}
|
|
delete(is.instances, key)
|
|
is.mu.Unlock()
|
|
|
|
if instance.cmd != nil && instance.cmd.Process != nil {
|
|
instance.logger.Info("Stopping Gateway instance", zap.Int("pid", instance.PID))
|
|
|
|
// Send SIGTERM for graceful shutdown
|
|
if err := instance.cmd.Process.Signal(os.Interrupt); err != nil {
|
|
// If SIGTERM fails, kill it
|
|
_ = instance.cmd.Process.Kill()
|
|
}
|
|
|
|
// Wait for process to exit with timeout
|
|
done := make(chan error, 1)
|
|
go func() {
|
|
done <- instance.cmd.Wait()
|
|
}()
|
|
|
|
select {
|
|
case <-done:
|
|
instance.logger.Info("Gateway instance stopped gracefully")
|
|
case <-time.After(10 * time.Second):
|
|
instance.logger.Warn("Gateway instance did not stop gracefully, killing")
|
|
_ = instance.cmd.Process.Kill()
|
|
case <-ctx.Done():
|
|
_ = instance.cmd.Process.Kill()
|
|
return ctx.Err()
|
|
}
|
|
}
|
|
|
|
instance.Status = InstanceStatusStopped
|
|
return nil
|
|
}
|
|
|
|
// StopAllInstances stops all Gateway instances for a namespace
|
|
func (is *InstanceSpawner) StopAllInstances(ctx context.Context, ns string) error {
|
|
is.mu.RLock()
|
|
var keys []string
|
|
for key, inst := range is.instances {
|
|
if inst.Namespace == ns {
|
|
keys = append(keys, key)
|
|
}
|
|
}
|
|
is.mu.RUnlock()
|
|
|
|
var lastErr error
|
|
for _, key := range keys {
|
|
parts := strings.SplitN(key, ":", 2)
|
|
if len(parts) == 2 {
|
|
if err := is.StopInstance(ctx, parts[0], parts[1]); err != nil {
|
|
lastErr = err
|
|
}
|
|
}
|
|
}
|
|
return lastErr
|
|
}
|
|
|
|
// GetInstance returns the instance for a namespace on a specific node
|
|
func (is *InstanceSpawner) GetInstance(ns, nodeID string) (*GatewayInstance, bool) {
|
|
is.mu.RLock()
|
|
defer is.mu.RUnlock()
|
|
|
|
instance, ok := is.instances[instanceKey(ns, nodeID)]
|
|
return instance, ok
|
|
}
|
|
|
|
// GetNamespaceInstances returns all instances for a namespace
|
|
func (is *InstanceSpawner) GetNamespaceInstances(ns string) []*GatewayInstance {
|
|
is.mu.RLock()
|
|
defer is.mu.RUnlock()
|
|
|
|
var instances []*GatewayInstance
|
|
for _, inst := range is.instances {
|
|
if inst.Namespace == ns {
|
|
instances = append(instances, inst)
|
|
}
|
|
}
|
|
return instances
|
|
}
|
|
|
|
// HealthCheck checks if an instance is healthy
|
|
func (is *InstanceSpawner) HealthCheck(ctx context.Context, ns, nodeID string) (bool, error) {
|
|
instance, ok := is.GetInstance(ns, nodeID)
|
|
if !ok {
|
|
return false, &InstanceError{Message: "instance not found"}
|
|
}
|
|
|
|
healthy, err := instance.IsHealthy(ctx)
|
|
if healthy {
|
|
is.mu.Lock()
|
|
instance.LastHealthCheck = time.Now()
|
|
is.mu.Unlock()
|
|
}
|
|
return healthy, err
|
|
}
|
|
|
|
// waitForInstanceReady waits for the Gateway instance to be ready
|
|
func (is *InstanceSpawner) waitForInstanceReady(ctx context.Context, instance *GatewayInstance) error {
|
|
client := tlsutil.NewHTTPClient(2 * time.Second)
|
|
|
|
// Gateway health check endpoint
|
|
url := fmt.Sprintf("http://localhost:%d/v1/health", instance.HTTPPort)
|
|
|
|
maxAttempts := 120 // 2 minutes
|
|
for i := 0; i < maxAttempts; i++ {
|
|
select {
|
|
case <-ctx.Done():
|
|
return ctx.Err()
|
|
case <-time.After(1 * time.Second):
|
|
}
|
|
|
|
resp, err := client.Get(url)
|
|
if err != nil {
|
|
continue
|
|
}
|
|
resp.Body.Close()
|
|
|
|
if resp.StatusCode == http.StatusOK {
|
|
instance.logger.Debug("Gateway instance ready",
|
|
zap.Int("attempts", i+1),
|
|
)
|
|
return nil
|
|
}
|
|
}
|
|
|
|
return fmt.Errorf("Gateway did not become ready within timeout")
|
|
}
|
|
|
|
// monitorInstance monitors an instance and updates its status
|
|
func (is *InstanceSpawner) monitorInstance(instance *GatewayInstance) {
|
|
ticker := time.NewTicker(30 * time.Second)
|
|
defer ticker.Stop()
|
|
|
|
for range ticker.C {
|
|
is.mu.RLock()
|
|
key := instanceKey(instance.Namespace, instance.NodeID)
|
|
_, exists := is.instances[key]
|
|
is.mu.RUnlock()
|
|
|
|
if !exists {
|
|
// Instance was removed
|
|
return
|
|
}
|
|
|
|
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
|
|
healthy, _ := instance.IsHealthy(ctx)
|
|
cancel()
|
|
|
|
is.mu.Lock()
|
|
if healthy {
|
|
instance.Status = InstanceStatusRunning
|
|
instance.LastHealthCheck = time.Now()
|
|
} else {
|
|
instance.Status = InstanceStatusFailed
|
|
instance.logger.Warn("Gateway instance health check failed")
|
|
}
|
|
is.mu.Unlock()
|
|
|
|
// Check if process is still running
|
|
if instance.cmd != nil && instance.cmd.ProcessState != nil && instance.cmd.ProcessState.Exited() {
|
|
is.mu.Lock()
|
|
instance.Status = InstanceStatusStopped
|
|
is.mu.Unlock()
|
|
instance.logger.Warn("Gateway instance process exited unexpectedly")
|
|
return
|
|
}
|
|
}
|
|
}
|
|
|
|
// IsHealthy checks if the Gateway instance is healthy
|
|
func (gi *GatewayInstance) IsHealthy(ctx context.Context) (bool, error) {
|
|
url := fmt.Sprintf("http://localhost:%d/v1/health", gi.HTTPPort)
|
|
client := tlsutil.NewHTTPClient(5 * time.Second)
|
|
|
|
req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil)
|
|
if err != nil {
|
|
return false, err
|
|
}
|
|
|
|
resp, err := client.Do(req)
|
|
if err != nil {
|
|
return false, err
|
|
}
|
|
defer resp.Body.Close()
|
|
|
|
return resp.StatusCode == http.StatusOK, nil
|
|
}
|
|
|
|
// DSN returns the local connection address for this Gateway instance
|
|
func (gi *GatewayInstance) DSN() string {
|
|
return fmt.Sprintf("http://localhost:%d", gi.HTTPPort)
|
|
}
|
|
|
|
// ExternalURL returns the external URL for accessing this namespace's gateway
|
|
func (gi *GatewayInstance) ExternalURL() string {
|
|
return fmt.Sprintf("https://ns-%s.%s", gi.Namespace, gi.BaseDomain)
|
|
}
|