diff --git a/CHANGELOG.md b/CHANGELOG.md index e13dd74..0b57bb7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,6 +13,21 @@ The format is based on [Keep a Changelog][keepachangelog] and adheres to [Semant ### Deprecated ### Fixed +## [0.69.13] - 2025-11-14 + +### Added + +### Changed +- The Gateway service now waits for the Olric cache service to start before attempting initialization. +- Improved robustness of Olric cache client initialization with retry logic and exponential backoff. + +### Deprecated + +### Removed + +### Fixed +- Corrected the default path logic for 'gateway.yaml' to prioritize the production data directory while maintaining fallback to legacy paths. + ## [0.69.12] - 2025-11-14 ### Added diff --git a/Makefile b/Makefile index 5de5d6c..28d2509 100644 --- a/Makefile +++ b/Makefile @@ -19,7 +19,7 @@ test-e2e: .PHONY: build clean test run-node run-node2 run-node3 run-example deps tidy fmt vet lint clear-ports install-hooks kill -VERSION := 0.69.12 +VERSION := 0.69.13 COMMIT ?= $(shell git rev-parse --short HEAD 2>/dev/null || echo unknown) DATE ?= $(shell date -u +%Y-%m-%dT%H:%M:%SZ) LDFLAGS := -X 'main.version=$(VERSION)' -X 'main.commit=$(COMMIT)' -X 'main.date=$(DATE)' diff --git a/pkg/config/paths.go b/pkg/config/paths.go index c55b41e..81c90f2 100644 --- a/pkg/config/paths.go +++ b/pkg/config/paths.go @@ -42,14 +42,15 @@ func DefaultPath(component string) (string, error) { return "", err } + var gatewayDefault string // For gateway.yaml, check data/ directory first (production location) if component == "gateway.yaml" { dataPath := filepath.Join(dir, "data", component) if _, err := os.Stat(dataPath); err == nil { return dataPath, nil } - // Return data path as default for gateway.yaml (even if it doesn't exist yet) - return dataPath, nil + // Remember the preferred default so we can still fall back to legacy paths + gatewayDefault = dataPath } // First check in ~/.debros/configs/ (production installer location) @@ -64,6 +65,12 @@ func 
DefaultPath(component string) (string, error) { return legacyPath, nil } + if gatewayDefault != "" { + // If we preferred the data path (gateway.yaml) but didn't find it anywhere else, + // return the data path so error messages point to the production location. + return gatewayDefault, nil + } + // Return configs path as default (even if it doesn't exist yet) // This allows the error message to show the expected production location return configsPath, nil diff --git a/pkg/environments/production/services.go b/pkg/environments/production/services.go index 1463040..c0839b5 100644 --- a/pkg/environments/production/services.go +++ b/pkg/environments/production/services.go @@ -233,11 +233,12 @@ WantedBy=multi-user.target // GenerateGatewayService generates the DeBros Gateway systemd unit func (ssg *SystemdServiceGenerator) GenerateGatewayService(nodeType string) string { nodeService := fmt.Sprintf("debros-node-%s.service", nodeType) + olricService := "debros-olric.service" logFile := filepath.Join(ssg.debrosDir, "logs", "gateway.log") return fmt.Sprintf(`[Unit] Description=DeBros Gateway -After=%s -Wants=%s +After=%s %s +Wants=%s %s [Service] Type=simple @@ -262,7 +263,7 @@ ReadWritePaths=%s [Install] WantedBy=multi-user.target -`, nodeService, nodeService, ssg.debrosHome, ssg.debrosHome, ssg.debrosHome, ssg.debrosDir, logFile, logFile, ssg.debrosDir) +`, nodeService, olricService, nodeService, olricService, ssg.debrosHome, ssg.debrosHome, ssg.debrosHome, ssg.debrosDir, logFile, logFile, ssg.debrosDir) } // SystemdController manages systemd service operations diff --git a/pkg/gateway/gateway.go b/pkg/gateway/gateway.go index afcda16..ea32dd4 100644 --- a/pkg/gateway/gateway.go +++ b/pkg/gateway/gateway.go @@ -5,6 +5,7 @@ import ( "crypto/rand" "crypto/rsa" "database/sql" + "fmt" "net" "os" "path/filepath" @@ -25,6 +26,12 @@ import ( _ "github.com/rqlite/gorqlite/stdlib" ) +const ( + olricInitMaxAttempts = 5 + olricInitInitialBackoff = 500 * time.Millisecond + 
olricInitMaxBackoff = 5 * time.Second +) + // Config holds configuration for the gateway server type Config struct { ListenAddr string @@ -182,7 +189,7 @@ func New(logger *logging.ColoredLogger, cfg *Config) (*Gateway, error) { Servers: olricServers, Timeout: cfg.OlricTimeout, } - olricClient, olricErr := olric.NewClient(olricCfg, logger.Logger) + olricClient, olricErr := initializeOlricClientWithRetry(olricCfg, logger) if olricErr != nil { logger.ComponentWarn(logging.ComponentGeneral, "failed to initialize Olric cache client; cache endpoints disabled", zap.Error(olricErr)) } else { @@ -330,6 +337,38 @@ func (g *Gateway) getLocalSubscribers(topic, namespace string) []*localSubscribe return nil } +func initializeOlricClientWithRetry(cfg olric.Config, logger *logging.ColoredLogger) (*olric.Client, error) { + backoff := olricInitInitialBackoff + + for attempt := 1; attempt <= olricInitMaxAttempts; attempt++ { + client, err := olric.NewClient(cfg, logger.Logger) + if err == nil { + if attempt > 1 { + logger.ComponentInfo(logging.ComponentGeneral, "Olric cache client initialized after retries", + zap.Int("attempts", attempt)) + } + return client, nil + } + + logger.ComponentWarn(logging.ComponentGeneral, "Olric cache client init attempt failed", + zap.Int("attempt", attempt), + zap.Duration("retry_in", backoff), + zap.Error(err)) + + if attempt == olricInitMaxAttempts { + return nil, fmt.Errorf("failed to initialize Olric cache client after %d attempts: %w", attempt, err) + } + + time.Sleep(backoff) + backoff *= 2 + if backoff > olricInitMaxBackoff { + backoff = olricInitMaxBackoff + } + } + + return nil, fmt.Errorf("failed to initialize Olric cache client") +} + // discoverOlricServers discovers Olric server addresses from LibP2P peers // Returns a list of IP:port addresses where Olric servers are expected to run (port 3320) func discoverOlricServers(networkClient client.NetworkClient, logger *zap.Logger) []string {