diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..aa766c7 --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,85 @@ +name: CI + +on: + push: + branches: + - main + - nightly + pull_request: + branches: + - main + - nightly + +permissions: + contents: read + +concurrency: + group: ci-${{ github.ref }} + cancel-in-progress: true + +jobs: + go-test: + name: Go tests + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - name: Set up Go + uses: actions/setup-go@v5 + with: + go-version: "1.24" + cache-dependency-path: core/go.sum + + - name: Vet + working-directory: core + run: go vet ./... + + - name: Test + working-directory: core + run: go test -race -timeout 5m ./... + + sdk-build: + name: SDK typecheck, build, unit tests + runs-on: ubuntu-latest + defaults: + run: + working-directory: sdk + steps: + - uses: actions/checkout@v4 + + - name: Set up Node.js + uses: actions/setup-node@v4 + with: + node-version: "20" + + - name: Install pnpm + uses: pnpm/action-setup@v4 + with: + version: 9 + + - name: Install dependencies + run: pnpm install --frozen-lockfile + + - name: Typecheck + run: pnpm typecheck + + - name: Build + run: pnpm build + + - name: Unit tests + run: pnpm vitest run tests/unit + + version-sanity: + name: Verify VERSION ↔ sdk/package.json sync + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - name: Compare versions + run: | + ROOT=$(tr -d '[:space:]' < VERSION) + SDK=$(node -p "require('./sdk/package.json').version") + if [ "$ROOT" != "$SDK" ]; then + echo "::warning::/VERSION ($ROOT) and sdk/package.json ($SDK) differ. Run 'make -C core bump VER=$ROOT' to sync." 
+ else + echo "Versions in sync: $ROOT" + fi diff --git a/.github/workflows/publish-sdk.yml b/.github/workflows/publish-sdk.yml index 01b03f9..1c52d58 100644 --- a/.github/workflows/publish-sdk.yml +++ b/.github/workflows/publish-sdk.yml @@ -1,6 +1,8 @@ name: Publish SDK to npm on: + release: + types: [published] workflow_dispatch: inputs: version: @@ -26,6 +28,23 @@ jobs: - name: Checkout code uses: actions/checkout@v4 + - name: Verify VERSION file matches release tag + if: github.event_name == 'release' + working-directory: . + # Pass the tag through env so a crafted tag name cannot inject shell code. + env: + RELEASE_TAG: ${{ github.event.release.tag_name }} + run: | + TAG="$RELEASE_TAG" + EXPECTED="${TAG#v}" + EXPECTED="${EXPECTED%-nightly}" + ACTUAL=$(tr -d '[:space:]' < VERSION) + if [ "$EXPECTED" != "$ACTUAL" ]; then + echo "::error::Tag $TAG implies version '$EXPECTED' but /VERSION says '$ACTUAL'." + echo "::error::Run 'make -C core bump VER=$EXPECTED' and commit before tagging." + exit 1 + fi + - name: Set up Node.js uses: actions/setup-node@v4 with: @@ -41,8 +60,18 @@ jobs: run: pnpm install --frozen-lockfile - name: Bump version - if: inputs.version != '' - run: npm version ${{ inputs.version }} --no-git-tag-version + # Untrusted tag/input values reach the script only as env vars, never by interpolation. + env: + RELEASE_TAG: ${{ github.event.release.tag_name }} + INPUT_VERSION: ${{ inputs.version }} + run: | + if [ "${{ github.event_name }}" = "release" ]; then + VERSION="$RELEASE_TAG" + VERSION="${VERSION#v}" + npm version "$VERSION" --no-git-tag-version + elif [ -n "$INPUT_VERSION" ]; then + npm version "$INPUT_VERSION" --no-git-tag-version + fi - name: Typecheck run: pnpm typecheck @@ -60,18 +89,25 @@ jobs: NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }} - name: Publish - if: inputs.dry-run == false - run: npm publish --access public + if: github.event_name == 'release' || inputs.dry-run != true + # The release target branch is read from env (RELEASE_TARGET) rather than interpolated. + run: | + if [[ "$RELEASE_TARGET" != "main" && "${{ github.event_name }}" == "release" ]]; then + npm publish --access public --tag nightly + else + npm publish --access public + fi env: NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }} + RELEASE_TARGET: ${{ github.event.release.target_commitish }} - name: Get published version - if: inputs.dry-run == false + if:
github.event_name == 'release' || inputs.dry-run != true id: version run: echo "version=$(node -p "require('./package.json').version")" >> $GITHUB_OUTPUT - name: Create git tag - if: inputs.dry-run == false + if: github.event_name != 'release' && inputs.dry-run != true working-directory: . run: | git config user.name "github-actions[bot]" diff --git a/.github/workflows/release-apt.yml b/.github/workflows/release-apt.yml index 9092e3f..3d92970 100644 --- a/.github/workflows/release-apt.yml +++ b/.github/workflows/release-apt.yml @@ -25,6 +25,19 @@ jobs: - name: Checkout code uses: actions/checkout@v4 + - name: Verify VERSION file matches release tag + if: github.event_name == 'release' + run: | + TAG="${{ github.event.release.tag_name }}" + EXPECTED="${TAG#v}" + EXPECTED="${EXPECTED%-nightly}" + ACTUAL=$(tr -d '[:space:]' < VERSION) + if [ "$EXPECTED" != "$ACTUAL" ]; then + echo "::error::Tag $TAG implies version '$EXPECTED' but /VERSION says '$ACTUAL'." + echo "::error::Run 'make -C core bump VER=$EXPECTED' and commit before tagging." 
+ exit 1 + fi + - name: Set up Go uses: actions/setup-go@v5 with: @@ -58,8 +71,8 @@ jobs: LDFLAGS="-X 'main.version=$VERSION' -X 'main.commit=$COMMIT' -X 'main.date=$DATE'" mkdir -p build/usr/local/bin - go build -ldflags "$LDFLAGS" -o build/usr/local/bin/orama cmd/cli/main.go - go build -ldflags "$LDFLAGS" -o build/usr/local/bin/orama-node cmd/node/main.go + go build -ldflags "$LDFLAGS" -o build/usr/local/bin/orama ./cmd/cli + go build -ldflags "$LDFLAGS" -o build/usr/local/bin/orama-node ./cmd/node # Build the entire gateway package so helper files (e.g., config parsing) are included go build -ldflags "$LDFLAGS" -o build/usr/local/bin/orama-gateway ./cmd/gateway @@ -111,7 +124,6 @@ jobs: PKG_NAME="orama_${VERSION}_${ARCH}" dpkg-deb --build ${PKG_NAME} - mv ${PKG_NAME}.deb orama_${VERSION}_${ARCH}.deb - name: Upload artifact uses: actions/upload-artifact@v4 diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml index f949e51..2ad7fd1 100644 --- a/.github/workflows/release.yaml +++ b/.github/workflows/release.yaml @@ -13,29 +13,42 @@ permissions: jobs: build-release: runs-on: ubuntu-latest - + steps: - name: Checkout code uses: actions/checkout@v4 with: fetch-depth: 0 # Need full history for changelog - + + - name: Verify VERSION file matches release tag + run: | + TAG="${GITHUB_REF_NAME}" + EXPECTED="${TAG#v}" + EXPECTED="${EXPECTED%-nightly}" + ACTUAL=$(tr -d '[:space:]' < VERSION) + if [ "$EXPECTED" != "$ACTUAL" ]; then + echo "::error::Tag $TAG implies version '$EXPECTED' but /VERSION says '$ACTUAL'." + echo "::error::Run 'make -C core bump VER=$EXPECTED' and commit before tagging." 
+ exit 1 + fi + echo "VERSION file matches tag: $ACTUAL" + - name: Set up Go - uses: actions/setup-go@v4 + uses: actions/setup-go@v5 with: go-version: '1.24' cache-dependency-path: core/go.sum - + - name: Run GoReleaser - uses: goreleaser/goreleaser-action@v5 + uses: goreleaser/goreleaser-action@v6 with: distribution: goreleaser - version: latest + version: '~> v2' args: release --clean env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} HOMEBREW_TAP_TOKEN: ${{ secrets.HOMEBREW_TAP_TOKEN }} - + - name: Upload artifacts uses: actions/upload-artifact@v4 with: diff --git a/.gitignore b/.gitignore index 207bf3a..1fd070d 100644 --- a/.gitignore +++ b/.gitignore @@ -25,6 +25,7 @@ Thumbs.db # === Core (Go) === core/phantom-auth/ +bin/ core/bin/ core/bin-linux/ core/dist/ @@ -65,6 +66,7 @@ go.work *.db # === Website === +website/remote.conf website/node_modules/ website/dist/ website/invest-api/invest-api @@ -88,3 +90,6 @@ os/output/ .dev/ .local/ local/ + +# Implementation plans (not committed) +core/plans/ diff --git a/.goreleaser.yaml b/.goreleaser.yaml index dfffe94..c4cd3fa 100644 --- a/.goreleaser.yaml +++ b/.goreleaser.yaml @@ -1,7 +1,8 @@ -# GoReleaser Configuration for DeBros Network -# Builds and releases orama (CLI) and orama-node binaries -# Publishes to: GitHub Releases, Homebrew, and apt (.deb packages) +# GoReleaser v2 Configuration for DeBros Network +# Builds and releases orama (CLI) and orama-node binaries. +# Publishes to: GitHub Releases, Homebrew (stable only), and apt (.deb packages). 
+version: 2 project_name: orama-network env: @@ -9,8 +10,7 @@ env: before: hooks: - - cmd: go mod tidy - dir: core + - go -C core mod tidy builds: # orama CLI binary @@ -51,9 +51,9 @@ builds: archives: # Tar.gz archives for orama CLI - id: orama-archives - builds: + ids: - orama - format: tar.gz + formats: [tar.gz] name_template: "orama_{{ .Version }}_{{ .Os }}_{{ .Arch }}" files: - README.md @@ -61,9 +61,9 @@ archives: # Tar.gz archives for orama-node - id: orama-node-archives - builds: + ids: - orama-node - format: tar.gz + formats: [tar.gz] name_template: "orama-node_{{ .Version }}_{{ .Os }}_{{ .Arch }}" files: - README.md @@ -74,10 +74,10 @@ nfpms: # orama CLI .deb package - id: orama-deb package_name: orama - builds: + ids: - orama vendor: DeBros - homepage: https://github.com/DeBrosOfficial/network + homepage: https://github.com/DeBrosDAO/orama maintainer: DeBros description: CLI tool for the Orama decentralized network license: MIT @@ -87,7 +87,7 @@ nfpms: section: utils priority: optional contents: - - src: ./core/README.md + - src: ./README.md dst: /usr/share/doc/orama/README.md deb: lintian_overrides: @@ -96,10 +96,10 @@ nfpms: # orama-node .deb package - id: orama-node-deb package_name: orama-node - builds: + ids: - orama-node vendor: DeBros - homepage: https://github.com/DeBrosOfficial/network + homepage: https://github.com/DeBrosDAO/orama maintainer: DeBros description: Node daemon for the Orama decentralized network license: MIT @@ -109,25 +109,28 @@ nfpms: section: net priority: optional contents: - - src: ./core/README.md + - src: ./README.md dst: /usr/share/doc/orama-node/README.md deb: lintian_overrides: - statically-linked-binary -# Homebrew tap for macOS (orama CLI only) +# Homebrew tap for macOS (orama CLI only). +# Stable releases only — prereleases (nightly) are skipped so we don't +# pollute the tap or fight a 401 on a missing HOMEBREW_TAP_TOKEN. 
brews: - name: orama ids: - orama-archives repository: - owner: DeBrosOfficial + owner: DeBrosDAO name: homebrew-tap token: "{{ .Env.HOMEBREW_TAP_TOKEN }}" - folder: Formula - homepage: https://github.com/DeBrosOfficial/network + directory: Formula + homepage: https://github.com/DeBrosDAO/orama description: CLI tool for the Orama decentralized network license: MIT + skip_upload: '{{ if .Prerelease }}true{{ else }}false{{ end }}' install: | bin.install "orama" test: | @@ -138,7 +141,7 @@ checksum: algorithm: sha256 snapshot: - name_template: "{{ incpatch .Version }}-next" + version_template: "{{ incpatch .Version }}-next" changelog: sort: asc @@ -154,8 +157,8 @@ changelog: release: github: - owner: DeBrosOfficial - name: network + owner: DeBrosDAO + name: orama draft: false prerelease: auto name_template: "Release {{.Version}}" diff --git a/VERSION b/VERSION new file mode 100644 index 0000000..b1eed08 --- /dev/null +++ b/VERSION @@ -0,0 +1 @@ +0.122.9 diff --git a/core/Makefile b/core/Makefile index da8ab1a..75d9d38 100644 --- a/core/Makefile +++ b/core/Makefile @@ -61,9 +61,11 @@ test-e2e-quick: # Network - Distributed P2P Database System # Makefile for development and build tasks -.PHONY: build clean test deps tidy fmt vet lint install-hooks push-devnet push-testnet rollout-devnet rollout-testnet release +.PHONY: build clean test deps tidy fmt vet lint install-hooks push-devnet push-testnet rollout-devnet rollout-testnet release bump -VERSION := 0.120.0 +# Single source of truth — repo-root VERSION file. Update with `make bump VER=X.Y.Z` +# or by editing /VERSION directly. Release workflows verify this matches the tag. 
+VERSION := $(shell (cat ../VERSION 2>/dev/null || echo unknown) | tr -d '[:space:]') COMMIT ?= $(shell git rev-parse --short HEAD 2>/dev/null || echo unknown) DATE ?= $(shell date -u +%Y-%m-%dT%H:%M:%SZ) LDFLAGS := -X 'main.version=$(VERSION)' -X 'main.commit=$(COMMIT)' -X 'main.date=$(DATE)' @@ -80,6 +82,7 @@ build: deps go build -ldflags "$(LDFLAGS) -X 'github.com/DeBrosOfficial/network/pkg/gateway.BuildVersion=$(VERSION)' -X 'github.com/DeBrosOfficial/network/pkg/gateway.BuildCommit=$(COMMIT)' -X 'github.com/DeBrosOfficial/network/pkg/gateway.BuildTime=$(DATE)'" -o bin/gateway ./cmd/gateway go build -ldflags "$(LDFLAGS)" -o bin/sfu ./cmd/sfu go build -ldflags "$(LDFLAGS)" -o bin/turn ./cmd/turn + go build -ldflags "$(LDFLAGS)" -o bin/orama-sni-router ./cmd/sni-router @echo "Build complete! Run ./bin/orama version" # Cross-compile CLI for Linux (only binary needed locally; VPS builds everything else from source) @@ -129,6 +132,17 @@ rollout-devnet: rollout-testnet: ./bin/orama node rollout --env testnet --yes +# Bump the repo-root VERSION file and sync sdk/package.json.
+# Usage: make bump VER=0.122.9 +bump: + @if [ -z "$(VER)" ]; then \ + echo "Usage: make bump VER=X.Y.Z"; exit 1; \ + fi + @echo "$(VER)" > ../VERSION + @cd ../sdk && npm version $(VER) --no-git-tag-version > /dev/null + @echo "Bumped VERSION and sdk/package.json to $(VER)" + @echo "Next: git add ../VERSION ../sdk/package.json && git commit -m 'release: $(VER)'" + # Interactive release workflow (tag + push) release: @bash scripts/release.sh diff --git a/core/cli b/core/cli new file mode 100755 index 0000000..0a39f53 Binary files /dev/null and b/core/cli differ diff --git a/core/cmd/cli/root.go b/core/cmd/cli/root.go index 0f27fdd..8401311 100644 --- a/core/cmd/cli/root.go +++ b/core/cmd/cli/root.go @@ -18,7 +18,12 @@ import ( "github.com/DeBrosOfficial/network/pkg/cli/cmd/monitorcmd" "github.com/DeBrosOfficial/network/pkg/cli/cmd/namespacecmd" "github.com/DeBrosOfficial/network/pkg/cli/cmd/node" + "github.com/DeBrosOfficial/network/pkg/cli/cmd/nodescmd" + "github.com/DeBrosOfficial/network/pkg/cli/cmd/pushcmd" + "github.com/DeBrosOfficial/network/pkg/cli/cmd/rolloutcmd" "github.com/DeBrosOfficial/network/pkg/cli/cmd/sandboxcmd" + "github.com/DeBrosOfficial/network/pkg/cli/cmd/sshcmd" + "github.com/DeBrosOfficial/network/pkg/cli/cmd/statuscmd" ) // version metadata populated via -ldflags at build time @@ -91,6 +96,13 @@ and interacting with the Orama distributed network.`, // Sandbox command (ephemeral Hetzner Cloud clusters) rootCmd.AddCommand(sandboxcmd.Cmd) + // Unified node management commands + rootCmd.AddCommand(nodescmd.Cmd) + rootCmd.AddCommand(pushcmd.Cmd) + rootCmd.AddCommand(rolloutcmd.Cmd) + rootCmd.AddCommand(statuscmd.Cmd) + rootCmd.AddCommand(sshcmd.Cmd) + return rootCmd } diff --git a/core/cmd/gateway/config.go b/core/cmd/gateway/config.go index e263d1d..e97f27f 100644 --- a/core/cmd/gateway/config.go +++ b/core/cmd/gateway/config.go @@ -77,21 +77,26 @@ func parseGatewayConfig(logger *logging.ColoredLogger) *gateway.Config { } type yamlCfg struct { - 
ListenAddr string `yaml:"listen_addr"` - ClientNamespace string `yaml:"client_namespace"` - RQLiteDSN string `yaml:"rqlite_dsn"` - GlobalRQLiteDSN string `yaml:"global_rqlite_dsn"` - Peers []string `yaml:"bootstrap_peers"` - EnableHTTPS bool `yaml:"enable_https"` - DomainName string `yaml:"domain_name"` - TLSCacheDir string `yaml:"tls_cache_dir"` - OlricServers []string `yaml:"olric_servers"` - OlricTimeout string `yaml:"olric_timeout"` - IPFSClusterAPIURL string `yaml:"ipfs_cluster_api_url"` - IPFSAPIURL string `yaml:"ipfs_api_url"` - IPFSTimeout string `yaml:"ipfs_timeout"` - IPFSReplicationFactor int `yaml:"ipfs_replication_factor"` + ListenAddr string `yaml:"listen_addr"` + ClientNamespace string `yaml:"client_namespace"` + RQLiteDSN string `yaml:"rqlite_dsn"` + GlobalRQLiteDSN string `yaml:"global_rqlite_dsn"` + Peers []string `yaml:"bootstrap_peers"` + EnableHTTPS bool `yaml:"enable_https"` + DomainName string `yaml:"domain_name"` + TLSCacheDir string `yaml:"tls_cache_dir"` + OlricServers []string `yaml:"olric_servers"` + OlricTimeout string `yaml:"olric_timeout"` + IPFSClusterAPIURL string `yaml:"ipfs_cluster_api_url"` + IPFSAPIURL string `yaml:"ipfs_api_url"` + IPFSTimeout string `yaml:"ipfs_timeout"` + IPFSReplicationFactor int `yaml:"ipfs_replication_factor"` WebRTC yamlWebRTCCfg `yaml:"webrtc"` + // ClusterSecretPath: see GatewayYAMLConfig docstring. Optional; + // when set, the standalone gateway reads the file at this path + // and populates cfg.ClusterSecret so JWT signing keys can be + // derived deterministically (bug #215 fix). + ClusterSecretPath string `yaml:"cluster_secret_path"` } data, err := os.ReadFile(configPath) @@ -200,6 +205,30 @@ func parseGatewayConfig(logger *logging.ColoredLogger) *gateway.Config { cfg.IPFSReplicationFactor = y.IPFSReplicationFactor } + // Cluster secret — bug #215 fix. 
The host-managed gateway in + // pkg/node/gateway.go reads this from a known on-disk path; the + // standalone binary (used by namespace gateways via systemd) needs the + // same access so it can derive the cluster-wide Ed25519 JWT signing + // key. Without this, namespace gateways had per-node random keys and + // JWTs minted on one node were unverifiable on another, leaving + // `caller_jwt_subject` empty in serverless host functions. + if path := strings.TrimSpace(y.ClusterSecretPath); path != "" { + secretBytes, err := os.ReadFile(path) + if err != nil { + logger.ComponentError(logging.ComponentGeneral, + "cluster_secret_path is set but the file is unreadable; "+ + "JWTs will use a per-node random signing key and will not "+ + "verify cross-node — bug #215 will reproduce", + zap.String("path", path), + zap.Error(err)) + } else { + cfg.ClusterSecret = strings.TrimSpace(string(secretBytes)) + logger.ComponentInfo(logging.ComponentGeneral, + "Loaded cluster secret for cluster-wide JWT signing key derivation", + zap.String("path", path)) + } + } + // WebRTC configuration cfg.WebRTCEnabled = y.WebRTC.Enabled if y.WebRTC.SFUPort > 0 { diff --git a/core/cmd/sni-router/main.go b/core/cmd/sni-router/main.go new file mode 100644 index 0000000..cc727df --- /dev/null +++ b/core/cmd/sni-router/main.go @@ -0,0 +1,242 @@ +// Command sni-router is a TLS-level Server Name Indication router. +// +// It listens on a public TCP port (typically :443), peeks at the TLS +// ClientHello SNI on each connection, and forwards the raw stream to +// a configured backend. It does NOT terminate TLS — encrypted bytes +// pass through verbatim. This lets one port serve multiple TLS-speaking +// backends (HTTPS for the gateway, TURN-over-TLS for stealth WebRTC). +// +// See pkg/sniproxy for the underlying library. +// +// Configuration: YAML file at --config (defaults to ~/.orama/sni-router.yaml). 
+// +// Example sni-router.yaml: +// +// listen: ":443" +// client_hello_timeout: 5s +// backend_dial_timeout: 5s +// max_concurrent_conns: 10000 +// fallback: +// name: caddy +// addr: "127.0.0.1:8443" +// routes: +// - match: "cdn.example.com" +// backend: +// name: turn-tls +// addr: "127.0.0.1:5349" +// - match: "turn.example.com" +// backend: +// name: turn-tls +// addr: "127.0.0.1:5349" +// - match: "*.ns-myapp.example.com" +// backend: +// name: gateway +// addr: "127.0.0.1:8443" +package main + +import ( + "flag" + "fmt" + "net" + "os" + "os/signal" + "path/filepath" + "strings" + "syscall" + "time" + + "github.com/DeBrosOfficial/network/pkg/config" + "github.com/DeBrosOfficial/network/pkg/logging" + "github.com/DeBrosOfficial/network/pkg/sniproxy" + "go.uber.org/zap" +) + +var ( + version = "dev" + commit = "unknown" +) + +// yamlBackend mirrors sniproxy.Backend for YAML decoding. +type yamlBackend struct { + Name string `yaml:"name"` + Network string `yaml:"network"` + Addr string `yaml:"addr"` +} + +// yamlRoute mirrors sniproxy.Route for YAML decoding. +type yamlRoute struct { + Match string `yaml:"match"` + Backend yamlBackend `yaml:"backend"` +} + +// yamlConfig is the on-disk configuration shape. 
+type yamlConfig struct { + Listen string `yaml:"listen"` + ClientHelloTimeout time.Duration `yaml:"client_hello_timeout"` + BackendDialTimeout time.Duration `yaml:"backend_dial_timeout"` + MaxConcurrentConns int `yaml:"max_concurrent_conns"` + Fallback yamlBackend `yaml:"fallback"` + Routes []yamlRoute `yaml:"routes"` +} + +func main() { + logger, err := logging.NewColoredLogger(logging.ComponentSNI, true) + if err != nil { + fmt.Fprintf(os.Stderr, "failed to init logger: %v\n", err) + os.Exit(1) + } + + logger.ComponentInfo(logging.ComponentSNI, "Starting SNI router", + zap.String("version", version), + zap.String("commit", commit)) + + cfg := parseConfig(logger) + + router := sniproxy.NewRouter(toBackend(cfg.Fallback)) + router.Replace(toRoutes(cfg.Routes), toBackend(cfg.Fallback)) + + srv := sniproxy.NewServer(router, sniproxy.Config{ + ClientHelloTimeout: cfg.ClientHelloTimeout, + BackendDialTimeout: cfg.BackendDialTimeout, + MaxConcurrentConns: cfg.MaxConcurrentConns, + }, logger.Logger) + + ln, err := net.Listen("tcp", cfg.Listen) + if err != nil { + logger.ComponentError(logging.ComponentSNI, "Failed to listen", + zap.String("addr", cfg.Listen), zap.Error(err)) + os.Exit(1) + } + + logger.ComponentInfo(logging.ComponentSNI, "SNI router listening", + zap.String("addr", cfg.Listen), + zap.Int("routes", len(cfg.Routes)), + zap.String("fallback", cfg.Fallback.Addr), + ) + + // Run Serve in a goroutine so the main goroutine can wait on signals. + serveErrCh := make(chan error, 1) + go func() { + serveErrCh <- srv.Serve(ln) + }() + + // Wait for termination signal or unrecoverable Serve error. 
+ quit := make(chan os.Signal, 1) + signal.Notify(quit, os.Interrupt, syscall.SIGTERM) + + select { + case sig := <-quit: + logger.ComponentInfo(logging.ComponentSNI, "Shutdown signal received", + zap.String("signal", sig.String())) + case err := <-serveErrCh: + logger.ComponentError(logging.ComponentSNI, "Serve returned", + zap.Error(err)) + } + + // Stop accepting new connections, then drain in-flight ones. + _ = ln.Close() + srv.Close() + + logger.ComponentInfo(logging.ComponentSNI, "SNI router shutdown complete") +} + +func parseConfig(logger *logging.ColoredLogger) yamlConfig { + configFlag := flag.String("config", "", "Config file path (absolute or filename in ~/.orama)") + flag.Parse() + + var configPath string + var err error + if *configFlag != "" { + if filepath.IsAbs(*configFlag) { + configPath = *configFlag + } else { + configPath, err = config.DefaultPath(*configFlag) + if err != nil { + logger.ComponentError(logging.ComponentSNI, "Failed to determine config path", + zap.Error(err)) + os.Exit(1) + } + } + } else { + configPath, err = config.DefaultPath("sni-router.yaml") + if err != nil { + logger.ComponentError(logging.ComponentSNI, "Failed to determine config path", + zap.Error(err)) + os.Exit(1) + } + } + + data, err := os.ReadFile(configPath) + if err != nil { + logger.ComponentError(logging.ComponentSNI, "Config file not found", + zap.String("path", configPath), zap.Error(err)) + fmt.Fprintf(os.Stderr, "\nConfig file not found at %s\n", configPath) + os.Exit(1) + } + + var y yamlConfig + if err := config.DecodeStrict(strings.NewReader(string(data)), &y); err != nil { + logger.ComponentError(logging.ComponentSNI, "Failed to parse SNI router config", + zap.Error(err)) + fmt.Fprintf(os.Stderr, "Configuration parse error: %v\n", err) + os.Exit(1) + } + + if errs := validateConfig(&y); len(errs) > 0 { + fmt.Fprintf(os.Stderr, "\nSNI router configuration errors (%d):\n", len(errs)) + for _, e := range errs { + fmt.Fprintf(os.Stderr, " - %s\n", e) + } + 
fmt.Fprintf(os.Stderr, "\nPlease fix the configuration and try again.\n") + os.Exit(1) + } + + logger.ComponentInfo(logging.ComponentSNI, "Loaded SNI router configuration", + zap.String("path", configPath), + ) + + return y +} + +// validateConfig returns a non-empty slice of human-readable errors on misconfig. +func validateConfig(y *yamlConfig) []string { + var errs []string + if y.Listen == "" { + errs = append(errs, "listen: required (e.g. \":443\")") + } + if y.Fallback.Addr == "" { + errs = append(errs, "fallback.addr: required (where to send unmatched SNIs, typically Caddy)") + } + for i, r := range y.Routes { + if r.Match == "" { + errs = append(errs, fmt.Sprintf("routes[%d].match: required", i)) + } + if r.Backend.Addr == "" { + errs = append(errs, fmt.Sprintf("routes[%d].backend.addr: required", i)) + } + } + return errs +} + +func toBackend(b yamlBackend) sniproxy.Backend { + network := b.Network + if network == "" { + network = "tcp" + } + return sniproxy.Backend{ + Name: b.Name, + Network: network, + Addr: b.Addr, + } +} + +func toRoutes(in []yamlRoute) []sniproxy.Route { + out := make([]sniproxy.Route, len(in)) + for i, r := range in { + out[i] = sniproxy.Route{ + Match: r.Match, + Backend: toBackend(r.Backend), + } + } + return out +} diff --git a/core/docs/DEV_DEPLOY.md b/core/docs/DEV_DEPLOY.md index 09bbbdc..e30c243 100644 --- a/core/docs/DEV_DEPLOY.md +++ b/core/docs/DEV_DEPLOY.md @@ -94,6 +94,46 @@ orama monitor report --env testnet - **DON'T** clear RQLite data directories unless doing a full cluster rebuild - **DON'T** use `systemctl stop orama-node` on multiple nodes simultaneously +#### Schema-Migration Ordering Invariant + +The gateway binary embeds a set of SQL migrations. The highest-numbered migration is the schema version that binary REQUIRES — **the gateway will refuse to start if its required schema isn't applied** (the schema-version contract added after the 2026-05-06 incident). 
+ +This means rolling upgrades have ONE invariant you must respect: + +> The new gateway binary's required migrations must be applied to RQLite **before or as part of** starting the new binary on a node. + +There are two acceptable patterns: + +**Pattern A — let the gateway apply migrations on startup (default).** +The gateway calls `ApplyEmbeddedMigrations` during `NewDependencies` and asserts the schema is at the required version before serving traffic. If the apply succeeds, you're done. If a transient error blocks the apply, gateway startup aborts with a clear `schema mismatch: binary requires version N, database has M` error. + +This is the default for both the genesis startup flow and rolling upgrades. No operator action required when it works. + +**Pattern B — pre-apply migrations explicitly via the CLI.** +On any node: +```bash +sudo orama node schema status # show binary required vs applied +sudo orama node schema apply --yes # apply pending migrations +``` +Then start the new gateway. Useful when you want explicit control during a high-risk upgrade or when the auto-apply path is failing for reasons you want to debug separately. + +#### Verifying schema state remotely + +Tenants can self-check schema drift without SSH access via: +``` +GET /v1/schema-status +``` +Returns `{ok, required_version, applied_version, in_sync, pending: [...]}`. The same data is available via `orama node schema status` for operators with shell access. + +#### Build-time guard (CI) + +`go test ./migrations/` runs a roundtrip test that opens an in-memory SQLite, applies every embedded migration, and exercises representative SQL operations from the platform's Go code. If a Go handler is added that references a column no migration creates, the test fails — drift is caught at PR review time, not at production deploy. + +When adding a new platform table or column: +1. Write the migration in `core/migrations/NNN_description.sql` +2. 
Update the relevant Go code that reads/writes the new column +3. Add an exemplar to `migrations/roundtrip_test.go` mirroring the new SQL — this enforces the contract permanently + #### Recovery from Cluster Split If nodes get stuck in "Candidate" state or show "leader not found" errors: @@ -449,6 +489,60 @@ sudo cp caddy-root-ca.crt /usr/local/share/ca-certificates/caddy-root-ca.crt sudo update-ca-certificates ``` +## Push notifications + +Push provider configuration is **tenant-self-service** as of bug #220 +follow-up. Tenants set their own ntfy / Expo credentials via authenticated +HTTP — operators no longer need to edit YAML and restart for every namespace +that wants push. + +### Tenant flow (no operator involvement) + +```bash +# Set per-namespace config +curl -X PUT https://ns-anchat-test.orama-devnet.network/v1/push/config \ + -H 'Authorization: Bearer YOUR_TOKEN' \ + -H 'Content-Type: application/json' \ + -d '{"ntfy_base_url": "https://ntfy.sh"}' + +# Read current config (secrets redacted to booleans) +curl https://ns-anchat-test.orama-devnet.network/v1/push/config \ + -H 'Authorization: Bearer YOUR_TOKEN' + +# Clear (push reverts to gateway YAML defaults, or 503 if no defaults) +curl -X DELETE https://ns-anchat-test.orama-devnet.network/v1/push/config \ + -H 'Authorization: Bearer YOUR_TOKEN' +``` + +Per-namespace config takes effect on the NEXT push send (the cached +dispatcher is invalidated on PUT/DELETE). No restart needed. + +### Operator flow (cluster-wide defaults — optional) + +Operators can still seed defaults in the gateway YAML. Per-namespace config +OVERRIDES the defaults; namespaces with no row inherit them. + +```yaml +# Cluster-wide push defaults (optional; tenants override per-namespace) +push: + ntfy_base_url: "https://ntfy.sh" # default for namespaces with no override + expo_access_token: "..."
# default Expo token +``` + +### Encryption + +Sensitive credentials (`ntfy_auth_token`, `expo_access_token`) are +AES-256-GCM-encrypted at rest in the `namespace_push_config` table using +a key derived from the cluster secret. The GET endpoint returns boolean +`has_X` flags only — credentials are NEVER echoed back over HTTP. + +### Disabling push entirely + +If `cluster_secret` isn't configured on the gateway, the push subsystem +is disabled and `/v1/push/*` returns 503. To enable: set the cluster secret +and restart. (This is the only operator-side restart still required, and +it's a one-time action at gateway provisioning.) + ## Project Structure See [ARCHITECTURE.md](ARCHITECTURE.md) for the full architecture overview. diff --git a/core/docs/SERVERLESS.md b/core/docs/SERVERLESS.md index 6f27104..195ba8d 100644 --- a/core/docs/SERVERLESS.md +++ b/core/docs/SERVERLESS.md @@ -42,6 +42,10 @@ name: my-function # Required. Letters, digits, hyphens, underscores. public: false # Allow unauthenticated invocation (default: false) memory: 64 # Memory limit in MB (1-256, default: 64) timeout: 30 # Execution timeout in seconds (1-300, default: 30) + # Bump to 60-300 for batch DB ops, schema migrations, + # or anything that does many sequential host calls. + # Functions that exceed timeout return the canonical + # TIMEOUT envelope: {ok:false, error:{code:"TIMEOUT",...}}. retry: count: 0 # Retry attempts on failure (default: 0) delay: 5 # Seconds between retries (default: 5) @@ -99,15 +103,31 @@ tinygo build -o function.wasm -target wasi function.go ## Host Functions API -Host functions let your WASM code interact with Orama services. They are imported from the `"env"` or `"host"` module (both work) and use a pointer/length ABI for string parameters. +Host functions let your WASM code interact with Orama services. 
They use a pointer/length ABI for string parameters and are registered at runtime under three module-name aliases — all three resolve to the SAME function table: -All host functions are registered at runtime by the engine. They are available to every function without additional configuration. +| Module name | Status | Use | +|---|---|---| +| `env` | **canonical** | Recommended for new code. Matches the WASI / TinyGo convention used by every example in this doc and the `sdk/fn` package. | +| `host` | alias (kept) | Long-standing alternative; supported indefinitely. | +| `orama` | alias (kept) | Brand-name alias; supported indefinitely so existing code that intuited this name keeps working. | + +A function may import any host call from any of the three names interchangeably: + +```go +//go:wasmimport env db_query // canonical (preferred) +//go:wasmimport host db_query // identical +//go:wasmimport orama db_query // identical +``` + +If you see the runtime error `failed to instantiate module: module[X] not instantiated`, your function imported from a name other than the three above — fix the directive. Most functions written using the [`sdk/fn`](../sdk/fn) package don't need any `//go:wasmimport` directives at all (the SDK uses stdin/stdout for I/O). ### Context | Function | Description | |----------|-------------| -| `get_caller_wallet()` → string | Wallet address of the caller (from JWT) | +| `get_caller_wallet()` → string | Resolved caller wallet (JWT subject if Bearer auth, else namespace pseudo-id when API-key auth). | +| `get_caller_jwt_subject()` → string | JWT `sub` claim explicitly. Empty when the request was not JWT-authenticated. Use this when binding on the JWT-signed identity matters (e.g. signup flows verifying the caller signed for the wallet they're registering). | +| `get_caller_claim(name)` → string | Custom JWT claim by name (tier, subscription, etc.). Empty if missing or non-JWT request. 
| | `get_request_id()` → string | Unique invocation ID | | `get_env(key)` → string | Environment variable from function.yaml | | `get_secret(name)` → string | Decrypted secret value (see [Managing Secrets](#managing-secrets)) | @@ -116,15 +136,36 @@ All host functions are registered at runtime by the engine. They are available t | Function | Description | |----------|-------------| -| `db_query(sql, argsJSON)` → JSON | Execute SELECT query. Args as JSON array. Returns JSON array of row objects. | -| `db_execute(sql, argsJSON)` → int | Execute INSERT/UPDATE/DELETE. Returns affected row count. | +| `db_query_v2(sql, argsJSON)` → JSON | **Recommended.** Execute SELECT. Returns `{"rows": [...], "error": "..."}` — distinguishes empty result from query failure. | +| `db_execute_v2(sql, argsJSON)` → JSON | **Recommended.** Execute INSERT/UPDATE/DELETE. Returns `{"rows_affected": N, "last_insert_id": M, "error": "..."}` — distinguishes 0-rows-affected from a real failure. | +| `db_query(sql, argsJSON)` → JSON | Legacy. Execute SELECT, returns JSON array of rows. No way to surface query errors — prefer `db_query_v2`. | +| `db_execute(sql, argsJSON)` → int | Legacy. Returns affected rows ONLY. **Returns 0 for both "0 rows" and "SQL error" — caller can't distinguish.** Prefer `db_execute_v2`. | +| `db_transaction(opsJSON)` → JSON | Atomic batch — see "Database Transactions" below. 
| -Example query from WASM: -``` -db_query("SELECT push_token, device_type FROM devices WHERE user_id = ?", '["user123"]') -→ [{"push_token": "abc...", "device_type": "ios"}] +Example v2 usage from WASM (`callDBExecuteV2` stands for a wrapper, not shown, that marshals the SQL and args into the pointer/length pairs expected by the raw `dbExecuteV2` import): + +```go +//go:wasmimport env db_execute_v2 +func dbExecuteV2(sqlPtr, sqlLen, argsPtr, argsLen uint32) uint64 + +resultBytes := callDBExecuteV2(`INSERT INTO event_seq (topic, next_seq) VALUES (?, 0) + ON CONFLICT(topic) DO NOTHING`, + []any{"user/abc/account"}) + +var res struct { + RowsAffected int64 `json:"rows_affected"` + Error string `json:"error"` +} +json.Unmarshal(resultBytes, &res) +if res.Error != "" { + // Real failure — bail out, don't mark migration applied. + return fmt.Errorf("event_seq INSERT failed: %s", res.Error) +} +// res.RowsAffected may legitimately be 0 (ON CONFLICT DO NOTHING) — that's not an error. ``` +The legacy `db_execute` is kept indefinitely so existing functions don't break. New code should use `db_execute_v2` for any path where distinguishing "no rows" from "SQL error" matters — most paths. + ### Cache (Olric Distributed Cache) | Function | Description | @@ -153,6 +194,49 @@ db_query("SELECT push_token, device_type FROM devices WHERE user_id = ?", '["use | `log_info(message)` | Log info-level message (captured in invocation logs). | | `log_error(message)` | Log error-level message. | +## Configuring Push Notifications (per-namespace) + +Push providers (ntfy / Expo) are configured **per namespace** by the tenant — +no operator involvement, no SSH access required. 
Set, read, or clear via: + +```bash +# Set / update (sensitive credentials are encrypted at rest) +curl -X PUT https://ns-myapp.example.com/v1/push/config \ + -H 'Authorization: Bearer YOUR_TOKEN' \ + -H 'Content-Type: application/json' \ + -d '{ + "ntfy_base_url": "https://ntfy.sh", + "ntfy_auth_token": "tk_…" + }' + +# Read (sensitive fields redacted to booleans) +curl https://ns-myapp.example.com/v1/push/config \ + -H 'Authorization: Bearer YOUR_TOKEN' + +# Clear (push reverts to gateway-wide defaults if any, else 503) +curl -X DELETE https://ns-myapp.example.com/v1/push/config \ + -H 'Authorization: Bearer YOUR_TOKEN' +``` + +### Field semantics + +| Field | Sensitive? | Notes | +|---|---|---| +| `ntfy_base_url` | No | URL of the ntfy server. `https://ntfy.sh` works for testing. | +| `ntfy_auth_token` | Yes | Optional bearer token sent to ntfy. Encrypted at rest. | +| `expo_access_token` | Yes | Expo Push API access token. Encrypted at rest. | + +PUT semantics are **field-level** — a `null` (or omitted) field leaves the +existing value alone; an explicit empty string clears just that field. To +clear EVERYTHING use DELETE. + +After a PUT the next `push_send` (host call) or `POST /v1/push/send` uses +the new providers — the cached dispatcher is invalidated automatically. + +If no per-namespace config is set AND the gateway has no YAML defaults, the +push endpoints return **503 SERVICE_UNAVAILABLE** with a message naming the +exact config to set. + ## Managing Secrets Secrets are encrypted at rest (AES-256-GCM) and scoped to your namespace. Functions read them via `get_secret("name")` at runtime. diff --git a/core/docs/STEALTH_TURN.md b/core/docs/STEALTH_TURN.md new file mode 100644 index 0000000..1005e89 --- /dev/null +++ b/core/docs/STEALTH_TURN.md @@ -0,0 +1,187 @@ +# Stealth TURN Deployment Guide + +## What this is + +A TLS-level SNI router that lets Orama serve TURN-over-TLS on `:443`, +sharing the port with Caddy HTTPS. 
From a network observer's +perspective, TURN traffic is indistinguishable from ordinary HTTPS — +useful for users in regions that block standard VoIP ports (UAE, Saudi +Arabia, China, Iran). + +## Architecture + +``` + Internet + │ + ▼ + TCP :443 + │ + ┌─────────┴─────────┐ + │ orama-sni-router │ peeks SNI, forwards bytes + └─────────┬─────────┘ + │ + ┌───────────────┼────────────────┐ + ▼ ▼ + cdn. *., + turn. (everything else) + │ │ + ▼ ▼ + Pion TURN-TLS Caddy + 127.0.0.1:5349 127.0.0.1:8443 + (existing) (moved from :443) +``` + +The router does **not** terminate TLS. It reads the unencrypted TLS +ClientHello (first ~5 KB), inspects the SNI extension, and dials the +matching backend. Encrypted bytes pass through verbatim. + +## Components + +- **Library:** `pkg/sniproxy/` — ClientHello parser, route table, TCP server +- **Binary:** `cmd/sni-router/` (built as `bin/orama-sni-router`) +- **Systemd unit:** `systemd/orama-sni-router.service` +- **Config:** `~/.orama/sni-router.yaml` + +## Deployment cutover + +⚠️ **This change touches production `:443`. Stage on one node first, watch for 24h, then roll out.** + +### 1. Reconfigure Caddy to listen on `:8443` + +Update wherever the Caddy config is generated (`pkg/environments/production/installers/caddy.go`) +so Caddy binds `:8443` (HTTPS) and `:8080` (HTTP) instead of `:443` and `:80`. + +Drop `CAP_NET_BIND_SERVICE` from Caddy's systemd unit — it no longer needs privileged ports. + +### 2. Provision the cert SAN for `cdn.` + +Caddy's automatic Let's Encrypt flow needs to issue a cert covering +`cdn.` and `cdn.ns-*.` so Pion TURN can read it +on startup. Add these names to Caddy's TLS config block. + +### 3. 
Drop `sni-router.yaml` config + +Example for a single-namespace node: + +```yaml +listen: ":443" +client_hello_timeout: 5s +backend_dial_timeout: 5s +max_concurrent_conns: 10000 +fallback: + name: caddy + addr: "127.0.0.1:8443" +routes: + - match: "cdn.example.com" + backend: + name: turn-tls + addr: "127.0.0.1:5349" + - match: "turn.example.com" + backend: + name: turn-tls + addr: "127.0.0.1:5349" +``` + +For multi-namespace, add per-namespace TURN backends (each namespace's +TURN-TLS port is allocated by `pkg/namespace`): + +```yaml + - match: "cdn.ns-myapp.example.com" + backend: { name: "turn-myapp", addr: "127.0.0.1:5349" } + - match: "cdn.ns-other.example.com" + backend: { name: "turn-other", addr: "127.0.0.1:5350" } +``` + +### 4. Deploy + start in order + +```bash +# Install binary +sudo cp bin-linux/orama-sni-router /opt/orama/bin/ + +# Install service +sudo cp systemd/orama-sni-router.service /etc/systemd/system/ +sudo systemctl daemon-reload + +# Stop Caddy briefly (it's about to lose :443) +sudo systemctl stop caddy + +# Start the SNI router (it takes :443) +sudo systemctl enable --now orama-sni-router + +# Restart Caddy on its new port +sudo systemctl start caddy + +# Verify +curl -v https://cdn.:443 # should hit TURN backend (TLS handshake will fail; that's fine) +curl -v https://:443 # should hit Caddy (normal HTTPS response) +``` + +### 5. Enable stealth in the gateway + +Once the SNI router is live, tell the gateway to advertise the stealth URI: + +```go +// in gateway dependencies / startup +webrtcHandlers.SetStealthCDNDomain("cdn.") +``` + +The credentials handler will start including `turns:cdn.:443` +in `POST /v1/webrtc/turn/credentials` responses automatically. + +### 6. 
Monitor + +```bash +journalctl -u orama-sni-router.service -f +journalctl -u caddy.service -f +``` + +Watch for: +- `Connection limit reached` warnings (bump `max_concurrent_conns`) +- `backend dial failed` warnings (Caddy isn't listening on `:8443`, or TURN isn't on `:5349`) +- `ClientHello peek failed` debugs (curious clients sending non-TLS to `:443` — usually port scanners) + +## Rollback + +If anything is wrong: + +```bash +sudo systemctl stop orama-sni-router +# Reconfigure Caddy back to :443 and restart +sudo systemctl restart caddy +``` + +Caddy reclaiming `:443` from the disabled router is the fastest way back to +the previous topology. + +## Known gaps + +- **Dynamic route source:** today's router reads YAML once at startup. To + pick up new namespaces without restart, implement a `RouteSource` that + polls `pkg/namespace` for active TURN deployments. The library is + already designed for `Router.Replace` to be called concurrently. +- **TLS cert hot-reload:** Pion TURN reads the cert once at startup. When + Caddy renews `cdn.`, Pion needs to be restarted to pick up + the new cert. A small file-watcher service (or a periodic restart in + off-peak hours) handles this for now. + +## What clients see + +Once enabled, the credentials response gains one entry: + +```json +{ + "username": "...", + "password": "...", + "ttl": 600, + "uris": [ + "turn:turn.example.com:3478?transport=udp", + "turn:turn.example.com:3478?transport=tcp", + "turns:turn.example.com:5349", + "turns:cdn.example.com:443" + ] +} +``` + +Browsers iterate ICE candidates; users in restricted regions will silently +succeed via the `:443` URI when others fail. No client-side change is +required. diff --git a/core/migrations/020_node_operators.sql b/core/migrations/020_node_operators.sql new file mode 100644 index 0000000..eb2343c --- /dev/null +++ b/core/migrations/020_node_operators.sql @@ -0,0 +1,14 @@ +-- Add operator wallet tracking to nodes. 
+-- operator_wallet links nodes to the wallet that provisioned them. + +ALTER TABLE dns_nodes ADD COLUMN operator_wallet TEXT; +ALTER TABLE dns_nodes ADD COLUMN environment TEXT DEFAULT 'production'; +ALTER TABLE dns_nodes ADD COLUMN ssh_user TEXT DEFAULT 'root'; +ALTER TABLE dns_nodes ADD COLUMN role TEXT DEFAULT 'node'; + +CREATE INDEX IF NOT EXISTS idx_dns_nodes_operator ON dns_nodes(operator_wallet); +CREATE INDEX IF NOT EXISTS idx_dns_nodes_environment ON dns_nodes(environment); + +ALTER TABLE wireguard_peers ADD COLUMN operator_wallet TEXT; + +ALTER TABLE invite_tokens ADD COLUMN operator_wallet TEXT; diff --git a/core/migrations/021_pubsub_trigger_patterns.sql b/core/migrations/021_pubsub_trigger_patterns.sql new file mode 100644 index 0000000..5b91e35 --- /dev/null +++ b/core/migrations/021_pubsub_trigger_patterns.sql @@ -0,0 +1,28 @@ +-- ============================================================================= +-- 021_pubsub_trigger_patterns.sql +-- +-- Add `topic_pattern` column alongside the existing `topic` column to +-- function_pubsub_triggers. The new column may contain SQLite GLOB +-- patterns (e.g. "presence:*") in addition to exact topic names. +-- +-- This is intentionally ADDITIVE rather than a column rename to remain +-- safe under rolling upgrades: +-- - Old binaries continue reading `topic` and keep working. +-- - New binaries read `topic_pattern` (which is back-filled from +-- `topic` for existing rows) and write BOTH columns. +-- A future migration can DROP COLUMN topic once every node is on the +-- new release. 
+-- ============================================================================= + +ALTER TABLE function_pubsub_triggers + ADD COLUMN topic_pattern TEXT NOT NULL DEFAULT ''; + +UPDATE function_pubsub_triggers +SET topic_pattern = topic +WHERE topic_pattern = ''; + +CREATE INDEX IF NOT EXISTS idx_function_pubsub_triggers_function + ON function_pubsub_triggers(function_id); + +CREATE INDEX IF NOT EXISTS idx_function_pubsub_triggers_enabled + ON function_pubsub_triggers(enabled); diff --git a/core/migrations/022_aggregation_windows.sql b/core/migrations/022_aggregation_windows.sql new file mode 100644 index 0000000..05f68f0 --- /dev/null +++ b/core/migrations/022_aggregation_windows.sql @@ -0,0 +1,20 @@ +-- ============================================================================= +-- 022_aggregation_windows.sql +-- +-- Add per-trigger aggregation parameters to function_pubsub_triggers. +-- +-- aggregation_window_ms = 0 means "no aggregation, invoke once per event" +-- (the existing behaviour). Any positive value enables buffering of events +-- in-memory on the dispatching node; the function is invoked once per +-- window with a batched payload. +-- +-- aggregation_max_batch_size caps the per-window batch. When the buffer +-- reaches this size, the dispatcher flushes immediately even if the +-- window timer hasn't fired yet. 
+-- ============================================================================= + +ALTER TABLE function_pubsub_triggers + ADD COLUMN aggregation_window_ms INTEGER NOT NULL DEFAULT 0; + +ALTER TABLE function_pubsub_triggers + ADD COLUMN aggregation_max_batch_size INTEGER NOT NULL DEFAULT 100; diff --git a/core/migrations/023_push_devices.sql b/core/migrations/023_push_devices.sql new file mode 100644 index 0000000..bc6c908 --- /dev/null +++ b/core/migrations/023_push_devices.sql @@ -0,0 +1,33 @@ +-- ============================================================================= +-- 023_push_devices.sql +-- +-- Per-namespace, per-user push notification device registry. +-- +-- token_encrypted is AES-256-GCM ciphertext (prefix 'enc:') derived via +-- pkg/secrets. Tokens are sensitive — they let the holder spam a user's +-- device — so they are never returned via any API or written to logs. +-- +-- provider matches a registered push.PushProvider name: +-- 'ntfy', 'expo', 'apns', 'fcm' (future), ... 
+-- ============================================================================= + +CREATE TABLE IF NOT EXISTS push_devices ( + id TEXT PRIMARY KEY, + namespace TEXT NOT NULL, + user_id TEXT NOT NULL, + device_id TEXT NOT NULL, + provider TEXT NOT NULL, + token_encrypted TEXT NOT NULL, + platform TEXT, + app_version TEXT, + created_at INTEGER NOT NULL, + updated_at INTEGER NOT NULL, + last_seen INTEGER, + UNIQUE(namespace, user_id, device_id) +); + +CREATE INDEX IF NOT EXISTS idx_push_devices_user + ON push_devices(namespace, user_id); + +CREATE INDEX IF NOT EXISTS idx_push_devices_provider + ON push_devices(provider); diff --git a/core/migrations/024_namespace_publish_seq.sql b/core/migrations/024_namespace_publish_seq.sql new file mode 100644 index 0000000..8aef559 --- /dev/null +++ b/core/migrations/024_namespace_publish_seq.sql @@ -0,0 +1,18 @@ +-- ============================================================================= +-- 024_namespace_publish_seq.sql +-- +-- Per-namespace monotonically-increasing sequence number assigned by +-- exec_and_publish (plan 08). The seq is included in the wake-up payload so +-- subscribers can detect "I'm behind, retry" gaps caused by cross-node +-- replication lag between the leader's commit and the gossipsub message. +-- +-- The row is upserted in the same atomic batch as the user's writes, so the +-- assigned seq exactly mirrors the commit number. 
See plan: +-- core/plans/platform/08_EXEC_AND_PUBLISH.md +-- ============================================================================= + +CREATE TABLE IF NOT EXISTS namespace_publish_seq ( + namespace TEXT PRIMARY KEY, + next_seq BIGINT NOT NULL DEFAULT 1, + updated_at INTEGER NOT NULL +); diff --git a/core/migrations/025_persistent_ws.sql b/core/migrations/025_persistent_ws.sql new file mode 100644 index 0000000..225c543 --- /dev/null +++ b/core/migrations/025_persistent_ws.sql @@ -0,0 +1,18 @@ +-- ============================================================================= +-- 025_persistent_ws.sql +-- +-- Persistent WebSocket function settings — see plan +-- core/plans/platform/06_PERSISTENT_WS_FUNCTIONS.md +-- +-- When ws_persistent is true, the function is bound to a single WebSocket +-- connection for its lifetime; exports ws_open / ws_frame / ws_close instead +-- of the default _start. See pkg/serverless/persistent for runtime details. +-- +-- All defaults are zero / false → backward compatible: existing functions +-- continue to use the per-frame stateless WS model. +-- ============================================================================= + +ALTER TABLE functions ADD COLUMN ws_persistent BOOLEAN DEFAULT FALSE; +ALTER TABLE functions ADD COLUMN ws_idle_timeout_sec INTEGER DEFAULT 0; +ALTER TABLE functions ADD COLUMN ws_max_frame_bytes INTEGER DEFAULT 0; +ALTER TABLE functions ADD COLUMN ws_max_inflight_per_conn INTEGER DEFAULT 0; diff --git a/core/migrations/026_namespace_push_config.sql b/core/migrations/026_namespace_push_config.sql new file mode 100644 index 0000000..554ba7f --- /dev/null +++ b/core/migrations/026_namespace_push_config.sql @@ -0,0 +1,26 @@ +-- ============================================================================= +-- 026_namespace_push_config.sql +-- +-- Per-namespace push notification provider configuration. 
Tenants set their +-- own ntfy / expo credentials via PUT /v1/push/config without operator +-- involvement (bug #220 follow-up — self-service tenant config). +-- +-- Sensitive credentials (auth tokens) are AES-256-GCM ciphertext via +-- pkg/secrets, prefix 'enc:'. Non-secret URLs (ntfy_base_url) stored +-- plaintext — they leak no security material. +-- +-- The gateway YAML config remains as a global fallback / default. A row +-- in this table OVERRIDES the YAML for that namespace; absence falls back. +-- ============================================================================= + +CREATE TABLE IF NOT EXISTS namespace_push_config ( + namespace TEXT PRIMARY KEY, + -- ntfy provider config (URL is non-secret; auth token is) + ntfy_base_url TEXT, + ntfy_auth_token_encrypted TEXT, + -- expo provider config (the access token IS sensitive) + expo_access_token_encrypted TEXT, + -- Audit metadata: who set this, and when (last update wins). + updated_at INTEGER NOT NULL, + updated_by TEXT +); diff --git a/core/migrations/contract.go b/core/migrations/contract.go new file mode 100644 index 0000000..52e410f --- /dev/null +++ b/core/migrations/contract.go @@ -0,0 +1,194 @@ +// Package migrations holds the embedded SQL migrations for the gateway's +// RQLite registry. This file defines the schema-version contract every +// gateway binary must enforce at startup. +// +// The contract: +// +// 1. The binary embeds every migration file in this directory. +// 2. RequiredVersion() returns the highest numbered migration in the embed. +// This is the schema version the binary REQUIRES to function correctly. +// 3. AssertSchema(ctx, db) queries the schema_migrations table and returns +// a typed *SchemaMismatchError if the applied version is below +// RequiredVersion. Gateway startup MUST treat this as fatal. +// +// Why: a rolling upgrade can swap the gateway binary without restarting the +// underlying RQLite process. 
If a new binary expects columns added by a +// migration that the RQLite-process startup never re-ran, INSERTs fail with +// cryptic errors at runtime. Asserting the contract at startup catches the +// mismatch immediately with an actionable error message. +// +// Background: this file is the long-term fix for the AnChat-test "missing +// ws_max_frame_bytes column" incident (2026-05-06). +package migrations + +import ( + "context" + "database/sql" + "fmt" + "io/fs" + "sort" + "strconv" + "strings" +) + +// MigrationInfo describes one embedded migration. +type MigrationInfo struct { + Version int + Name string + Path string +} + +// allMigrations holds every embedded migration sorted by version ascending. +// Computed once at startup; cheap to read repeatedly. +var allMigrations = mustListMigrations() + +func mustListMigrations() []MigrationInfo { + entries, err := fs.ReadDir(FS, ".") + if err != nil { + // In practice this can't happen — the embed.FS is built from a + // known directory. If it does, we can't safely run anything. + panic(fmt.Sprintf("migrations: failed to list embedded files: %v", err)) + } + + var out []MigrationInfo + for _, e := range entries { + if e.IsDir() { + continue + } + name := e.Name() + if !strings.HasSuffix(name, ".sql") { + continue + } + v, ok := parseVersion(name) + if !ok { + continue + } + out = append(out, MigrationInfo{ + Version: v, + Name: strings.TrimSuffix(name, ".sql"), + Path: name, + }) + } + sort.Slice(out, func(i, j int) bool { return out[i].Version < out[j].Version }) + return out +} + +// parseVersion extracts the integer prefix from "001_initial.sql" → 1. +// Returns ok=false for files without a leading numeric prefix. 
+func parseVersion(filename string) (int, bool) { + idx := strings.IndexByte(filename, '_') + if idx <= 0 { + return 0, false + } + v, err := strconv.Atoi(filename[:idx]) + if err != nil { + return 0, false + } + return v, true +} + +// All returns a snapshot of every embedded migration, sorted by version. 
+// The returned slice is a copy; safe to mutate. +func All() []MigrationInfo { + out := make([]MigrationInfo, len(allMigrations)) + copy(out, allMigrations) + return out +} + +// RequiredVersion returns the highest migration version embedded in this +// binary. Panics if no migrations are embedded (impossible in practice). +// +// This is the schema version the binary requires. The gateway asserts at +// startup that the database's applied schema is >= this value. +func RequiredVersion() int { + if len(allMigrations) == 0 { + panic("migrations: no embedded migrations found") + } + return allMigrations[len(allMigrations)-1].Version +} + +// SchemaMismatchError is returned when the database's applied schema is +// behind what the binary requires. Gateway startup MUST treat this as fatal +// and log the actionable hint. +type SchemaMismatchError struct { + RequiredVersion int + AppliedVersion int + Pending []MigrationInfo // migrations the binary has but the DB lacks +} + +func (e *SchemaMismatchError) Error() string { + pending := make([]string, 0, len(e.Pending)) + for _, m := range e.Pending { + pending = append(pending, fmt.Sprintf("%03d (%s)", m.Version, m.Name)) + } + return fmt.Sprintf( + "schema mismatch: binary requires version %d, database has %d. "+ + "Pending migrations: [%s]. "+ + "Run `orama node migrate-apply` on the namespace's RQLite to fix.", + e.RequiredVersion, e.AppliedVersion, strings.Join(pending, ", "), + ) +} + +// AppliedVersion queries the schema_migrations table and returns the highest +// version recorded as applied. Returns 0 (with nil error) if the table is +// empty — that's a fresh database, valid state. +// +// Returns an error if the schema_migrations table itself doesn't exist or +// can't be read; callers must distinguish that from "applied=0". 
+func AppliedVersion(ctx context.Context, db *sql.DB) (int, error) { + row := db.QueryRowContext(ctx, `SELECT COALESCE(MAX(version), 0) FROM schema_migrations`) + var v int + if err := row.Scan(&v); err != nil { + return 0, fmt.Errorf("migrations: query schema_migrations: %w", err) + } + return v, nil +} + +// AssertSchema verifies the database's applied schema is at least +// RequiredVersion(). Returns nil on match-or-newer, *SchemaMismatchError +// on lag. +// +// Newer-than-required is OK — that means an older binary is talking to a +// database that's been advanced by a newer binary in the cluster. The +// binary just won't use whatever the newer columns enable. (Gateway +// startup should still allow this; it's a normal rolling-upgrade window.) +func AssertSchema(ctx context.Context, db *sql.DB) error { + required := RequiredVersion() + applied, err := AppliedVersion(ctx, db) + if err != nil { + return fmt.Errorf("migrations.AssertSchema: %w", err) + } + if applied >= required { + return nil + } + + // Compute pending migrations for the error message. + pending := make([]MigrationInfo, 0) + for _, m := range allMigrations { + if m.Version > applied { + pending = append(pending, m) + } + } + return &SchemaMismatchError{ + RequiredVersion: required, + AppliedVersion: applied, + Pending: pending, + } +} + +// PendingMigrations returns migrations the binary has but the database +// hasn't applied. Used by the `orama node migrate-status` CLI to show +// the operator what would be applied by a `migrate-apply`. 
+func PendingMigrations(ctx context.Context, db *sql.DB) ([]MigrationInfo, error) { + applied, err := AppliedVersion(ctx, db) + if err != nil { + return nil, err + } + out := make([]MigrationInfo, 0) + for _, m := range allMigrations { + if m.Version > applied { + out = append(out, m) + } + } + return out, nil +} diff --git a/core/migrations/contract_test.go b/core/migrations/contract_test.go new file mode 100644 index 0000000..dcaa686 --- /dev/null +++ b/core/migrations/contract_test.go @@ -0,0 +1,231 @@ +package migrations + +import ( + "context" + "database/sql" + "errors" + "strings" + "testing" + + _ "github.com/mattn/go-sqlite3" +) + +// openTestDB returns an in-memory SQLite database. The migrations contract +// only cares about ANSI-ish SQL (CREATE TABLE, SELECT MAX, INSERT) — we +// don't need RQLite's distributed semantics for these tests. +func openTestDB(t *testing.T) *sql.DB { + t.Helper() + db, err := sql.Open("sqlite3", ":memory:") + if err != nil { + t.Fatalf("open in-memory sqlite: %v", err) + } + t.Cleanup(func() { _ = db.Close() }) + return db +} + +func ensureMigrationsTable(t *testing.T, db *sql.DB) { + t.Helper() + _, err := db.Exec(` + CREATE TABLE IF NOT EXISTS schema_migrations ( + version INTEGER PRIMARY KEY, + applied_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP + )`) + if err != nil { + t.Fatalf("create schema_migrations: %v", err) + } +} + +func TestRequiredVersion_matches_highest_embedded(t *testing.T) { + all := All() + if len(all) == 0 { + t.Fatal("no embedded migrations — embed.FS broken?") + } + want := all[len(all)-1].Version + if got := RequiredVersion(); got != want { + t.Errorf("RequiredVersion() = %d, want %d", got, want) + } +} + +func TestAll_returns_sorted_copy(t *testing.T) { + a := All() + for i := 1; i < len(a); i++ { + if a[i-1].Version >= a[i].Version { + t.Errorf("All() not sorted: %d before %d", a[i-1].Version, a[i].Version) + } + } + // Mutating the returned slice must not affect subsequent calls. 
+ if len(a) > 0 { + a[0].Version = -999 + } + a2 := All() + if len(a2) > 0 && a2[0].Version == -999 { + t.Error("All() returned a shared slice — subsequent calls see mutation") + } +} + +func TestAppliedVersion_empty_returns_zero(t *testing.T) { + db := openTestDB(t) + ensureMigrationsTable(t, db) + + v, err := AppliedVersion(context.Background(), db) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if v != 0 { + t.Errorf("expected 0 for empty schema_migrations, got %d", v) + } +} + +func TestAppliedVersion_returns_max(t *testing.T) { + db := openTestDB(t) + ensureMigrationsTable(t, db) + for _, v := range []int{1, 5, 3, 10, 7} { + _, err := db.Exec("INSERT INTO schema_migrations (version) VALUES (?)", v) + if err != nil { + t.Fatalf("insert %d: %v", v, err) + } + } + v, err := AppliedVersion(context.Background(), db) + if err != nil { + t.Fatalf("AppliedVersion: %v", err) + } + if v != 10 { + t.Errorf("expected 10, got %d", v) + } +} + +func TestAppliedVersion_no_table_returns_error(t *testing.T) { + db := openTestDB(t) + // Don't create schema_migrations table. 
+ _, err := AppliedVersion(context.Background(), db) + if err == nil { + t.Fatal("expected error when schema_migrations missing") + } +} + +func TestAssertSchema_ok_when_at_required(t *testing.T) { + db := openTestDB(t) + ensureMigrationsTable(t, db) + _, err := db.Exec("INSERT INTO schema_migrations (version) VALUES (?)", RequiredVersion()) + if err != nil { + t.Fatalf("seed: %v", err) + } + if err := AssertSchema(context.Background(), db); err != nil { + t.Errorf("AssertSchema returned error when at required version: %v", err) + } +} + +func TestAssertSchema_ok_when_above_required(t *testing.T) { + db := openTestDB(t) + ensureMigrationsTable(t, db) + _, err := db.Exec("INSERT INTO schema_migrations (version) VALUES (?)", RequiredVersion()+10) + if err != nil { + t.Fatalf("seed: %v", err) + } + if err := AssertSchema(context.Background(), db); err != nil { + t.Errorf("AssertSchema returned error when ahead of required: %v", err) + } +} + +func TestAssertSchema_fails_when_below_required(t *testing.T) { + db := openTestDB(t) + ensureMigrationsTable(t, db) + // Seed only the first migration. + _, err := db.Exec("INSERT INTO schema_migrations (version) VALUES (?)", 1) + if err != nil { + t.Fatalf("seed: %v", err) + } + + err = AssertSchema(context.Background(), db) + if err == nil { + t.Fatal("expected SchemaMismatchError, got nil") + } + var smErr *SchemaMismatchError + if !errors.As(err, &smErr) { + t.Fatalf("expected *SchemaMismatchError, got %T: %v", err, err) + } + if smErr.RequiredVersion != RequiredVersion() { + t.Errorf("RequiredVersion mismatch: got %d, want %d", smErr.RequiredVersion, RequiredVersion()) + } + if smErr.AppliedVersion != 1 { + t.Errorf("AppliedVersion mismatch: got %d, want 1", smErr.AppliedVersion) + } + if len(smErr.Pending) == 0 { + t.Error("expected pending migrations list, got empty") + } + + // Error message must contain the actionable hint. 
+ if !strings.Contains(err.Error(), "orama node migrate") { + t.Errorf("error message lacks actionable hint: %v", err) + } +} + +func TestPendingMigrations_empty_when_at_required(t *testing.T) { + db := openTestDB(t) + ensureMigrationsTable(t, db) + _, _ = db.Exec("INSERT INTO schema_migrations (version) VALUES (?)", RequiredVersion()) + + pending, err := PendingMigrations(context.Background(), db) + if err != nil { + t.Fatalf("PendingMigrations: %v", err) + } + if len(pending) != 0 { + t.Errorf("expected 0 pending, got %d", len(pending)) + } +} + +func TestPendingMigrations_lists_all_when_empty_db(t *testing.T) { + db := openTestDB(t) + ensureMigrationsTable(t, db) + pending, err := PendingMigrations(context.Background(), db) + if err != nil { + t.Fatalf("PendingMigrations: %v", err) + } + if len(pending) != len(All()) { + t.Errorf("expected %d pending (all), got %d", len(All()), len(pending)) + } +} + +func TestParseVersion(t *testing.T) { + cases := []struct { + name string + in string + want int + ok bool + }{ + {"valid 3-digit", "001_initial.sql", 1, true}, + {"valid 25", "025_persistent_ws.sql", 25, true}, + {"valid 100", "100_future.sql", 100, true}, + {"no underscore", "999.sql", 0, false}, + {"non-numeric prefix", "abc_initial.sql", 0, false}, + {"empty", "", 0, false}, + {"only underscore", "_x.sql", 0, false}, + } + for _, c := range cases { + t.Run(c.name, func(t *testing.T) { + got, ok := parseVersion(c.in) + if ok != c.ok || got != c.want { + t.Errorf("parseVersion(%q) = (%d, %v), want (%d, %v)", + c.in, got, ok, c.want, c.ok) + } + }) + } +} + +func TestSchemaMismatchError_message_lists_pending(t *testing.T) { + e := &SchemaMismatchError{ + RequiredVersion: 25, + AppliedVersion: 22, + Pending: []MigrationInfo{ + {Version: 23, Name: "push_devices"}, + {Version: 24, Name: "namespace_publish_seq"}, + {Version: 25, Name: "persistent_ws"}, + }, + } + msg := e.Error() + for _, want := range []string{"025", "024", "023", "push_devices", 
"namespace_publish_seq", "persistent_ws", "orama node migrate"} { + if !strings.Contains(msg, want) { + t.Errorf("error message missing %q: %s", want, msg) + } + } +} diff --git a/core/migrations/roundtrip_test.go b/core/migrations/roundtrip_test.go new file mode 100644 index 0000000..c3c915f --- /dev/null +++ b/core/migrations/roundtrip_test.go @@ -0,0 +1,237 @@ +package migrations_test + +// roundtrip_test.go is the build-time guard that prevents +// "binary references column X but X is missing from migrations" +// drift — the bug that triggered the AnChat-test outage on 2026-05-06. +// +// How it works: +// +// 1. Open an in-memory SQLite database. +// 2. Apply EVERY embedded migration in version order. +// 3. Run a series of "exemplar" SQL operations against the resulting +// schema. If any operation fails, the test fails — meaning either: +// a. A migration was deleted / renumbered and the schema regressed +// b. A new migration was added but isn't reachable via embed.FS +// c. (Most importantly) a Go file references a column / table / +// index that no migration creates +// +// The exemplars are drawn from the actual SQL strings the platform's +// Go code executes. Adding a new INSERT/SELECT in the gateway → add the +// matching exemplar here so drift is caught at `go test` time, not +// at production deploy. +// +// This is generic by design — every platform table participates. Adding +// a new table doesn't require new test infrastructure, only one new +// exemplar string. + +import ( + "database/sql" + "strings" + "testing" + + "github.com/DeBrosOfficial/network/migrations" + "github.com/DeBrosOfficial/network/pkg/rqlite" + _ "github.com/mattn/go-sqlite3" + "go.uber.org/zap" +) + +// TestSchemaRoundtrip_AllMigrationsApplyClean verifies every embedded +// migration applies successfully against a fresh database in version +// order. Failure here means a migration is broken in isolation +// (syntax error, references a missing prior migration's column, etc.). 
+func TestSchemaRoundtrip_AllMigrationsApplyClean(t *testing.T) { + db := openRoundtripDB(t) + if err := rqlite.ApplyEmbeddedMigrations(t.Context(), db, migrations.FS, zap.NewNop()); err != nil { + t.Fatalf("ApplyEmbeddedMigrations failed: %v", err) + } + + // Sanity: applied version should equal RequiredVersion. + applied, err := migrations.AppliedVersion(t.Context(), db) + if err != nil { + t.Fatalf("AppliedVersion: %v", err) + } + if applied != migrations.RequiredVersion() { + t.Errorf("applied=%d != required=%d after full roundtrip", applied, migrations.RequiredVersion()) + } +} + +// TestSchemaRoundtrip_PlatformExemplars exercises representative SQL +// statements from the Go codebase against the migrated schema. +// +// Each exemplar is a string that should EXECUTE successfully (we don't +// care about row counts — only that the SQL parses and binds against +// the schema). Args are placeholders; values can be anything matching +// the column types. +// +// When a Go handler is added that touches a new table or column, add +// an exemplar here. The diff at review time enforces the contract: +// "if you write Go that uses column X, an exemplar exercises it, +// which means migrations must declare X." +func TestSchemaRoundtrip_PlatformExemplars(t *testing.T) { + db := openRoundtripDB(t) + if err := rqlite.ApplyEmbeddedMigrations(t.Context(), db, migrations.FS, zap.NewNop()); err != nil { + t.Fatalf("ApplyEmbeddedMigrations: %v", err) + } + + // Each exemplar is (table, sql, args). The args don't have to satisfy + // constraints — we use Prepare to validate column references without + // actually running mutations. Statements that have to execute (because + // SQLite delays some checks) get marked exec=true. + type exemplar struct { + name string + sql string + args []any + exec bool // true: actually execute; false: just Prepare + } + exemplars := []exemplar{ + // functions table — bug #214's table, which is why we care. 
+ // Every column written by the function-store INSERT must be here. + { + name: "functions INSERT (full column list incl. ws_*)", + sql: `INSERT INTO functions ( + id, name, namespace, version, wasm_cid, + memory_limit_mb, timeout_seconds, is_public, + retry_count, retry_delay_seconds, dlq_topic, + status, created_at, updated_at, created_by, + ws_persistent, ws_idle_timeout_sec, ws_max_frame_bytes, ws_max_inflight_per_conn + ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`, + args: []any{ + "id-1", "fn", "ns", 1, "cid-1", + 64, 30, false, + 0, 5, "", + "active", 0, 0, "ns", + false, 0, 0, 0, + }, + exec: true, + }, + { + name: "functions SELECT (full column list)", + sql: `SELECT id, name, namespace, version, wasm_cid, source_cid, + ws_persistent, ws_idle_timeout_sec, ws_max_frame_bytes, ws_max_inflight_per_conn, + memory_limit_mb, timeout_seconds, is_public, + retry_count, retry_delay_seconds, dlq_topic, + status, created_at, updated_at, created_by + FROM functions WHERE namespace = ? AND name = ?`, + args: []any{"ns", "fn"}, + }, + + // function_invocations — used by the invocation-history view (#211 fix). + { + name: "function_invocations INSERT", + sql: `INSERT INTO function_invocations ( + id, function_id, request_id, trigger_type, caller_wallet, + input_size, output_size, started_at, completed_at, + duration_ms, status, error_message, memory_used_mb + ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`, + args: []any{ + "inv-1", "id-1", "req-A", "http", "0xwallet", + 0, 0, 0, 0, + 0, "success", "", 0.0, + }, + exec: true, + }, + { + name: "function_invocations SELECT for GetInvocations", + sql: `SELECT i.id, i.request_id, i.trigger_type, i.caller_wallet, + i.input_size, i.output_size, i.started_at, i.completed_at, + i.duration_ms, i.status, i.error_message, i.memory_used_mb + FROM function_invocations i + JOIN functions f ON i.function_id = f.id + WHERE f.namespace = ? AND f.name = ? 
+ ORDER BY i.started_at DESC LIMIT ?`, + args: []any{"ns", "fn", 50}, + }, + + // function_logs — WASM-emitted log lines. + { + name: "function_logs INSERT", + sql: `INSERT INTO function_logs ( + id, function_id, invocation_id, level, message, timestamp + ) VALUES (?, ?, ?, ?, ?, ?)`, + args: []any{"log-1", "id-1", "inv-1", "info", "hi", 0}, + exec: true, + }, + + // function_pubsub_triggers — wildcard trigger column rename (plan 03). + // During the dual-column rolling-upgrade window the Go code writes + // BOTH `topic` (legacy NOT NULL) and `topic_pattern` (new); this + // exemplar mirrors the actual INSERT and would catch a future + // migration that drops one column without a corresponding code change. + { + name: "function_pubsub_triggers INSERT (dual topic+topic_pattern)", + sql: `INSERT INTO function_pubsub_triggers ( + id, function_id, topic, topic_pattern, + enabled, created_at, + aggregation_window_ms, aggregation_max_batch_size + ) VALUES (?, ?, ?, ?, ?, ?, ?, ?)`, + args: []any{"trig-1", "id-1", "presence:*", "presence:*", true, 0, 0, 0}, + exec: true, + }, + + // push_devices — created by migration 023; encrypted token storage. + { + name: "push_devices INSERT", + sql: `INSERT INTO push_devices ( + id, namespace, user_id, device_id, provider, + token_encrypted, platform, app_version, + created_at, updated_at, last_seen + ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`, + args: []any{ + "dev-1", "ns", "u1", "device-A", "ntfy", + "enc:...", "ios", "1.0", + 0, 0, 0, + }, + exec: true, + }, + + // namespace_publish_seq — sequence counter from plan 08. + { + name: "namespace_publish_seq UPSERT", + sql: `INSERT INTO namespace_publish_seq (namespace, next_seq, updated_at) + VALUES (?, ?, ?) 
+ ON CONFLICT(namespace) DO UPDATE SET + next_seq = next_seq + 1, + updated_at = excluded.updated_at`, + args: []any{"ns", 2, 0}, + exec: true, + }, + } + + for _, ex := range exemplars { + t.Run(ex.name, func(t *testing.T) { + if ex.exec { + if _, err := db.Exec(ex.sql, ex.args...); err != nil { + t.Errorf("schema drift: %v\nsql: %s", err, snippet(ex.sql)) + } + return + } + stmt, err := db.Prepare(ex.sql) + if err != nil { + t.Errorf("schema drift (Prepare failed): %v\nsql: %s", err, snippet(ex.sql)) + return + } + defer func() { _ = stmt.Close() }() + }) + } +} + +// openRoundtripDB returns an in-memory SQLite. Closes automatically on +// test cleanup. +func openRoundtripDB(t *testing.T) *sql.DB { + t.Helper() + db, err := sql.Open("sqlite3", ":memory:") + if err != nil { + t.Fatalf("open in-memory sqlite: %v", err) + } + t.Cleanup(func() { _ = db.Close() }) + return db +} + +// snippet trims a SQL string to fit on a single error line. +func snippet(s string) string { + s = strings.Join(strings.Fields(s), " ") + if len(s) > 140 { + return s[:140] + "..." + } + return s +} diff --git a/core/pkg/auth/rootwallet.go b/core/pkg/auth/rootwallet.go index a141816..78ebb9b 100644 --- a/core/pkg/auth/rootwallet.go +++ b/core/pkg/auth/rootwallet.go @@ -3,54 +3,58 @@ package auth import ( "bufio" "bytes" + "context" "encoding/json" "fmt" "io" "net/http" "os" - "os/exec" "strings" "time" + "github.com/DeBrosOfficial/network/pkg/rwagent" "github.com/DeBrosOfficial/network/pkg/tlsutil" ) -// IsRootWalletInstalled checks if the `rw` CLI is available in PATH +// IsRootWalletInstalled checks if the rootwallet agent is reachable. 
func IsRootWalletInstalled() bool { - _, err := exec.LookPath("rw") - return err == nil + client := rwagent.New(os.Getenv("RW_AGENT_SOCK")) + ctx, cancel := context.WithTimeout(context.Background(), 3*time.Second) + defer cancel() + return client.IsRunning(ctx) } -// getRootWalletAddress gets the EVM address from the RootWallet keystore +// getRootWalletAddress gets the EVM address from the rootwallet agent. func getRootWalletAddress() (string, error) { - cmd := exec.Command("rw", "address", "--chain", "evm") - cmd.Stderr = os.Stderr - out, err := cmd.Output() + client := rwagent.New(os.Getenv("RW_AGENT_SOCK")) + ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) + defer cancel() + + data, err := client.GetAddress(ctx, "evm") if err != nil { - return "", fmt.Errorf("failed to get address from rw: %w", err) + return "", fmt.Errorf("failed to get address from rootwallet agent: %w", err) } - addr := strings.TrimSpace(string(out)) - if addr == "" { - return "", fmt.Errorf("rw returned empty address — run 'rw init' first") + if data.Address == "" { + return "", fmt.Errorf("rootwallet agent returned empty address") } - return addr, nil + return data.Address, nil } -// signWithRootWallet signs a message using RootWallet's EVM key. -// Stdin is passed through so the user can enter their password if the session is expired. +// signWithRootWallet signs a message using the rootwallet agent's EVM key. +// The desktop app may prompt the user for approval. 
func signWithRootWallet(message string) (string, error) { - cmd := exec.Command("rw", "sign", message, "--chain", "evm") - cmd.Stdin = os.Stdin - cmd.Stderr = os.Stderr - out, err := cmd.Output() + client := rwagent.New(os.Getenv("RW_AGENT_SOCK")) + ctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute) + defer cancel() + + data, err := client.Sign(ctx, message, "evm") if err != nil { - return "", fmt.Errorf("failed to sign with rw: %w", err) + return "", fmt.Errorf("failed to sign with rootwallet agent: %w", err) } - sig := strings.TrimSpace(string(out)) - if sig == "" { - return "", fmt.Errorf("rw returned empty signature") + if data.Signature == "" { + return "", fmt.Errorf("rootwallet agent returned empty signature") } - return sig, nil + return data.Signature, nil } // PerformRootWalletAuthentication performs a challenge-response authentication flow diff --git a/core/pkg/cli/build/builder.go b/core/pkg/cli/build/builder.go index 2c306d4..51a32e1 100644 --- a/core/pkg/cli/build/builder.go +++ b/core/pkg/cli/build/builder.go @@ -157,6 +157,7 @@ func (b *Builder) buildOramaBinaries() error { {Name: "identity", Package: "./cmd/identity/"}, {Name: "sfu", Package: "./cmd/sfu/"}, {Name: "turn", Package: "./cmd/turn/"}, + {Name: "orama-sni-router", Package: "./cmd/sni-router/"}, } for _, bin := range binaries { @@ -197,8 +198,8 @@ func (b *Builder) buildVaultGuardian() error { return fmt.Errorf("zig not found in PATH — install from https://ziglang.org/download/") } - // Vault source is sibling to orama project - vaultDir := filepath.Join(b.projectDir, "..", "orama-vault") + // Vault source is sibling to core/ within the orama monorepo + vaultDir := filepath.Join(b.projectDir, "..", "vault") if _, err := os.Stat(filepath.Join(vaultDir, "build.zig")); err != nil { return fmt.Errorf("vault source not found at %s — expected orama-vault as sibling directory: %w", vaultDir, err) } diff --git a/core/pkg/cli/cmd/node/migrate_conf.go 
b/core/pkg/cli/cmd/node/migrate_conf.go new file mode 100644 index 0000000..1b9e2af --- /dev/null +++ b/core/pkg/cli/cmd/node/migrate_conf.go @@ -0,0 +1,116 @@ +package node + +import ( + "bytes" + "encoding/json" + "fmt" + "io" + "net/http" + "time" + + "github.com/DeBrosOfficial/network/pkg/auth" + "github.com/DeBrosOfficial/network/pkg/cli" + "github.com/DeBrosOfficial/network/pkg/cli/remotessh" + "github.com/spf13/cobra" +) + +var migrateConfEnv string + +var migrateConfCmd = &cobra.Command{ + Use: "migrate-conf", + Short: "Register nodes.conf nodes with your wallet", + Long: `One-time migration: reads nodes from nodes.conf for an environment +and registers each with your wallet via the gateway API. After migration, +these nodes will appear in 'orama nodes' output. + +Requires: orama auth login (for API authentication)`, + RunE: func(cmd *cobra.Command, args []string) error { + env := migrateConfEnv + if env == "" { + active, err := cli.GetActiveEnvironment() + if err != nil { + return fmt.Errorf("failed to get active environment: %w", err) + } + env = active.Name + } + + // Load nodes from nodes.conf + nodes, err := remotessh.LoadEnvNodes(env) + if err != nil { + return fmt.Errorf("failed to load nodes.conf: %w", err) + } + + // Get gateway URL + envConfig, err := cli.GetEnvironmentByName(env) + if err != nil { + return fmt.Errorf("environment %q not configured: %w", env, err) + } + + // Load stored credentials + store, err := auth.LoadEnhancedCredentials() + if err != nil { + return fmt.Errorf("failed to load credentials: %w", err) + } + creds := store.GetDefaultCredential(envConfig.GatewayURL) + if creds == nil || creds.APIKey == "" { + return fmt.Errorf("no credentials for %s — run 'orama auth login' first", envConfig.GatewayURL) + } + + if len(nodes) == 0 { + fmt.Printf("No nodes found for environment %q in nodes.conf\n", env) + return nil + } + + fmt.Printf("Migrating %d node(s) from nodes.conf to %s...\n\n", len(nodes), env) + + httpClient := 
&http.Client{Timeout: 10 * time.Second} + registered := 0 + + for _, n := range nodes { + body := map[string]string{ + "ip_address": n.Host, + "environment": env, + "role": n.Role, + "ssh_user": n.User, + } + payload, _ := json.Marshal(body) + + req, err := http.NewRequest(http.MethodPost, + envConfig.GatewayURL+"/v1/operator/node/register", + bytes.NewReader(payload)) + if err != nil { + fmt.Fprintf(cmd.ErrOrStderr(), " %s: failed to create request: %v\n", n.Host, err) + continue + } + req.Header.Set("Content-Type", "application/json") + req.Header.Set("X-API-Key", creds.APIKey) + + resp, err := httpClient.Do(req) + if err != nil { + fmt.Fprintf(cmd.ErrOrStderr(), " %s: request failed: %v\n", n.Host, err) + continue + } + respBody, _ := io.ReadAll(io.LimitReader(resp.Body, 4096)) + resp.Body.Close() + + if resp.StatusCode == http.StatusOK { + fmt.Printf(" %s (%s): registered\n", n.Host, n.Role) + registered++ + } else if resp.StatusCode == http.StatusNotFound { + fmt.Printf(" %s: not found in cluster (node may not have joined yet)\n", n.Host) + } else { + fmt.Fprintf(cmd.ErrOrStderr(), " %s: HTTP %d: %s\n", n.Host, resp.StatusCode, string(respBody)) + } + } + + fmt.Printf("\n%d/%d nodes registered with your wallet\n", registered, len(nodes)) + if registered < len(nodes) { + fmt.Println("Nodes not found may need to join the cluster first, then re-run this command.") + } + return nil + }, +} + +func init() { + migrateConfCmd.Flags().StringVar(&migrateConfEnv, "env", "", "Environment to migrate (default: active)") +} diff --git a/core/pkg/cli/cmd/node/node.go b/core/pkg/cli/cmd/node/node.go index 74f9744..be55603 100644 --- a/core/pkg/cli/cmd/node/node.go +++ b/core/pkg/cli/cmd/node/node.go @@ -32,4 +32,7 @@ func init() { Cmd.AddCommand(recoverRaftCmd) Cmd.AddCommand(enrollCmd) Cmd.AddCommand(unlockCmd) + Cmd.AddCommand(migrateConfCmd) + Cmd.AddCommand(setupCmd) + Cmd.AddCommand(schemaCmd) } diff --git a/core/pkg/cli/cmd/node/schema.go 
b/core/pkg/cli/cmd/node/schema.go new file mode 100644 index 0000000..905c103 --- /dev/null +++ b/core/pkg/cli/cmd/node/schema.go @@ -0,0 +1,264 @@ +// Package node — schema subcommand. Operator-facing commands for +// inspecting and applying the embedded gateway schema migrations against +// the local RQLite instance. +// +// `orama node schema status` — non-destructive: shows binary's required +// schema version, applied version, and pending +// migrations. Useful in rolling-upgrade +// monitoring. +// +// `orama node schema apply` — applies any pending migrations. Idempotent +// and safe to re-run; ALTER TABLE failures for +// existing columns are tolerated. Confirms +// before running unless --yes is passed. +// +// These are the long-term fix for the "schema lag after gateway-only +// upgrade" class of incident. See migrations/contract.go for the contract. +package node + +import ( + "context" + "database/sql" + "fmt" + "os" + "path/filepath" + "strings" + "time" + + "github.com/DeBrosOfficial/network/migrations" + "github.com/DeBrosOfficial/network/pkg/config" + "github.com/DeBrosOfficial/network/pkg/rqlite" + "github.com/spf13/cobra" + "go.uber.org/zap" + + _ "github.com/rqlite/gorqlite/stdlib" +) + +var ( + schemaDSN string + schemaYes bool +) + +var schemaCmd = &cobra.Command{ + Use: "schema", + Short: "Inspect and apply gateway schema migrations against the local RQLite", + Long: `Schema lifecycle commands. + +The gateway binary embeds a set of SQL migrations. Each migration is numbered; +the highest number is the schema version the binary requires. 
After deploying +a new gateway binary, run 'orama node schema apply' on every namespace's RQLite +to bring the schema up to date — otherwise function deploys fail at runtime +with cryptic missing-column errors.`, +} + +var schemaStatusCmd = &cobra.Command{ + Use: "status", + Short: "Show required vs applied schema version + pending migrations", + RunE: func(cmd *cobra.Command, args []string) error { + db, dsn, err := openSchemaDB() + if err != nil { + return err + } + defer db.Close() + + ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) + defer cancel() + + applied, err := migrations.AppliedVersion(ctx, db) + if err != nil { + return fmt.Errorf("query applied version: %w", err) + } + required := migrations.RequiredVersion() + pending, err := migrations.PendingMigrations(ctx, db) + if err != nil { + return fmt.Errorf("compute pending: %w", err) + } + + fmt.Printf("Connection: %s\n", dsn) + fmt.Printf("Required version: %d (highest migration in binary)\n", required) + fmt.Printf("Applied version: %d\n", applied) + switch { + case applied == required: + fmt.Printf("Status: ✓ up to date\n") + case applied > required: + fmt.Printf("Status: ⚠ database AHEAD of binary (%d > %d) — newer binary in cluster?\n", + applied, required) + default: + fmt.Printf("Status: ✗ BEHIND — %d migration(s) pending\n", len(pending)) + } + + if len(pending) > 0 { + fmt.Println("\nPending migrations:") + for _, m := range pending { + fmt.Printf(" %03d %s\n", m.Version, m.Name) + } + fmt.Println("\nRun 'sudo orama node schema apply' to apply them.") + } + return nil + }, +} + +var schemaApplyCmd = &cobra.Command{ + Use: "apply", + Short: "Apply pending migrations to the local RQLite", + Long: `Apply every embedded migration not yet recorded in schema_migrations. + +ALTER TABLE statements that target an already-existing column are tolerated +(the migration is marked complete). 
Other errors abort the run with the +schema in a partially-applied state — re-running is safe because each +migration is independently versioned.`, + RunE: func(cmd *cobra.Command, args []string) error { + db, dsn, err := openSchemaDB() + if err != nil { + return err + } + defer db.Close() + + ctx, cancel := context.WithTimeout(context.Background(), 60*time.Second) + defer cancel() + + pending, err := migrations.PendingMigrations(ctx, db) + if err != nil { + return fmt.Errorf("compute pending: %w", err) + } + if len(pending) == 0 { + fmt.Printf("No pending migrations. Schema is at version %d.\n", migrations.RequiredVersion()) + return nil + } + + fmt.Printf("Will apply %d migration(s) to %s:\n", len(pending), dsn) + for _, m := range pending { + fmt.Printf(" %03d %s\n", m.Version, m.Name) + } + + if !schemaYes { + fmt.Print("\nProceed? [y/N]: ") + var ans string + _, _ = fmt.Scanln(&ans) + if strings.ToLower(strings.TrimSpace(ans)) != "y" { + fmt.Println("Aborted.") + return nil + } + } + + // Use the existing migration runner — it does the same thing the + // gateway does at startup, with idempotent-error tolerance. + logger, _ := zap.NewProduction() + defer func() { _ = logger.Sync() }() + + if err := rqlite.ApplyEmbeddedMigrations(ctx, db, migrations.FS, logger); err != nil { + return fmt.Errorf("apply failed: %w", err) + } + + // Verify post-apply. + if err := migrations.AssertSchema(ctx, db); err != nil { + return fmt.Errorf("apply completed but schema still lags: %w", err) + } + + fmt.Printf("\n✓ Schema now at version %d.\n", migrations.RequiredVersion()) + return nil + }, +} + +// openSchemaDB returns a *sql.DB connected to the local RQLite instance, +// using the --dsn flag if provided, else discovering from the node config +// or falling back to localhost:5001. 
+func openSchemaDB() (*sql.DB, string, error) { + dsn := schemaDSN + if dsn == "" { + dsn = discoverLocalRQLiteDSN() + } + db, err := sql.Open("rqlite", dsn) + if err != nil { + return nil, "", fmt.Errorf("open rqlite: %w", err) + } + // Quick liveness check so we fail fast with a clear error. + pingCtx, cancel := context.WithTimeout(context.Background(), 3*time.Second) + defer cancel() + if err := db.PingContext(pingCtx); err != nil { + _ = db.Close() + return nil, "", fmt.Errorf("rqlite at %s unreachable: %w "+ + "(hint: is RQLite running? try 'orama node status')", dsn, err) + } + return db, dsn, nil +} + +// discoverLocalRQLiteDSN reads the node config to find the local RQLite +// port + credentials, falling back to localhost:5001 with no auth. +func discoverLocalRQLiteDSN() string { + const fallback = "http://localhost:5001" + + cfgPath, err := config.DefaultPath("node.yaml") + if err != nil { + return fallback + } + if _, err := os.Stat(cfgPath); err != nil { + return fallback + } + cfgDir := filepath.Dir(cfgPath) + + // Try to read RQLite credentials from the standard secrets path. + user, pass := readRQLiteCreds(cfgDir) + + port := readRQLitePortFromConfig(cfgPath) + if port == 0 { + port = 5001 + } + if user == "" { + return fmt.Sprintf("http://localhost:%d", port) + } + return fmt.Sprintf("http://%s:%s@localhost:%d", user, pass, port) +} + +// readRQLiteCreds best-effort reads the user:pass from secrets files +// adjacent to the node config. Returns ("","") on any miss; the caller +// then connects without auth (which works on a local-only instance). 
+func readRQLiteCreds(cfgDir string) (string, string) { + type pair struct{ userFile, passFile string } + candidates := []pair{ + {filepath.Join(cfgDir, "secrets", "rqlite-user"), filepath.Join(cfgDir, "secrets", "rqlite-password")}, + } + for _, c := range candidates { + u, err := os.ReadFile(c.userFile) + if err != nil { + continue + } + p, err := os.ReadFile(c.passFile) + if err != nil { + continue + } + return strings.TrimSpace(string(u)), strings.TrimSpace(string(p)) + } + return "", "" +} + +// readRQLitePortFromConfig is a tiny YAML peek for `database.rqlite_port`. +// Avoids pulling the whole config loader; failure returns 0 → fallback used. +func readRQLitePortFromConfig(path string) int { + data, err := os.ReadFile(path) + if err != nil { + return 0 + } + for _, line := range strings.Split(string(data), "\n") { + line = strings.TrimSpace(line) + if !strings.HasPrefix(line, "rqlite_port:") { + continue + } + var port int + _, err := fmt.Sscanf(line, "rqlite_port: %d", &port) + if err == nil { + return port + } + } + return 0 +} + +func init() { + schemaCmd.PersistentFlags().StringVar(&schemaDSN, "dsn", "", + "RQLite DSN (default: discover from node config or localhost:5001)") + schemaApplyCmd.Flags().BoolVar(&schemaYes, "yes", false, + "Skip the confirmation prompt") + + schemaCmd.AddCommand(schemaStatusCmd) + schemaCmd.AddCommand(schemaApplyCmd) +} diff --git a/core/pkg/cli/cmd/node/setup.go b/core/pkg/cli/cmd/node/setup.go new file mode 100644 index 0000000..899924b --- /dev/null +++ b/core/pkg/cli/cmd/node/setup.go @@ -0,0 +1,47 @@ +package node + +import ( + "github.com/DeBrosOfficial/network/pkg/cli/production/setup" + "github.com/spf13/cobra" +) + +var setupOpts setup.Options + +var setupCmd = &cobra.Command{ + Use: "setup", + Short: "Set up a fresh VPS as an Orama node", + Long: `Bootstrap a fresh VPS into a running Orama node in one command. 
+ +Creates an SSH key in rootwallet, installs it on the VPS, uploads the binary +archive, and runs the node install. For the first node, use --genesis to +create a new cluster. + +Examples: + # Genesis node (first node, creates new cluster) + orama node setup --ip 1.2.3.4 --password 'vps-pass' --env devnet \ + --base-domain orama-devnet.network --role nameserver --genesis + + # Join existing cluster + orama node setup --ip 5.6.7.8 --password 'vps-pass' --env devnet \ + --base-domain orama-devnet.network + + # Join as nameserver + orama node setup --ip 9.10.11.12 --password 'vps-pass' --env devnet \ + --base-domain orama-devnet.network --role nameserver`, + RunE: func(cmd *cobra.Command, args []string) error { + return setup.Run(setupOpts) + }, +} + +func init() { + setupCmd.Flags().StringVar(&setupOpts.IP, "ip", "", "Public IP address of the VPS (required)") + setupCmd.Flags().StringVar(&setupOpts.Env, "env", "", "Target environment (default: active)") + setupCmd.Flags().StringVar(&setupOpts.Role, "role", "node", "Node role: node or nameserver") + setupCmd.Flags().StringVar(&setupOpts.User, "user", "root", "SSH user on the VPS") + setupCmd.Flags().StringVar(&setupOpts.Password, "password", "", "One-time password for initial SSH access") + setupCmd.Flags().StringVar(&setupOpts.BaseDomain, "base-domain", "", "Base domain for the network") + setupCmd.Flags().StringVar(&setupOpts.Gateway, "gateway", "", "Gateway URL for invite tokens (e.g., http://1.2.3.4)") + setupCmd.Flags().BoolVar(&setupOpts.Genesis, "genesis", false, "Create a new cluster (first node)") + setupCmd.Flags().BoolVar(&setupOpts.AnyoneRelay, "anyone-relay", false, "Run as Anyone relay operator") + setupCmd.MarkFlagRequired("ip") +} diff --git a/core/pkg/cli/cmd/nodescmd/nodes.go b/core/pkg/cli/cmd/nodescmd/nodes.go new file mode 100644 index 0000000..3811768 --- /dev/null +++ b/core/pkg/cli/cmd/nodescmd/nodes.go @@ -0,0 +1,58 @@ +package nodescmd + +import ( + "fmt" + "os" + "text/tabwriter" + + 
"github.com/DeBrosOfficial/network/pkg/cli" + "github.com/DeBrosOfficial/network/pkg/cli/noderesolver" + "github.com/spf13/cobra" +) + +var envFlag string + +// Cmd is the top-level "nodes" command — lists operator's nodes. +var Cmd = &cobra.Command{ + Use: "nodes", + Short: "List your nodes across environments", + Long: `List all nodes owned by your wallet. Queries the network API +with your stored credentials, falling back to nodes.conf. + +Requires: orama auth login (for API-based resolution)`, + RunE: func(cmd *cobra.Command, args []string) error { + env := envFlag + if env == "" { + active, err := cli.GetActiveEnvironment() + if err != nil { + return fmt.Errorf("failed to get active environment: %w", err) + } + env = active.Name + } + + nodes, err := noderesolver.ResolveNodes(env) + if err != nil { + return fmt.Errorf("failed to resolve nodes: %w", err) + } + + if len(nodes) == 0 { + fmt.Printf("No nodes found for environment %q\n", env) + fmt.Println("Register nodes with: orama node setup --env", env) + return nil + } + + w := tabwriter.NewWriter(os.Stdout, 0, 4, 2, ' ', 0) + fmt.Fprintf(w, "IP\tROLE\tUSER\tENVIRONMENT\n") + for _, n := range nodes { + fmt.Fprintf(w, "%s\t%s\t%s\t%s\n", n.Host, n.Role, n.User, n.Environment) + } + w.Flush() + + fmt.Printf("\n%d node(s) in %s\n", len(nodes), env) + return nil + }, +} + +func init() { + Cmd.Flags().StringVar(&envFlag, "env", "", "Filter by environment (default: active environment)") +} diff --git a/core/pkg/cli/cmd/pushcmd/push.go b/core/pkg/cli/cmd/pushcmd/push.go new file mode 100644 index 0000000..939b020 --- /dev/null +++ b/core/pkg/cli/cmd/pushcmd/push.go @@ -0,0 +1,259 @@ +package pushcmd + +import ( + "encoding/base64" + "fmt" + "os" + "path/filepath" + "sort" + "strings" + + "github.com/DeBrosOfficial/network/pkg/cli" + "github.com/DeBrosOfficial/network/pkg/cli/noderesolver" + "github.com/DeBrosOfficial/network/pkg/cli/remotessh" + "github.com/DeBrosOfficial/network/pkg/inspector" + 
"github.com/spf13/cobra" +) + +var ( + envFlag string + ipFlag string + userFlag string + fanoutFlag bool +) + +// Cmd is the top-level "push" command — upload binary archive to nodes. +var Cmd = &cobra.Command{ + Use: "push", + Short: "Push binary archive to your nodes", + Long: `Upload the pre-built binary archive to nodes and extract it. + +By default, uploads from your machine to each node sequentially. +Use --fanout to upload to one node, then fan out server-to-server (faster). + +Examples: + orama push --ip 1.2.3.4 # Push to one node + orama push --env devnet # Sequential push to all devnet nodes + orama push --env devnet --fanout # Fan out server-to-server (faster)`, + RunE: func(cmd *cobra.Command, args []string) error { + archivePath := findNewestArchive() + if archivePath == "" { + return fmt.Errorf("no binary archive found in /tmp/ (run `orama build` first)") + } + info, err := os.Stat(archivePath) + if err != nil { + return fmt.Errorf("stat archive: %w", err) + } + fmt.Printf("Archive: %s (%s)\n", filepath.Base(archivePath), formatBytes(info.Size())) + + var nodes []inspector.Node + + if ipFlag != "" { + user := userFlag + if user == "" { + user = "root" + } + vaultTarget := fmt.Sprintf("%s/%s", ipFlag, user) + env := envFlag + if env == "" { + active, _ := cli.GetActiveEnvironment() + if active != nil { + env = active.Name + } + } + if env == "sandbox" { + vaultTarget = "sandbox/root" + } + nodes = []inspector.Node{{ + Host: ipFlag, User: user, VaultTarget: vaultTarget, Environment: env, + }} + } else { + env := envFlag + if env == "" { + active, err := cli.GetActiveEnvironment() + if err != nil { + return fmt.Errorf("no --ip or --env specified and no active environment") + } + env = active.Name + } + resolved, err := noderesolver.ResolveNodes(env) + if err != nil { + return fmt.Errorf("failed to resolve nodes: %w", err) + } + if len(resolved) == 0 { + return fmt.Errorf("no nodes found for environment %q", env) + } + nodes = resolved + } + + // Prepare 
SSH keys + cleanup, err := remotessh.PrepareNodeKeys(nodes) + if err != nil { + return fmt.Errorf("failed to prepare SSH keys: %w", err) + } + defer cleanup() + + // Single node or default: upload sequentially + if len(nodes) == 1 || !fanoutFlag { + return pushDirect(nodes, archivePath) + } + + // Multi-node with --fanout: upload to hub, fan out server-to-server + return pushFanout(nodes, archivePath) + }, +} + +func init() { + Cmd.Flags().StringVar(&envFlag, "env", "", "Target environment (default: active)") + Cmd.Flags().StringVar(&ipFlag, "ip", "", "Push to a single node by IP") + Cmd.Flags().StringVar(&userFlag, "user", "", "SSH user (default: root)") + Cmd.Flags().BoolVar(&fanoutFlag, "fanout", false, "Upload to first node, then fan out server-to-server (faster)") +} + +// pushDirect uploads the archive from local machine to each node sequentially. +func pushDirect(nodes []inspector.Node, archivePath string) error { + fmt.Printf("Pushing to %d node(s) (direct)...\n\n", len(nodes)) + + remotePath := "/tmp/" + filepath.Base(archivePath) + extractCmd := fmt.Sprintf("sudo bash -c 'mkdir -p /opt/orama && tar xzf %s -C /opt/orama && rm -f %s && /opt/orama/bin/orama version'", remotePath, remotePath) + + for _, n := range nodes { + fmt.Printf(" %s: uploading...", n.Host) + if err := remotessh.UploadFile(n, archivePath, remotePath); err != nil { + fmt.Printf(" FAILED (%v)\n", err) + continue + } + fmt.Printf(" extracting...") + if err := remotessh.RunSSHStreaming(n, extractCmd); err != nil { + fmt.Printf(" FAILED (%v)\n", err) + continue + } + fmt.Println(" OK") + } + + fmt.Println("\nPush complete") + return nil +} + +// pushFanout uploads the archive to the first node (hub), then fans out +// server-to-server: the hub SCPs the archive to all other nodes in parallel +// and SSHes in to extract it. 
+// +// Key design — no SSH agent forwarding: +// +// The previous implementation loaded all N node keys into the system ssh-agent +// and used agent forwarding (-A) so the hub could reach targets. That caused +// "Too many authentication failures": when the hub connected to a target, the +// forwarded agent offered all N keys sequentially; if N exceeds the server's +// MaxAuthTries (default 6 on most distros), the server disconnects before the +// correct key is tried. +// +// Fix: PrepareNodeKeys (called by the parent command) already fetched and wrote +// each node's private key to a temp file (node.SSHKey). We base64-encode each +// key and embed it directly in the fanout bash script. On the hub, the script +// writes each key to its own mktemp file, uses -o IdentitiesOnly=yes -i $K +// (only ONE key offered per connection), and deletes the temp file immediately. +// No ssh-agent involved on either end; MaxAuthTries is irrelevant. +func pushFanout(nodes []inspector.Node, archivePath string) error { + fmt.Printf("Pushing to %d node(s) (fanout)...\n\n", len(nodes)) + + hub := nodes[0] + targets := nodes[1:] + remotePath := "/tmp/" + filepath.Base(archivePath) + + // Hub keeps the archive on disk after extracting — targets SCP it from here. + // The archive is removed from the hub at the end of this function. + hubExtractCmd := fmt.Sprintf("mkdir -p /opt/orama && tar xzf %s -C /opt/orama", remotePath) + + // Upload archive to hub + fmt.Printf(" %s (hub): uploading...", hub.Host) + if err := remotessh.UploadFile(hub, archivePath, remotePath); err != nil { + return fmt.Errorf("failed to upload to hub %s: %w", hub.Host, err) + } + fmt.Printf(" extracting...") + if err := remotessh.RunSSHStreaming(hub, "sudo bash -c '"+hubExtractCmd+"'"); err != nil { + return fmt.Errorf("failed to extract on hub %s: %w", hub.Host, err) + } + fmt.Println(" OK") + + // Build the fanout script. Each target gets its own shell subshell that: + // 1. 
Writes the target's SSH private key to a mktemp file ($K). + // 2. SCPs the archive from the hub's local path to the target. + // 3. SSHes into the target to extract it. + // 4. Deletes $K — key material never lingers on the hub. + // + // All subshells run in parallel (&); a final `wait` collects them. + // The entire script is base64-encoded before transmission to avoid shell + // quoting conflicts (the script contains both single and double quotes). + var fanoutParts []string + for _, t := range targets { + keyBytes, err := os.ReadFile(t.SSHKey) + if err != nil { + // SSHKey was populated by PrepareNodeKeys; this should never + // happen unless the temp file was somehow deleted mid-run. + fmt.Printf(" Warning: could not read key for %s: %v (skipping)\n", t.Host, err) + continue + } + // base64 alphabet is [A-Za-z0-9+/=] — no shell metacharacters — + // safe to embed in single-quoted strings inside the script. + keyB64 := base64.StdEncoding.EncodeToString(keyBytes) + + part := fmt.Sprintf( + "(K=$(mktemp) && echo '%s' | base64 -d >\"$K\" && chmod 600 \"$K\" && "+ + "scp -o StrictHostKeyChecking=accept-new -o ConnectTimeout=30 -o IdentitiesOnly=yes -i \"$K\" %s %s@%s:%s && "+ + "ssh -o StrictHostKeyChecking=accept-new -o ConnectTimeout=30 -o IdentitiesOnly=yes -i \"$K\" %s@%s "+ + "'sudo bash -c \"mkdir -p /opt/orama && tar xzf %s -C /opt/orama && rm -f %s\"' && "+ + "rm -f \"$K\" && echo '%s: done' || (rm -f \"$K\" 2>/dev/null; echo '%s: FAILED')) &", + keyB64, + remotePath, t.User, t.Host, remotePath, + t.User, t.Host, + remotePath, remotePath, + t.Host, t.Host, + ) + fanoutParts = append(fanoutParts, part) + } + fanoutParts = append(fanoutParts, "wait", "echo 'Fanout complete'") + fanoutScript := strings.Join(fanoutParts, "\n") + + encoded := base64.StdEncoding.EncodeToString([]byte(fanoutScript)) + runCmd := fmt.Sprintf("echo %s | base64 -d | bash", encoded) + + fmt.Printf(" Fanning out to %d nodes from %s...\n", len(targets), hub.Host) + // No agent 
forwarding — hub authenticates to each target using only + // that target's key (embedded above). IdentitiesOnly=yes ensures the + // hub's own host key is never accidentally offered to other nodes. + if err := remotessh.RunSSHStreaming(hub, runCmd); err != nil { + fmt.Printf(" Fanout failed: %v\n", err) + fmt.Println(" Some nodes may not have been updated") + } + + // Clean up archive on hub + remotessh.RunSSHStreaming(hub, "rm -f "+remotePath) + + fmt.Println("\nPush complete") + return nil +} + +func findNewestArchive() string { + matches, _ := filepath.Glob("/tmp/orama-*-linux-*.tar.gz") + if len(matches) == 0 { + return "" + } + sort.Slice(matches, func(i, j int) bool { + fi, _ := os.Stat(matches[i]) + fj, _ := os.Stat(matches[j]) + if fi == nil || fj == nil { + return false + } + return fi.ModTime().After(fj.ModTime()) + }) + return matches[0] +} + +func formatBytes(b int64) string { + const mb = 1024 * 1024 + if b >= mb { + return fmt.Sprintf("%.1f MB", float64(b)/float64(mb)) + } + return fmt.Sprintf("%d KB", b/1024) +} diff --git a/core/pkg/cli/cmd/rolloutcmd/rollout.go b/core/pkg/cli/cmd/rolloutcmd/rollout.go new file mode 100644 index 0000000..40f9717 --- /dev/null +++ b/core/pkg/cli/cmd/rolloutcmd/rollout.go @@ -0,0 +1,234 @@ +package rolloutcmd + +import ( + "context" + "fmt" + "os" + "path/filepath" + "sort" + "strings" + "time" + + "github.com/DeBrosOfficial/network/pkg/cli" + "github.com/DeBrosOfficial/network/pkg/cli/noderesolver" + "github.com/DeBrosOfficial/network/pkg/cli/remotessh" + "github.com/DeBrosOfficial/network/pkg/inspector" + "github.com/spf13/cobra" +) + +var ( + envFlag string + delaySec int +) + +// Cmd is the top-level "rollout" command — build + push + rolling upgrade. +var Cmd = &cobra.Command{ + Use: "rollout", + Short: "Rolling upgrade of your nodes", + Long: `Build, push, and perform a rolling upgrade on all your nodes in an environment. 
+Upgrades followers first, leader last, with health checks between each node.`, + RunE: func(cmd *cobra.Command, args []string) error { + env := envFlag + if env == "" { + active, err := cli.GetActiveEnvironment() + if err != nil { + return fmt.Errorf("failed to get active environment: %w", err) + } + env = active.Name + } + + nodes, err := noderesolver.ResolveNodes(env) + if err != nil { + return fmt.Errorf("failed to resolve nodes: %w", err) + } + if len(nodes) == 0 { + return fmt.Errorf("no nodes found for environment %q", env) + } + + cleanup, err := remotessh.PrepareNodeKeys(nodes) + if err != nil { + return fmt.Errorf("failed to prepare SSH keys: %w", err) + } + defer cleanup() + + fmt.Printf("Rolling out to %d node(s) in %s\n\n", len(nodes), env) + + // Step 1: Find archive + archivePath := findNewestArchive() + if archivePath == "" { + return fmt.Errorf("no binary archive found in /tmp/ (run `orama build` first)") + } + info, err := os.Stat(archivePath) + if err != nil { + return fmt.Errorf("stat archive %s: %w", archivePath, err) + } + fmt.Printf("Archive: %s (%s)\n\n", filepath.Base(archivePath), formatBytes(info.Size())) + + // Step 2: Push archive to all nodes + fmt.Println("Pushing archive to all nodes...") + if err := pushArchive(nodes, archivePath); err != nil { + return err + } + + // Step 3: Rolling upgrade — followers first, leader last + fmt.Println("\nRolling upgrade (followers first, leader last)...") + + leaderIdx := findLeaderIndex(nodes) + if leaderIdx < 0 { + fmt.Fprintf(os.Stderr, " Warning: could not detect RQLite leader, upgrading in order\n") + } + + // Determine SSH options based on environment + var sshOpts []remotessh.SSHOption + if env == "sandbox" { + sshOpts = append(sshOpts, remotessh.WithNoHostKeyCheck()) + } + + delay := time.Duration(delaySec) * time.Second + + // Upgrade non-leaders first + count := 0 + for i := range nodes { + if i == leaderIdx { + continue + } + count++ + if err := upgradeNode(nodes[i], count, len(nodes), 
sshOpts); err != nil { + return err + } + if count < len(nodes) { + fmt.Printf(" Waiting %s before next node...\n", delay) + time.Sleep(delay) + } + } + + // Upgrade leader last + if leaderIdx >= 0 { + count++ + if err := upgradeNode(nodes[leaderIdx], count, len(nodes), sshOpts); err != nil { + return err + } + } + + fmt.Printf("\nRollout complete for %s (%d nodes)\n", env, len(nodes)) + return nil + }, +} + +func init() { + Cmd.Flags().StringVar(&envFlag, "env", "", "Environment (default: active)") + Cmd.Flags().IntVar(&delaySec, "delay", 30, "Seconds to wait between node upgrades") +} + +// findLeaderIndex returns the index of the RQLite leader, or -1 if unknown. +func findLeaderIndex(nodes []inspector.Node) int { + for i, n := range nodes { + ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) + result := inspector.RunSSH(ctx, n, "curl -sf http://localhost:5001/status 2>/dev/null | grep -o '\"state\":\"[^\"]*\"'") + cancel() + if result.OK() && strings.Contains(result.Stdout, "Leader") { + return i + } + } + return -1 +} + +// upgradeNode performs orama node upgrade --restart on a single node. 
+func upgradeNode(node inspector.Node, current, total int, sshOpts []remotessh.SSHOption) error { + fmt.Printf(" [%d/%d] Upgrading %s...\n", current, total, node.Host) + + // Pre-replace orama CLI binary to avoid ETXTBSY + preReplace := "rm -f /usr/local/bin/orama && cp /opt/orama/bin/orama /usr/local/bin/orama" + if err := remotessh.RunSSHStreaming(node, preReplace, sshOpts...); err != nil { + return fmt.Errorf("pre-replace orama binary on %s: %w", node.Host, err) + } + + if err := remotessh.RunSSHStreaming(node, "orama node upgrade --restart", sshOpts...); err != nil { + return fmt.Errorf("upgrade %s: %w", node.Host, err) + } + + // Wait for health + fmt.Printf(" Checking health...") + if err := waitForHealth(node, 2*time.Minute); err != nil { + fmt.Printf(" WARN: %v\n", err) + } else { + fmt.Println(" OK") + } + + return nil +} + +// pushArchive uploads the archive to the first node, then fans out server-to-server. +func pushArchive(nodes []inspector.Node, archivePath string) error { + if len(nodes) == 0 { + return nil + } + + remotePath := "/tmp/" + filepath.Base(archivePath) + + // Upload to first node + hub := nodes[0] + fmt.Printf(" Uploading to %s...\n", hub.Host) + if err := remotessh.UploadFile(hub, archivePath, remotePath); err != nil { + return fmt.Errorf("upload to %s: %w", hub.Host, err) + } + + // Extract on hub + extractCmd := fmt.Sprintf("mkdir -p /opt/orama && tar xzf %s -C /opt/orama && rm -f %s", remotePath, remotePath) + if err := remotessh.RunSSHStreaming(hub, extractCmd); err != nil { + return fmt.Errorf("extract on %s: %w", hub.Host, err) + } + + // For remaining nodes, upload directly and extract + for _, n := range nodes[1:] { + fmt.Printf(" Uploading to %s...\n", n.Host) + if err := remotessh.UploadFile(n, archivePath, remotePath); err != nil { + return fmt.Errorf("upload to %s: %w", n.Host, err) + } + if err := remotessh.RunSSHStreaming(n, extractCmd); err != nil { + return fmt.Errorf("extract on %s: %w", n.Host, err) + } + } + + return 
nil +} + +// waitForHealth polls RQLite health on a node until it reaches Leader or Follower state. +func waitForHealth(node inspector.Node, timeout time.Duration) error { + deadline := time.Now().Add(timeout) + for time.Now().Before(deadline) { + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) + result := inspector.RunSSH(ctx, node, "curl -sf http://localhost:5001/status 2>/dev/null | grep -o '\"state\":\"[^\"]*\"'") + cancel() + if result.OK() && (strings.Contains(result.Stdout, "Leader") || strings.Contains(result.Stdout, "Follower")) { + return nil + } + time.Sleep(3 * time.Second) + } + return fmt.Errorf("timed out waiting for healthy state on %s", node.Host) +} + +// findNewestArchive finds the newest orama binary archive in /tmp/. +func findNewestArchive() string { + matches, err := filepath.Glob("/tmp/orama-*-linux-*.tar.gz") + if err != nil || len(matches) == 0 { + return "" + } + sort.Slice(matches, func(i, j int) bool { + fi, _ := os.Stat(matches[i]) + fj, _ := os.Stat(matches[j]) + if fi == nil || fj == nil { + return false + } + return fi.ModTime().After(fj.ModTime()) + }) + return matches[0] +} + +func formatBytes(b int64) string { + const mb = 1024 * 1024 + if b >= mb { + return fmt.Sprintf("%.1f MB", float64(b)/float64(mb)) + } + return fmt.Sprintf("%d KB", b/1024) +} diff --git a/core/pkg/cli/cmd/sshcmd/ssh.go b/core/pkg/cli/cmd/sshcmd/ssh.go new file mode 100644 index 0000000..75796b9 --- /dev/null +++ b/core/pkg/cli/cmd/sshcmd/ssh.go @@ -0,0 +1,101 @@ +package sshcmd + +import ( + "fmt" + "os" + "os/exec" + + "github.com/DeBrosOfficial/network/pkg/cli" + "github.com/DeBrosOfficial/network/pkg/cli/noderesolver" + "github.com/DeBrosOfficial/network/pkg/cli/remotessh" + "github.com/DeBrosOfficial/network/pkg/inspector" + "github.com/spf13/cobra" +) + +var envFlag string + +// Cmd is the top-level "ssh" command — SSH into any node by IP or hostname. 
+var Cmd = &cobra.Command{ + Use: "ssh [-- command]", + Short: "SSH into a node", + Long: `SSH into a node by IP address or hostname. +Resolves the SSH key from rootwallet automatically. + +Pass a command after the IP to run it non-interactively: + orama ssh 1.2.3.4 'sudo systemctl status orama-node'`, + Args: cobra.MinimumNArgs(1), + DisableFlagParsing: false, + RunE: func(cmd *cobra.Command, args []string) error { + target := args[0] + remoteCmd := "" + if len(args) > 1 { + remoteCmd = args[1] + } + + env := envFlag + if env == "" { + active, err := cli.GetActiveEnvironment() + if err != nil { + return fmt.Errorf("failed to get active environment: %w", err) + } + env = active.Name + } + + // Resolve nodes to find the target + nodes, err := noderesolver.ResolveNodes(env) + if err != nil { + return fmt.Errorf("failed to resolve nodes: %w", err) + } + + // Match by IP + for _, n := range nodes { + if n.Host == target { + return sshInto(n, remoteCmd) + } + } + + // Not found — try direct SSH with default vault target + fmt.Printf("Node %q not found in %s nodes, attempting direct SSH...\n", target, env) + return sshInto(inspector.Node{ + Host: target, + User: "root", + VaultTarget: target + "/root", + }, remoteCmd) + }, +} + +func init() { + Cmd.Flags().StringVar(&envFlag, "env", "", "Environment to search (default: active)") +} + +func sshInto(node inspector.Node, remoteCmd string) error { + nodes := []inspector.Node{node} + cleanup, err := remotessh.PrepareNodeKeys(nodes) + if err != nil { + return fmt.Errorf("failed to resolve SSH key: %w", err) + } + defer cleanup() + + keyPath := nodes[0].SSHKey + + sshBin, err := exec.LookPath("ssh") + if err != nil { + return fmt.Errorf("ssh not found in PATH: %w", err) + } + + sshArgs := []string{ + "-i", keyPath, + "-o", "StrictHostKeyChecking=accept-new", + "-o", "IdentitiesOnly=yes", + fmt.Sprintf("%s@%s", node.User, node.Host), + } + if remoteCmd != "" { + sshArgs = append(sshArgs, remoteCmd) + } + + sshCmd := 
exec.Command(sshBin, sshArgs...) + sshCmd.Stdin = os.Stdin + sshCmd.Stdout = os.Stdout + sshCmd.Stderr = os.Stderr + return sshCmd.Run() +} diff --git a/core/pkg/cli/cmd/statuscmd/status.go b/core/pkg/cli/cmd/statuscmd/status.go new file mode 100644 index 0000000..0d9547d --- /dev/null +++ b/core/pkg/cli/cmd/statuscmd/status.go @@ -0,0 +1,143 @@ +package statuscmd + +import ( + "context" + "encoding/json" + "fmt" + "os" + "sync" + "text/tabwriter" + "time" + + "github.com/DeBrosOfficial/network/pkg/cli" + "github.com/DeBrosOfficial/network/pkg/cli/noderesolver" + "github.com/DeBrosOfficial/network/pkg/cli/remotessh" + "github.com/DeBrosOfficial/network/pkg/inspector" + "github.com/spf13/cobra" +) + +var ( + envFlag string + jsonFlag bool +) + +// Cmd is the top-level "status" command — health check for operator's nodes. +var Cmd = &cobra.Command{ + Use: "status", + Short: "Show health status of your nodes", + Long: `Check the health of all your nodes in an environment. +SSHes into each node and runs orama node report to collect health data.`, + RunE: func(cmd *cobra.Command, args []string) error { + env := envFlag + if env == "" { + active, err := cli.GetActiveEnvironment() + if err != nil { + return fmt.Errorf("failed to get active environment: %w", err) + } + env = active.Name + } + + nodes, err := noderesolver.ResolveNodes(env) + if err != nil { + return fmt.Errorf("failed to resolve nodes: %w", err) + } + + if len(nodes) == 0 { + fmt.Printf("No nodes found for environment %q\n", env) + return nil + } + + cleanup, err := remotessh.PrepareNodeKeys(nodes) + if err != nil { + return fmt.Errorf("failed to prepare SSH keys: %w", err) + } + defer cleanup() + + fmt.Printf("Checking %d node(s) in %s...\n\n", len(nodes), env) + + type nodeResult struct { + Host string `json:"host"` + Role string `json:"role"` + Status string `json:"status"` + Error string `json:"error,omitempty"` + } + + results := make([]nodeResult, len(nodes)) + var wg sync.WaitGroup + + for i, n := 
range nodes { + wg.Add(1) + go func(idx int, node inspector.Node) { + defer wg.Done() + + ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second) + defer cancel() + + result := inspector.RunSSH(ctx, node, "sudo orama node report --json") + nr := nodeResult{Host: node.Host, Role: node.Role} + + if !result.OK() { + nr.Status = "unreachable" + nr.Error = fmt.Sprintf("SSH failed (exit %d)", result.ExitCode) + if result.Stderr != "" { + nr.Error = result.Stderr + if len(nr.Error) > 100 { + nr.Error = nr.Error[:100] + "..." + } + } + results[idx] = nr + return + } + + var report struct { + Gateway struct { + Responsive bool `json:"responsive"` + } `json:"gateway"` + RQLite struct { + RaftState string `json:"raft_state"` + } `json:"rqlite"` + } + if err := json.Unmarshal([]byte(result.Stdout), &report); err != nil { + nr.Status = "unknown" + nr.Error = "failed to parse report" + results[idx] = nr + return + } + + if report.Gateway.Responsive && (report.RQLite.RaftState == "Leader" || report.RQLite.RaftState == "Follower") { + nr.Status = "healthy" + } else { + nr.Status = "degraded" + } + results[idx] = nr + }(i, n) + } + wg.Wait() + + if jsonFlag { + enc := json.NewEncoder(os.Stdout) + enc.SetIndent("", " ") + return enc.Encode(results) + } + + w := tabwriter.NewWriter(os.Stdout, 0, 4, 2, ' ', 0) + fmt.Fprintf(w, "IP\tROLE\tSTATUS\tDETAILS\n") + healthy := 0 + for _, r := range results { + details := r.Error + if r.Status == "healthy" { + healthy++ + } + fmt.Fprintf(w, "%s\t%s\t%s\t%s\n", r.Host, r.Role, r.Status, details) + } + w.Flush() + + fmt.Printf("\n%d/%d nodes healthy\n", healthy, len(results)) + return nil + }, +} + +func init() { + Cmd.Flags().StringVar(&envFlag, "env", "", "Environment (default: active)") + Cmd.Flags().BoolVar(&jsonFlag, "json", false, "Output as JSON") +} diff --git a/core/pkg/cli/env_commands.go b/core/pkg/cli/env_commands.go index 6c9b8cb..bd8e67d 100644 --- a/core/pkg/cli/env_commands.go +++ b/core/pkg/cli/env_commands.go 
@@ -164,30 +164,8 @@ func handleEnvAdd(args []string) { os.Exit(1) } - envConfig, err := LoadEnvironmentConfig() - if err != nil { - fmt.Fprintf(os.Stderr, "❌ Failed to load environment config: %v\n", err) - os.Exit(1) - } - - // Check if environment already exists - for _, env := range envConfig.Environments { - if env.Name == name { - fmt.Fprintf(os.Stderr, "❌ Environment '%s' already exists\n", name) - os.Exit(1) - } - } - - // Add new environment - envConfig.Environments = append(envConfig.Environments, Environment{ - Name: name, - GatewayURL: gatewayURL, - Description: description, - IsActive: false, - }) - - if err := SaveEnvironmentConfig(envConfig); err != nil { - fmt.Fprintf(os.Stderr, "❌ Failed to save environment config: %v\n", err) + if err := AddEnvironment(name, gatewayURL, description); err != nil { + fmt.Fprintf(os.Stderr, "❌ Failed to add environment: %v\n", err) os.Exit(1) } @@ -206,37 +184,8 @@ func handleEnvRemove(args []string) { name := args[0] - envConfig, err := LoadEnvironmentConfig() - if err != nil { - fmt.Fprintf(os.Stderr, "❌ Failed to load environment config: %v\n", err) - os.Exit(1) - } - - // Find and remove environment - found := false - newEnvs := make([]Environment, 0, len(envConfig.Environments)) - for _, env := range envConfig.Environments { - if env.Name == name { - found = true - continue - } - newEnvs = append(newEnvs, env) - } - - if !found { - fmt.Fprintf(os.Stderr, "❌ Environment '%s' not found\n", name) - os.Exit(1) - } - - envConfig.Environments = newEnvs - - // If we removed the active environment, switch to devnet - if envConfig.ActiveEnvironment == name { - envConfig.ActiveEnvironment = "devnet" - } - - if err := SaveEnvironmentConfig(envConfig); err != nil { - fmt.Fprintf(os.Stderr, "❌ Failed to save environment config: %v\n", err) + if err := RemoveEnvironment(name); err != nil { + fmt.Fprintf(os.Stderr, "❌ Failed to remove environment: %v\n", err) os.Exit(1) } diff --git a/core/pkg/cli/environment.go 
b/core/pkg/cli/environment.go index b92bc5f..5a61737 100644 --- a/core/pkg/cli/environment.go +++ b/core/pkg/cli/environment.go @@ -45,8 +45,11 @@ var DefaultEnvironments = []Environment{ }, } -// GetEnvironmentConfigPath returns the path to the environment config file -func GetEnvironmentConfigPath() (string, error) { +// getEnvironmentConfigPathFn is the function used to resolve the config path. +// Tests override this to point at a temp file. +var getEnvironmentConfigPathFn = getEnvironmentConfigPathDefault + +func getEnvironmentConfigPathDefault() (string, error) { configDir, err := config.ConfigDir() if err != nil { return "", fmt.Errorf("failed to get config directory: %w", err) @@ -54,6 +57,11 @@ func GetEnvironmentConfigPath() (string, error) { return filepath.Join(configDir, "environments.json"), nil } +// GetEnvironmentConfigPath returns the path to the environment config file +func GetEnvironmentConfigPath() (string, error) { + return getEnvironmentConfigPathFn() +} + // LoadEnvironmentConfig loads the environment configuration func LoadEnvironmentConfig() (*EnvironmentConfig, error) { path, err := GetEnvironmentConfigPath() @@ -170,6 +178,63 @@ func GetEnvironmentByName(name string) (*Environment, error) { return nil, fmt.Errorf("environment '%s' not found", name) } +// AddEnvironment adds a new environment or updates an existing one. +// If an environment with the same name already exists, its gateway URL and +// description are updated in place. 
+func AddEnvironment(name, gatewayURL, description string) error { + envConfig, err := LoadEnvironmentConfig() + if err != nil { + return err + } + + for i, env := range envConfig.Environments { + if env.Name == name { + envConfig.Environments[i].GatewayURL = gatewayURL + envConfig.Environments[i].Description = description + return SaveEnvironmentConfig(envConfig) + } + } + + envConfig.Environments = append(envConfig.Environments, Environment{ + Name: name, + GatewayURL: gatewayURL, + Description: description, + }) + + return SaveEnvironmentConfig(envConfig) +} + +// RemoveEnvironment removes an environment by name. If the removed environment +// was active, the active environment falls back to "devnet". +func RemoveEnvironment(name string) error { + envConfig, err := LoadEnvironmentConfig() + if err != nil { + return err + } + + newEnvs := make([]Environment, 0, len(envConfig.Environments)) + found := false + for _, env := range envConfig.Environments { + if env.Name == name { + found = true + continue + } + newEnvs = append(newEnvs, env) + } + + if !found { + return nil // already absent, nothing to do + } + + envConfig.Environments = newEnvs + + if envConfig.ActiveEnvironment == name { + envConfig.ActiveEnvironment = "devnet" + } + + return SaveEnvironmentConfig(envConfig) +} + // InitializeEnvironments initializes the environment config with defaults func InitializeEnvironments() error { path, err := GetEnvironmentConfigPath() diff --git a/core/pkg/cli/environment_test.go b/core/pkg/cli/environment_test.go new file mode 100644 index 0000000..ff7cc17 --- /dev/null +++ b/core/pkg/cli/environment_test.go @@ -0,0 +1,131 @@ +package cli + +import ( + "encoding/json" + "os" + "testing" +) + +// writeTestConfig writes an EnvironmentConfig to a temp file and returns +// a helper that patches GetEnvironmentConfigPath to return that path. +// The returned cleanup restores the original function. 
+func writeTestConfig(t *testing.T, cfg *EnvironmentConfig) func() { + t.Helper() + + f, err := os.CreateTemp(t.TempDir(), "envconfig-*.json") + if err != nil { + t.Fatalf("create temp file: %v", err) + } + data, _ := json.MarshalIndent(cfg, "", " ") + if _, err := f.Write(data); err != nil { + t.Fatalf("write temp file: %v", err) + } + f.Close() + + origFn := getEnvironmentConfigPathFn + getEnvironmentConfigPathFn = func() (string, error) { return f.Name(), nil } + return func() { getEnvironmentConfigPathFn = origFn } +} + +func defaultTestConfig() *EnvironmentConfig { + return &EnvironmentConfig{ + Environments: []Environment{ + {Name: "sandbox", GatewayURL: "https://dbrs.space", Description: "Sandbox cluster"}, + {Name: "devnet", GatewayURL: "https://orama-devnet.network", Description: "Development network"}, + {Name: "testnet", GatewayURL: "https://orama-testnet.network", Description: "Test network"}, + }, + ActiveEnvironment: "sandbox", + } +} + +func TestAddEnvironment_new(t *testing.T) { + cleanup := writeTestConfig(t, defaultTestConfig()) + defer cleanup() + + if err := AddEnvironment("staging", "https://staging.example.com", "Staging env"); err != nil { + t.Fatalf("AddEnvironment: %v", err) + } + + env, err := GetEnvironmentByName("staging") + if err != nil { + t.Fatalf("GetEnvironmentByName: %v", err) + } + if env.GatewayURL != "https://staging.example.com" { + t.Errorf("GatewayURL = %q, want %q", env.GatewayURL, "https://staging.example.com") + } + if env.Description != "Staging env" { + t.Errorf("Description = %q, want %q", env.Description, "Staging env") + } +} + +func TestAddEnvironment_update(t *testing.T) { + cleanup := writeTestConfig(t, defaultTestConfig()) + defer cleanup() + + if err := AddEnvironment("sandbox", "https://new.example.com", "Updated sandbox"); err != nil { + t.Fatalf("AddEnvironment: %v", err) + } + + env, err := GetEnvironmentByName("sandbox") + if err != nil { + t.Fatalf("GetEnvironmentByName: %v", err) + } + if env.GatewayURL 
!= "https://new.example.com" { + t.Errorf("GatewayURL = %q, want %q", env.GatewayURL, "https://new.example.com") + } + if env.Description != "Updated sandbox" { + t.Errorf("Description = %q, want %q", env.Description, "Updated sandbox") + } + + // Verify upsert didn't create a duplicate + cfg, _ := LoadEnvironmentConfig() + count := 0 + for _, e := range cfg.Environments { + if e.Name == "sandbox" { + count++ + } + } + if count != 1 { + t.Errorf("sandbox entries = %d, want 1", count) + } +} + +func TestRemoveEnvironment_existing(t *testing.T) { + cleanup := writeTestConfig(t, defaultTestConfig()) + defer cleanup() + + if err := RemoveEnvironment("testnet"); err != nil { + t.Fatalf("RemoveEnvironment: %v", err) + } + + _, err := GetEnvironmentByName("testnet") + if err == nil { + t.Error("expected error for removed environment, got nil") + } +} + +func TestRemoveEnvironment_absent(t *testing.T) { + cleanup := writeTestConfig(t, defaultTestConfig()) + defer cleanup() + + if err := RemoveEnvironment("nonexistent"); err != nil { + t.Errorf("RemoveEnvironment(absent) = %v, want nil", err) + } +} + +func TestRemoveEnvironment_active_falls_back(t *testing.T) { + cleanup := writeTestConfig(t, defaultTestConfig()) + defer cleanup() + + if err := RemoveEnvironment("sandbox"); err != nil { + t.Fatalf("RemoveEnvironment: %v", err) + } + + cfg, err := LoadEnvironmentConfig() + if err != nil { + t.Fatalf("LoadEnvironmentConfig: %v", err) + } + if cfg.ActiveEnvironment != "devnet" { + t.Errorf("ActiveEnvironment = %q, want %q", cfg.ActiveEnvironment, "devnet") + } +} diff --git a/core/pkg/cli/functions/helpers.go b/core/pkg/cli/functions/helpers.go index f0baf84..41a2b79 100644 --- a/core/pkg/cli/functions/helpers.go +++ b/core/pkg/cli/functions/helpers.go @@ -24,6 +24,14 @@ type FunctionConfig struct { Timeout int `yaml:"timeout"` Retry RetryConfig `yaml:"retry"` Env map[string]string `yaml:"env"` + + // Persistent WebSocket settings — when WSPersistent is true, the function + 
// must export ws_open / ws_frame / ws_close instead of running per-frame + // stateless. See core/plans/platform/06_PERSISTENT_WS_FUNCTIONS.md. + WSPersistent bool `yaml:"ws_persistent"` + WSIdleTimeoutSec int `yaml:"ws_idle_timeout_sec"` + WSMaxFrameBytes int `yaml:"ws_max_frame_bytes"` + WSMaxInflightPerConn int `yaml:"ws_max_inflight_per_conn"` } // RetryConfig holds retry settings. @@ -198,11 +206,28 @@ func uploadWASMFunction(wasmPath string, cfg *FunctionConfig) (map[string]interf writer.WriteField("retry_count", strconv.Itoa(cfg.Retry.Count)) writer.WriteField("retry_delay_seconds", strconv.Itoa(cfg.Retry.Delay)) - // Add env vars as metadata JSON + // Build metadata JSON. The deploy handler json.Unmarshal()s this into + // FunctionDefinition first, then overlays the explicit form fields below. + // Any field that has no explicit form-field equivalent (env vars, the + // ws_* persistent settings) MUST live in this blob. + metaObj := map[string]interface{}{} if len(cfg.Env) > 0 { - metadata, _ := json.Marshal(map[string]interface{}{ - "env_vars": cfg.Env, - }) + metaObj["env_vars"] = cfg.Env + } + if cfg.WSPersistent { + metaObj["ws_persistent"] = true + } + if cfg.WSIdleTimeoutSec > 0 { + metaObj["ws_idle_timeout_sec"] = cfg.WSIdleTimeoutSec + } + if cfg.WSMaxFrameBytes > 0 { + metaObj["ws_max_frame_bytes"] = cfg.WSMaxFrameBytes + } + if cfg.WSMaxInflightPerConn > 0 { + metaObj["ws_max_inflight_per_conn"] = cfg.WSMaxInflightPerConn + } + if len(metaObj) > 0 { + metadata, _ := json.Marshal(metaObj) writer.WriteField("metadata", string(metadata)) } diff --git a/core/pkg/cli/functions/logs.go b/core/pkg/cli/functions/logs.go index d9d4ae5..b11e11a 100644 --- a/core/pkg/cli/functions/logs.go +++ b/core/pkg/cli/functions/logs.go @@ -3,31 +3,58 @@ package functions import ( "fmt" "strconv" + "strings" "github.com/spf13/cobra" ) -var logsLimit int +var ( + logsLimit int + logsWASMOnly bool +) -// LogsCmd retrieves function execution logs. 
+// LogsCmd retrieves function invocation history. +// +// Default view: invocation history (always populated when the function has +// been invoked) — request_id, status, duration, error_message, plus any +// WASM-emitted log entries nested per record. +// +// --wasm-only switches to the legacy view that returns ONLY entries +// emitted by the function via log_info / log_error (often empty). var LogsCmd = &cobra.Command{ Use: "logs ", - Short: "Get execution logs for a function", - Long: "Retrieves the most recent execution logs for a deployed function.", - Args: cobra.ExactArgs(1), - RunE: runLogs, + Short: "Get invocation history for a function", + Long: `Retrieves the most recent invocations for a deployed function. + +Each invocation record shows: timestamp, request_id, status, duration_ms, +and (if any) the error message. WASM functions that emit log entries via +log_info / log_error have those entries nested under each record. + +Pass --wasm-only to retrieve only the WASM-emitted log lines (legacy +behavior; rarely useful on functions that don't call log_info).`, + Args: cobra.ExactArgs(1), + RunE: runLogs, } func init() { - LogsCmd.Flags().IntVar(&logsLimit, "limit", 50, "Maximum number of log entries to retrieve") + LogsCmd.Flags().IntVar(&logsLimit, "limit", 50, "Maximum number of records to retrieve") + LogsCmd.Flags().BoolVar(&logsWASMOnly, "wasm-only", false, + "Show only WASM-emitted log entries (legacy view)") } func runLogs(cmd *cobra.Command, args []string) error { name := args[0] endpoint := "/v1/functions/" + name + "/logs" + q := []string{} if logsLimit > 0 { - endpoint += "?limit=" + strconv.Itoa(logsLimit) + q = append(q, "limit="+strconv.Itoa(logsLimit)) + } + if logsWASMOnly { + q = append(q, "wasm_only=1") + } + if len(q) > 0 { + endpoint += "?" 
+ strings.Join(q, "&") } result, err := apiGet(endpoint) @@ -35,9 +62,64 @@ func runLogs(cmd *cobra.Command, args []string) error { return err } + if logsWASMOnly { + return printWASMLogs(name, result) + } + return printInvocations(name, result) +} + +// printInvocations renders the default invocation-history view. +func printInvocations(name string, result map[string]interface{}) error { + invs, ok := result["invocations"].([]interface{}) + if !ok || len(invs) == 0 { + fmt.Printf("No invocations found for function %q.\n", name) + return nil + } + + for _, entry := range invs { + inv, ok := entry.(map[string]interface{}) + if !ok { + continue + } + started := valStr(inv, "started_at") + status := valStr(inv, "status") + reqID := valStr(inv, "request_id") + duration := valNumberAsString(inv, "duration_ms") + errMsg := valStr(inv, "error_message") + + // Header line per invocation. + fmt.Printf("[%s] %s request=%s duration=%sms\n", + started, strings.ToUpper(status), reqID, duration) + if errMsg != "" { + fmt.Printf(" error: %s\n", errMsg) + } + + // Nested WASM logs (if any). + if wasmLogs, ok := inv["wasm_logs"].([]interface{}); ok { + for _, l := range wasmLogs { + le, ok := l.(map[string]interface{}) + if !ok { + continue + } + fmt.Printf(" %s [%s] %s\n", + valStr(le, "timestamp"), + strings.ToUpper(valStr(le, "level")), + valStr(le, "message")) + } + } + } + + fmt.Printf("\nShowing %d invocation(s). Use --wasm-only for the legacy log-line view.\n", + len(invs)) + return nil +} + +// printWASMLogs renders the legacy WASM-only view. +func printWASMLogs(name string, result map[string]interface{}) error { logs, ok := result["logs"].([]interface{}) if !ok || len(logs) == 0 { - fmt.Printf("No logs found for function %q.\n", name) + fmt.Printf("No WASM-emitted logs found for function %q. 
// valNumberAsString renders the value stored under key as an integer-looking
// string.
//
//   - missing key or nil value -> "0"
//   - string                   -> returned unchanged
//   - float64 / int / int64    -> base-10 integer (float64 is truncated)
//   - anything else            -> fmt's default %v formatting
func valNumberAsString(m map[string]interface{}, key string) string {
	raw := m[key] // a missing key yields nil, same as an explicit nil value
	if raw == nil {
		return "0"
	}
	if text, isString := raw.(string); isString {
		return text
	}

	var whole int64
	switch num := raw.(type) {
	case float64:
		whole = int64(num)
	case int:
		whole = int64(num)
	case int64:
		whole = num
	default:
		return fmt.Sprintf("%v", raw)
	}
	return strconv.FormatInt(whole, 10)
}
Examples: orama function triggers add my-function --topic calls:invite + orama function triggers add my-function --schedule "0 3 * * *" + orama function triggers add my-function --schedule "*/30 * * * * *" orama function triggers list my-function orama function triggers delete my-function `, } -// TriggersAddCmd adds a PubSub trigger to a function. +// TriggersAddCmd adds a PubSub or Cron trigger to a function. var TriggersAddCmd = &cobra.Command{ Use: "add ", - Short: "Add a PubSub trigger", - Long: "Registers a PubSub trigger so the function is invoked when a message is published to the topic.", - Args: cobra.ExactArgs(1), - RunE: runTriggersAdd, + Short: "Add a PubSub or Cron trigger", + Long: `Registers a trigger that invokes the function automatically. + +Pass exactly one of --topic (PubSub) or --schedule (cron). Schedules +accept either 5-field crontab (minute hour dom month dow) or 6-field +with seconds (sec minute hour dom month dow).`, + Args: cobra.ExactArgs(1), + RunE: runTriggersAdd, } // TriggersListCmd lists triggers for a function. @@ -57,15 +70,18 @@ func init() { TriggersCmd.AddCommand(TriggersListCmd) TriggersCmd.AddCommand(TriggersDeleteCmd) - TriggersAddCmd.Flags().StringVar(&triggerTopic, "topic", "", "PubSub topic to trigger on (required)") - TriggersAddCmd.MarkFlagRequired("topic") + TriggersAddCmd.Flags().StringVar(&triggerTopic, "topic", "", "PubSub topic to trigger on") + TriggersAddCmd.Flags().StringVar(&triggerSchedule, "schedule", "", "Cron expression to trigger on (e.g. 
\"0 3 * * *\")") + TriggersAddCmd.MarkFlagsMutuallyExclusive("topic", "schedule") + TriggersAddCmd.MarkFlagsOneRequired("topic", "schedule") } func runTriggersAdd(cmd *cobra.Command, args []string) error { funcName := args[0] body, _ := json.Marshal(map[string]string{ - "topic": triggerTopic, + "topic": triggerTopic, + "cron_expression": triggerSchedule, }) resp, err := apiRequest("POST", "/v1/functions/"+funcName+"/triggers", bytes.NewReader(body), "application/json") @@ -88,7 +104,11 @@ func runTriggersAdd(cmd *cobra.Command, args []string) error { return fmt.Errorf("failed to parse response: %w", err) } - fmt.Printf("Trigger added: %s → %s (id: %s)\n", triggerTopic, funcName, result["trigger_id"]) + if triggerSchedule != "" { + fmt.Printf("Trigger added: cron(%s) → %s (id: %s)\n", triggerSchedule, funcName, result["trigger_id"]) + } else { + fmt.Printf("Trigger added: %s → %s (id: %s)\n", triggerTopic, funcName, result["trigger_id"]) + } return nil } @@ -107,32 +127,83 @@ func runTriggersList(cmd *cobra.Command, args []string) error { } w := tabwriter.NewWriter(cmd.OutOrStdout(), 0, 0, 2, ' ', 0) - fmt.Fprintln(w, "ID\tTOPIC\tENABLED") + // Bug #65 audit: the previous CLI rendered only ID/TOPIC/ENABLED, so cron + // triggers appeared as mystery blank-topic rows. The handler returns a + // `kind` discriminator plus pubsub-only `topic` or cron-only + // `cron_expression` / `next_run_at` / `last_run_at`; the CLI now renders + // both kinds in a single unified table. + fmt.Fprintln(w, "ID\tKIND\tSCHEDULE/TOPIC\tNEXT RUN\tLAST RUN\tENABLED") for _, t := range triggers { tr, ok := t.(map[string]interface{}) if !ok { continue } - id, _ := tr["ID"].(string) - if id == "" { - id, _ = tr["id"].(string) + id := stringField(tr, "id", "ID") + kind := stringField(tr, "kind", "Kind") + // Backward compat: pre-#65 servers returned only `topic` with no + // `kind` field. Treat those as pubsub. 
+ if kind == "" { + kind = "pubsub" } - topic, _ := tr["Topic"].(string) - if topic == "" { - topic, _ = tr["topic"].(string) + + var what, nextRun, lastRun string + switch kind { + case "cron": + what = stringField(tr, "cron_expression", "CronExpression") + nextRun = formatCronTimestamp(tr["next_run_at"]) + lastRun = formatCronTimestamp(tr["last_run_at"]) + default: // pubsub or unknown + what = stringField(tr, "topic", "Topic") + nextRun = "-" + lastRun = "-" } + enabled := true if e, ok := tr["Enabled"].(bool); ok { enabled = e } else if e, ok := tr["enabled"].(bool); ok { enabled = e } - fmt.Fprintf(w, "%s\t%s\t%v\n", id, topic, enabled) + fmt.Fprintf(w, "%s\t%s\t%s\t%s\t%s\t%v\n", id, kind, what, nextRun, lastRun, enabled) } w.Flush() return nil } +// stringField pulls a string from a JSON-decoded map under any of the +// supplied keys, in order. The handler emits snake_case (`cron_expression`) +// while older Go-tagged structs may surface PascalCase — try both. +func stringField(m map[string]interface{}, keys ...string) string { + for _, k := range keys { + if v, ok := m[k].(string); ok && v != "" { + return v + } + } + return "" +} + +// formatCronTimestamp renders a JSON timestamp from the handler in a compact +// human-readable form. Returns "-" for nil / unparseable values so the CLI +// table stays aligned for never-run / pubsub rows. +func formatCronTimestamp(v interface{}) string { + if v == nil { + return "-" + } + s, ok := v.(string) + if !ok || s == "" { + return "-" + } + // Try RFC3339 first (Go's default time.Time JSON encoding); fall back to + // the raw string so unexpected formats don't disappear silently. 
+ if ts, err := time.Parse(time.RFC3339, s); err == nil { + return ts.UTC().Format("2006-01-02 15:04:05 UTC") + } + if ts, err := time.Parse(time.RFC3339Nano, s); err == nil { + return ts.UTC().Format("2006-01-02 15:04:05 UTC") + } + return s +} + func runTriggersDelete(cmd *cobra.Command, args []string) error { funcName := args[0] triggerID := args[1] diff --git a/core/pkg/cli/functions/triggers_test.go b/core/pkg/cli/functions/triggers_test.go new file mode 100644 index 0000000..002bd2f --- /dev/null +++ b/core/pkg/cli/functions/triggers_test.go @@ -0,0 +1,114 @@ +package functions + +import ( + "testing" +) + +// ---------------------------------------------------------------------------- +// stringField — pulls value from JSON-decoded map under any of the given keys +// ---------------------------------------------------------------------------- + +func TestStringField_prefersFirstKey(t *testing.T) { + m := map[string]interface{}{ + "id": "first", + "ID": "second", + } + if got := stringField(m, "id", "ID"); got != "first" { + t.Errorf("stringField = %q, want %q", got, "first") + } +} + +func TestStringField_fallsThroughWhenFirstMissing(t *testing.T) { + m := map[string]interface{}{ + "ID": "second", + } + if got := stringField(m, "id", "ID"); got != "second" { + t.Errorf("stringField = %q, want %q", got, "second") + } +} + +func TestStringField_emptyValueSkipped(t *testing.T) { + // An empty string under the first key MUST fall through to subsequent + // keys, otherwise empty pubsub `topic` fields would shadow valid + // PascalCase `Topic`. 
+ m := map[string]interface{}{ + "id": "", + "ID": "fallback", + } + if got := stringField(m, "id", "ID"); got != "fallback" { + t.Errorf("stringField = %q, want %q", got, "fallback") + } +} + +func TestStringField_nonStringValueSkipped(t *testing.T) { + m := map[string]interface{}{ + "id": 42, + "ID": "ok", + } + if got := stringField(m, "id", "ID"); got != "ok" { + t.Errorf("stringField = %q, want %q", got, "ok") + } +} + +func TestStringField_allMissingReturnsEmpty(t *testing.T) { + m := map[string]interface{}{"other": "value"} + if got := stringField(m, "id", "ID"); got != "" { + t.Errorf("stringField = %q, want empty", got) + } +} + +// ---------------------------------------------------------------------------- +// formatCronTimestamp — RFC3339 -> UTC display, "-" for missing/unparseable +// ---------------------------------------------------------------------------- + +func TestFormatCronTimestamp_nilReturnsDash(t *testing.T) { + if got := formatCronTimestamp(nil); got != "-" { + t.Errorf("formatCronTimestamp(nil) = %q, want %q", got, "-") + } +} + +func TestFormatCronTimestamp_emptyStringReturnsDash(t *testing.T) { + if got := formatCronTimestamp(""); got != "-" { + t.Errorf("formatCronTimestamp(\"\") = %q, want %q", got, "-") + } +} + +func TestFormatCronTimestamp_nonStringReturnsDash(t *testing.T) { + if got := formatCronTimestamp(42); got != "-" { + t.Errorf("formatCronTimestamp(42) = %q, want %q", got, "-") + } +} + +func TestFormatCronTimestamp_rfc3339(t *testing.T) { + got := formatCronTimestamp("2025-05-08T03:00:00Z") + want := "2025-05-08 03:00:00 UTC" + if got != want { + t.Errorf("formatCronTimestamp = %q, want %q", got, want) + } +} + +func TestFormatCronTimestamp_rfc3339Nano(t *testing.T) { + got := formatCronTimestamp("2025-05-08T03:00:00.123456789Z") + want := "2025-05-08 03:00:00 UTC" + if got != want { + t.Errorf("formatCronTimestamp nano = %q, want %q", got, want) + } +} + +func TestFormatCronTimestamp_rfc3339WithOffset(t *testing.T) { + // 
Non-UTC offsets must be normalised to UTC for the display. + got := formatCronTimestamp("2025-05-08T05:00:00+02:00") + want := "2025-05-08 03:00:00 UTC" + if got != want { + t.Errorf("formatCronTimestamp offset = %q, want %q", got, want) + } +} + +func TestFormatCronTimestamp_unparseableFallsBackToRaw(t *testing.T) { + // If the server returns an unexpected timestamp shape, surface it + // rather than silently dropping to "-" — operator visibility wins. + got := formatCronTimestamp("not-a-timestamp") + if got != "not-a-timestamp" { + t.Errorf("formatCronTimestamp unparseable = %q, want raw passthrough", got) + } +} diff --git a/core/pkg/cli/monitor/alerts.go b/core/pkg/cli/monitor/alerts.go index 49e1437..317b74b 100644 --- a/core/pkg/cli/monitor/alerts.go +++ b/core/pkg/cli/monitor/alerts.go @@ -124,6 +124,7 @@ func DeriveAlerts(snap *ClusterSnapshot) []Alert { alerts = append(alerts, checkNodeNetwork(r, host)...) alerts = append(alerts, checkNodeOlric(r, host)...) alerts = append(alerts, checkNodeIPFS(r, host)...) + alerts = append(alerts, checkNodeVault(r, host)...) alerts = append(alerts, checkNodeGateway(r, host)...) 
} @@ -866,6 +867,41 @@ func checkNodeIPFS(r *report.NodeReport, host string) []Alert { return alerts } +func checkNodeVault(r *report.NodeReport, host string) []Alert { + if r.Vault == nil { + return nil + } + var alerts []Alert + + if !r.Vault.ServiceActive { + alerts = append(alerts, Alert{AlertCritical, "vault", host, "Vault service not running"}) + return alerts + } + + if !r.Vault.Responsive { + alerts = append(alerts, Alert{AlertWarning, "vault", host, "Vault not responding to health queries"}) + return alerts + } + + switch r.Vault.Status { + case "unavailable": + alerts = append(alerts, Alert{AlertCritical, "vault", host, + fmt.Sprintf("Vault unavailable: %d/%d guardians healthy (need %d for reads)", + r.Vault.Healthy, r.Vault.Guardians, r.Vault.Threshold)}) + case "degraded": + alerts = append(alerts, Alert{AlertWarning, "vault", host, + fmt.Sprintf("Vault degraded: %d/%d guardians healthy (need %d for writes)", + r.Vault.Healthy, r.Vault.Guardians, r.Vault.WriteQuorum)}) + } + + if r.Vault.RestartCount > 3 { + alerts = append(alerts, Alert{AlertWarning, "vault", host, + fmt.Sprintf("Vault restarted %d times", r.Vault.RestartCount)}) + } + + return alerts +} + func checkNodeGateway(r *report.NodeReport, host string) []Alert { if r.Gateway == nil { return nil diff --git a/core/pkg/cli/monitor/alerts_vault_test.go b/core/pkg/cli/monitor/alerts_vault_test.go new file mode 100644 index 0000000..2ea302d --- /dev/null +++ b/core/pkg/cli/monitor/alerts_vault_test.go @@ -0,0 +1,120 @@ +package monitor + +import ( + "testing" + + "github.com/DeBrosOfficial/network/pkg/cli/production/report" +) + +func TestCheckNodeVault_nil(t *testing.T) { + r := &report.NodeReport{} + alerts := checkNodeVault(r, "10.0.0.1") + if len(alerts) != 0 { + t.Errorf("expected 0 alerts for nil vault, got %d", len(alerts)) + } +} + +func TestCheckNodeVault_serviceInactive(t *testing.T) { + r := &report.NodeReport{ + Vault: &report.VaultReport{ServiceActive: false}, + } + alerts := 
checkNodeVault(r, "10.0.0.1") + if len(alerts) != 1 { + t.Fatalf("expected 1 alert, got %d", len(alerts)) + } + if alerts[0].Severity != AlertCritical { + t.Errorf("expected critical, got %s", alerts[0].Severity) + } +} + +func TestCheckNodeVault_unresponsive(t *testing.T) { + r := &report.NodeReport{ + Vault: &report.VaultReport{ServiceActive: true, Responsive: false}, + } + alerts := checkNodeVault(r, "10.0.0.1") + if len(alerts) != 1 { + t.Fatalf("expected 1 alert, got %d", len(alerts)) + } + if alerts[0].Severity != AlertWarning { + t.Errorf("expected warning, got %s", alerts[0].Severity) + } +} + +func TestCheckNodeVault_unavailable(t *testing.T) { + r := &report.NodeReport{ + Vault: &report.VaultReport{ + ServiceActive: true, + Responsive: true, + Status: "unavailable", + Guardians: 5, + Healthy: 1, + Threshold: 3, + WriteQuorum: 4, + }, + } + alerts := checkNodeVault(r, "10.0.0.1") + if len(alerts) != 1 { + t.Fatalf("expected 1 alert, got %d", len(alerts)) + } + if alerts[0].Severity != AlertCritical { + t.Errorf("expected critical, got %s", alerts[0].Severity) + } +} + +func TestCheckNodeVault_degraded(t *testing.T) { + r := &report.NodeReport{ + Vault: &report.VaultReport{ + ServiceActive: true, + Responsive: true, + Status: "degraded", + Guardians: 5, + Healthy: 3, + Threshold: 3, + WriteQuorum: 4, + }, + } + alerts := checkNodeVault(r, "10.0.0.1") + if len(alerts) != 1 { + t.Fatalf("expected 1 alert, got %d", len(alerts)) + } + if alerts[0].Severity != AlertWarning { + t.Errorf("expected warning, got %s", alerts[0].Severity) + } +} + +func TestCheckNodeVault_excessiveRestarts(t *testing.T) { + r := &report.NodeReport{ + Vault: &report.VaultReport{ + ServiceActive: true, + Responsive: true, + Status: "healthy", + RestartCount: 5, + }, + } + alerts := checkNodeVault(r, "10.0.0.1") + if len(alerts) != 1 { + t.Fatalf("expected 1 alert, got %d", len(alerts)) + } + if alerts[0].Severity != AlertWarning { + t.Errorf("expected warning, got %s", 
alerts[0].Severity) + } +} + +func TestCheckNodeVault_healthy(t *testing.T) { + r := &report.NodeReport{ + Vault: &report.VaultReport{ + ServiceActive: true, + Responsive: true, + Status: "healthy", + Guardians: 5, + Healthy: 5, + Threshold: 3, + WriteQuorum: 4, + RestartCount: 0, + }, + } + alerts := checkNodeVault(r, "10.0.0.1") + if len(alerts) != 0 { + t.Errorf("expected 0 alerts for healthy vault, got %d", len(alerts)) + } +} diff --git a/core/pkg/cli/noderesolver/resolver.go b/core/pkg/cli/noderesolver/resolver.go new file mode 100644 index 0000000..0843c53 --- /dev/null +++ b/core/pkg/cli/noderesolver/resolver.go @@ -0,0 +1,161 @@ +// Package noderesolver provides unified node discovery for the orama CLI. +// +// It resolves operator-owned nodes by querying the network's gateway API +// (primary) or falling back to the legacy nodes.conf file. +package noderesolver + +import ( + "context" + "encoding/json" + "fmt" + "io" + "net/http" + "net/url" + "time" + + "github.com/DeBrosOfficial/network/pkg/auth" + "github.com/DeBrosOfficial/network/pkg/cli" + "github.com/DeBrosOfficial/network/pkg/cli/remotessh" + "github.com/DeBrosOfficial/network/pkg/inspector" +) + +// httpClient is the shared HTTP client for API calls. +var httpClient = &http.Client{Timeout: 10 * time.Second} + +// ResolveNodes returns the operator's nodes for a given environment. +// It first tries the network API (GET /v1/operator/nodes), then falls +// back to nodes.conf if the API is unreachable or returns no results. 
+func ResolveNodes(env string) ([]inspector.Node, error) { + nodes, err := resolveFromNetwork(env) + if err == nil && len(nodes) > 0 { + return nodes, nil + } + + // Fallback to nodes.conf + confNodes, confErr := remotessh.LoadEnvNodes(env) + if confErr != nil { + if err != nil { + return nil, fmt.Errorf("network API: %w; nodes.conf: %v", err, confErr) + } + return nil, confErr + } + return confNodes, nil +} + +// ResolveNodesNetworkOnly queries only the network API without nodes.conf fallback. +func ResolveNodesNetworkOnly(env string) ([]inspector.Node, error) { + return resolveFromNetwork(env) +} + +// resolveFromNetwork queries the gateway API for operator-owned nodes. +func resolveFromNetwork(env string) ([]inspector.Node, error) { + // 1. Get gateway URL for the environment + gatewayURL, err := gatewayURLForEnv(env) + if err != nil { + return nil, fmt.Errorf("failed to resolve gateway URL: %w", err) + } + + // 2. Load stored credentials for this gateway + apiKey, err := loadAPIKey(gatewayURL) + if err != nil { + return nil, fmt.Errorf("no credentials for %s: %w (run 'orama auth login' first)", gatewayURL, err) + } + + return resolveFromNetworkWithURL(gatewayURL, apiKey, env) +} + +// resolveFromNetworkWithURL queries a specific gateway URL with an API key. +// Exported for testing. 
+func resolveFromNetworkWithURL(gatewayURL, apiKey, env string) ([]inspector.Node, error) { + endpoint := fmt.Sprintf("%s/v1/operator/nodes?env=%s", gatewayURL, url.QueryEscape(env)) + req, err := http.NewRequestWithContext(context.Background(), http.MethodGet, endpoint, nil) + if err != nil { + return nil, fmt.Errorf("failed to create request: %w", err) + } + req.Header.Set("X-API-Key", apiKey) + + resp, err := httpClient.Do(req) + if err != nil { + return nil, fmt.Errorf("failed to reach gateway: %w", err) + } + defer resp.Body.Close() + + body, err := io.ReadAll(io.LimitReader(resp.Body, 1<<20)) + if err != nil { + return nil, fmt.Errorf("failed to read response: %w", err) + } + + if resp.StatusCode != http.StatusOK { + return nil, fmt.Errorf("gateway returned HTTP %d: %s", resp.StatusCode, string(body)) + } + + var result struct { + Nodes []struct { + ID string `json:"id"` + IPAddress string `json:"ip_address"` + InternalIP string `json:"internal_ip"` + Environment string `json:"environment"` + Role string `json:"role"` + SSHUser string `json:"ssh_user"` + Status string `json:"status"` + } `json:"nodes"` + } + if err := json.Unmarshal(body, &result); err != nil { + return nil, fmt.Errorf("failed to parse response: %w", err) + } + + nodes := make([]inspector.Node, 0, len(result.Nodes)) + for _, n := range result.Nodes { + user := n.SSHUser + if user == "" { + user = "root" + } + // Sandbox nodes share a single SSH key; production nodes use per-host keys. + vaultTarget := fmt.Sprintf("%s/%s", n.IPAddress, user) + if n.Environment == "sandbox" { + vaultTarget = "sandbox/root" + } + nodes = append(nodes, inspector.Node{ + Environment: n.Environment, + User: user, + Host: n.IPAddress, + Role: n.Role, + VaultTarget: vaultTarget, + }) + } + + return nodes, nil +} + +// gatewayURLForEnv returns the gateway URL for a given environment name. +// If env is empty, uses the active environment. 
+func gatewayURLForEnv(env string) (string, error) { + if env == "" { + e, err := cli.GetActiveEnvironment() + if err != nil { + return "", err + } + return e.GatewayURL, nil + } + + e, err := cli.GetEnvironmentByName(env) + if err != nil { + return "", err + } + return e.GatewayURL, nil +} + +// loadAPIKey loads the stored API key for a gateway URL. +func loadAPIKey(gatewayURL string) (string, error) { + store, err := auth.LoadEnhancedCredentials() + if err != nil { + return "", fmt.Errorf("failed to load credentials: %w", err) + } + + creds := store.GetDefaultCredential(gatewayURL) + if creds == nil || creds.APIKey == "" { + return "", fmt.Errorf("no credentials found for %s", gatewayURL) + } + + return creds.APIKey, nil +} diff --git a/core/pkg/cli/noderesolver/resolver_test.go b/core/pkg/cli/noderesolver/resolver_test.go new file mode 100644 index 0000000..3750e87 --- /dev/null +++ b/core/pkg/cli/noderesolver/resolver_test.go @@ -0,0 +1,152 @@ +package noderesolver + +import ( + "encoding/json" + "net/http" + "net/http/httptest" + "testing" +) + +func TestGatewayURLForEnv_knownEnv(t *testing.T) { + url, err := gatewayURLForEnv("devnet") + if err != nil { + t.Fatalf("gatewayURLForEnv(devnet): %v", err) + } + if url == "" { + t.Error("expected non-empty gateway URL for devnet") + } +} + +func TestGatewayURLForEnv_unknownEnv(t *testing.T) { + _, err := gatewayURLForEnv("nonexistent") + if err == nil { + t.Error("expected error for unknown environment") + } +} + +func TestResolveFromMockServer_happyPath(t *testing.T) { + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if r.URL.Path != "/v1/operator/nodes" { + http.Error(w, "not found", http.StatusNotFound) + return + } + if r.Header.Get("X-API-Key") != "test-key" { + http.Error(w, "unauthorized", http.StatusUnauthorized) + return + } + + env := r.URL.Query().Get("env") + resp := map[string]interface{}{ + "nodes": []map[string]string{ + {"id": "node-1", "ip_address": 
"1.2.3.4", "environment": env, "role": "nameserver", "ssh_user": "root", "status": "active"}, + {"id": "node-2", "ip_address": "5.6.7.8", "environment": env, "role": "node", "ssh_user": "ubuntu", "status": "active"}, + }, + } + w.Header().Set("Content-Type", "application/json") + json.NewEncoder(w).Encode(resp) + })) + defer server.Close() + + nodes, err := resolveFromNetworkWithURL(server.URL, "test-key", "devnet") + if err != nil { + t.Fatalf("resolveFromNetworkWithURL: %v", err) + } + + if len(nodes) != 2 { + t.Fatalf("expected 2 nodes, got %d", len(nodes)) + } + + if nodes[0].Host != "1.2.3.4" { + t.Errorf("node 0 host = %q, want %q", nodes[0].Host, "1.2.3.4") + } + if nodes[0].Role != "nameserver" { + t.Errorf("node 0 role = %q, want %q", nodes[0].Role, "nameserver") + } + if nodes[0].VaultTarget != "1.2.3.4/root" { + t.Errorf("node 0 vault target = %q, want %q", nodes[0].VaultTarget, "1.2.3.4/root") + } + if nodes[0].Environment != "devnet" { + t.Errorf("node 0 environment = %q, want %q", nodes[0].Environment, "devnet") + } + if nodes[1].User != "ubuntu" { + t.Errorf("node 1 user = %q, want %q", nodes[1].User, "ubuntu") + } + if nodes[1].VaultTarget != "5.6.7.8/ubuntu" { + t.Errorf("node 1 vault target = %q, want %q", nodes[1].VaultTarget, "5.6.7.8/ubuntu") + } +} + +func TestResolveFromMockServer_emptySSHUser(t *testing.T) { + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + resp := map[string]interface{}{ + "nodes": []map[string]string{ + {"id": "node-1", "ip_address": "1.2.3.4", "environment": "devnet", "role": "node", "ssh_user": "", "status": "active"}, + }, + } + w.Header().Set("Content-Type", "application/json") + json.NewEncoder(w).Encode(resp) + })) + defer server.Close() + + nodes, err := resolveFromNetworkWithURL(server.URL, "key", "devnet") + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if len(nodes) != 1 { + t.Fatalf("expected 1 node, got %d", len(nodes)) + } + if nodes[0].User != 
"root" { + t.Errorf("user = %q, want %q (default)", nodes[0].User, "root") + } + if nodes[0].VaultTarget != "1.2.3.4/root" { + t.Errorf("vault target = %q, want %q", nodes[0].VaultTarget, "1.2.3.4/root") + } +} + +func TestResolveFromMockServer_unauthorized(t *testing.T) { + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + http.Error(w, `{"error":"unauthorized"}`, http.StatusUnauthorized) + })) + defer server.Close() + + _, err := resolveFromNetworkWithURL(server.URL, "bad-key", "devnet") + if err == nil { + t.Error("expected error for unauthorized request") + } +} + +func TestResolveFromMockServer_emptyNodes(t *testing.T) { + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "application/json") + json.NewEncoder(w).Encode(map[string]interface{}{"nodes": []interface{}{}}) + })) + defer server.Close() + + nodes, err := resolveFromNetworkWithURL(server.URL, "key", "devnet") + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if len(nodes) != 0 { + t.Errorf("expected 0 nodes, got %d", len(nodes)) + } +} + +func TestResolveFromMockServer_malformedJSON(t *testing.T) { + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusOK) + w.Write([]byte(`not json`)) + })) + defer server.Close() + + _, err := resolveFromNetworkWithURL(server.URL, "key", "devnet") + if err == nil { + t.Error("expected error for malformed JSON response") + } +} + +func TestResolveFromMockServer_serverDown(t *testing.T) { + _, err := resolveFromNetworkWithURL("http://127.0.0.1:1", "key", "devnet") + if err == nil { + t.Error("expected error for unreachable server") + } +} diff --git a/core/pkg/cli/production/clean/clean.go b/core/pkg/cli/production/clean/clean.go index 547a9a3..fe4b61f 100644 --- a/core/pkg/cli/production/clean/clean.go +++ b/core/pkg/cli/production/clean/clean.go @@ -133,7 +133,7 @@ 
func cleanNode(node inspector.Node, nuclear bool) error { %s # Stop services -for svc in caddy coredns orama-node orama-gateway orama-ipfs-cluster orama-ipfs orama-olric orama-anyone-relay orama-anyone-client; do +for svc in caddy coredns orama-node orama-gateway orama-ipfs-cluster orama-ipfs orama-olric orama-vault orama-anyone-relay orama-anyone-client; do systemctl stop "$svc" 2>/dev/null systemctl disable "$svc" 2>/dev/null done @@ -171,7 +171,7 @@ rm -f /tmp/orama-*.sh /tmp/network-source.tar.gz /tmp/orama-*.tar.gz # Nuclear: remove binaries if [ -n "$NUCLEAR" ]; then rm -f /usr/local/bin/orama /usr/local/bin/orama-node /usr/local/bin/gateway - rm -f /usr/local/bin/identity /usr/local/bin/sfu /usr/local/bin/turn + rm -f /usr/local/bin/identity /usr/local/bin/sfu /usr/local/bin/turn /usr/local/bin/orama-sni-router rm -f /usr/local/bin/olric-server /usr/local/bin/ipfs /usr/local/bin/ipfs-cluster-service rm -f /usr/local/bin/rqlited /usr/local/bin/coredns rm -f /usr/bin/caddy diff --git a/core/pkg/cli/production/install/flags.go b/core/pkg/cli/production/install/flags.go index 50b844e..d3a360d 100644 --- a/core/pkg/cli/production/install/flags.go +++ b/core/pkg/cli/production/install/flags.go @@ -43,6 +43,11 @@ type Flags struct { AnyoneFamily string // Comma-separated fingerprints of other relays you operate AnyoneBandwidth int // Percentage of VPS bandwidth for relay (default: 30, 0=unlimited) AnyoneAccounting int // Monthly data cap for relay in GB (0=unlimited) + + // Operator metadata (set by orama node setup, written to node.yaml for registration) + SSHUser string // SSH user for remote management + Environment string // Environment name (devnet, testnet, etc.) 
+ OperatorWallet string // Operator wallet address } // ParseFlags parses install command flags @@ -90,6 +95,11 @@ func ParseFlags(args []string) (*Flags, error) { fs.IntVar(&flags.AnyoneBandwidth, "anyone-bandwidth", 30, "Limit relay to N% of VPS bandwidth (0=unlimited, runs speedtest)") fs.IntVar(&flags.AnyoneAccounting, "anyone-accounting", 0, "Monthly data cap for relay in GB (0=unlimited)") + // Operator metadata (set by orama node setup) + fs.StringVar(&flags.SSHUser, "ssh-user", "", "SSH user for remote management") + fs.StringVar(&flags.Environment, "environment", "", "Environment name (devnet, testnet, etc.)") + fs.StringVar(&flags.OperatorWallet, "operator-wallet", "", "Operator wallet address") + if err := fs.Parse(args); err != nil { if err == flag.ErrHelp { return nil, err diff --git a/core/pkg/cli/production/install/orchestrator.go b/core/pkg/cli/production/install/orchestrator.go index 04a4054..58f0f0d 100644 --- a/core/pkg/cli/production/install/orchestrator.go +++ b/core/pkg/cli/production/install/orchestrator.go @@ -68,6 +68,11 @@ func NewOrchestrator(flags *Flags) (*Orchestrator, error) { setup.SetAnyoneClient(true) } + // Set operator metadata (from orama node setup) + setup.SSHUser = flags.SSHUser + setup.Environment = flags.Environment + setup.OperatorWallet = flags.OperatorWallet + validator := NewValidator(flags, oramaDir) return &Orchestrator{ diff --git a/core/pkg/cli/production/lifecycle/restart.go b/core/pkg/cli/production/lifecycle/restart.go index 3560b1c..07e7e1d 100644 --- a/core/pkg/cli/production/lifecycle/restart.go +++ b/core/pkg/cli/production/lifecycle/restart.go @@ -53,6 +53,7 @@ func HandleRestartWithFlags(force bool) { {"orama-node"}, {"orama-olric"}, {"orama-ipfs-cluster", "orama-ipfs"}, + {"orama-vault"}, {"orama-anyone-relay", "orama-anyone-client"}, {"coredns", "caddy"}, } diff --git a/core/pkg/cli/production/lifecycle/stop.go b/core/pkg/cli/production/lifecycle/stop.go index 0e7f289..53433ad 100644 --- 
a/core/pkg/cli/production/lifecycle/stop.go +++ b/core/pkg/cli/production/lifecycle/stop.go @@ -55,8 +55,9 @@ func HandleStopWithFlags(force bool) { {"orama-node"}, // 1. Stop node (includes gateway + RQLite with leadership transfer) {"orama-olric"}, // 2. Stop cache {"orama-ipfs-cluster", "orama-ipfs"}, // 3. Stop storage - {"orama-anyone-relay", "orama-anyone-client"}, // 4. Stop privacy relay - {"coredns", "caddy"}, // 5. Stop DNS/TLS last + {"orama-vault"}, // 4. Stop vault + {"orama-anyone-relay", "orama-anyone-client"}, // 5. Stop privacy relay + {"coredns", "caddy"}, // 6. Stop DNS/TLS last } // Mask all services to immediately prevent Restart=always from reviving them. diff --git a/core/pkg/cli/production/report/processes.go b/core/pkg/cli/production/report/processes.go index bd5038d..1cc8243 100644 --- a/core/pkg/cli/production/report/processes.go +++ b/core/pkg/cli/production/report/processes.go @@ -89,6 +89,7 @@ func collectProcesses() *ProcessReport { var managedServiceUnits = []string{ "orama-node", "orama-olric", "orama-ipfs", "orama-ipfs-cluster", + "orama-vault", "orama-anyone-relay", "orama-anyone-client", "coredns", "caddy", "rqlited", } diff --git a/core/pkg/cli/production/report/report.go b/core/pkg/cli/production/report/report.go index 317a44b..2c72791 100644 --- a/core/pkg/cli/production/report/report.go +++ b/core/pkg/cli/production/report/report.go @@ -71,6 +71,10 @@ func Handle(jsonFlag bool, version string) error { rpt.IPFS = collectIPFS() }) + safeGo(&wg, "vault", func() { + rpt.Vault = collectVault() + }) + safeGo(&wg, "gateway", func() { rpt.Gateway = collectGateway() }) diff --git a/core/pkg/cli/production/report/services.go b/core/pkg/cli/production/report/services.go index 5138927..5939e28 100644 --- a/core/pkg/cli/production/report/services.go +++ b/core/pkg/cli/production/report/services.go @@ -13,6 +13,7 @@ var coreServices = []string{ "orama-olric", "orama-ipfs", "orama-ipfs-cluster", + "orama-vault", "orama-anyone-relay", 
"orama-anyone-client", "coredns", diff --git a/core/pkg/cli/production/report/types.go b/core/pkg/cli/production/report/types.go index 7607917..29f6df4 100644 --- a/core/pkg/cli/production/report/types.go +++ b/core/pkg/cli/production/report/types.go @@ -17,6 +17,7 @@ type NodeReport struct { RQLite *RQLiteReport `json:"rqlite,omitempty"` Olric *OlricReport `json:"olric,omitempty"` IPFS *IPFSReport `json:"ipfs,omitempty"` + Vault *VaultReport `json:"vault,omitempty"` Gateway *GatewayReport `json:"gateway,omitempty"` WireGuard *WireGuardReport `json:"wireguard,omitempty"` DNS *DNSReport `json:"dns,omitempty"` @@ -150,6 +151,21 @@ type IPFSReport struct { BootstrapEmpty bool `json:"bootstrap_empty"` } +// --- Vault --- + +type VaultReport struct { + ServiceActive bool `json:"service_active"` + Responsive bool `json:"responsive"` + Status string `json:"status,omitempty"` // "healthy", "degraded", "unavailable" + Guardians int `json:"guardians,omitempty"` + Healthy int `json:"healthy,omitempty"` + Threshold int `json:"threshold,omitempty"` + WriteQuorum int `json:"write_quorum,omitempty"` + ProcessMemMB int `json:"process_mem_mb"` + RestartCount int `json:"restart_count"` + LogErrors int `json:"log_errors_1h"` +} + // --- Gateway --- type GatewayReport struct { diff --git a/core/pkg/cli/production/report/vault.go b/core/pkg/cli/production/report/vault.go new file mode 100644 index 0000000..45e269f --- /dev/null +++ b/core/pkg/cli/production/report/vault.go @@ -0,0 +1,70 @@ +package report + +import ( + "context" + "encoding/json" + "strconv" + "strings" + "time" +) + +func collectVault() *VaultReport { + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) + defer cancel() + + r := &VaultReport{} + + // 1. Service active + if out, err := runCmd(ctx, "systemctl", "is-active", "orama-vault"); err == nil { + r.ServiceActive = strings.TrimSpace(out) == "active" + } + + // 2. 
Restart count + if out, err := runCmd(ctx, "systemctl", "show", "orama-vault", "--property=NRestarts"); err == nil { + if parts := strings.SplitN(out, "=", 2); len(parts) == 2 { + r.RestartCount, _ = strconv.Atoi(strings.TrimSpace(parts[1])) + } + } + + // 3. Process memory + if out, err := runCmd(ctx, "systemctl", "show", "orama-vault", "--property=MemoryCurrent"); err == nil { + if parts := strings.SplitN(out, "=", 2); len(parts) == 2 { + r.ProcessMemMB = parseMemoryMB(parts[1]) + } + } + + // 4. Log errors in last hour + if out, err := runCmd(ctx, "bash", "-c", + `journalctl -u orama-vault --no-pager -n 200 --since "1 hour ago" 2>/dev/null | grep -ciE "(error|ERR)" || echo 0`); err == nil { + r.LogErrors, _ = strconv.Atoi(strings.TrimSpace(out)) + } + + // 5. Query vault status via gateway (provides guardian health) + if body, err := httpGet(ctx, "http://localhost:6001/v1/vault/status"); err == nil { + var status struct { + Guardians int `json:"guardians"` + Healthy int `json:"healthy"` + Threshold int `json:"threshold"` + WriteQuorum int `json:"write_quorum"` + } + if json.Unmarshal(body, &status) == nil { + r.Responsive = true + r.Guardians = status.Guardians + r.Healthy = status.Healthy + r.Threshold = status.Threshold + r.WriteQuorum = status.WriteQuorum + } + } + + // 6. Query vault health status + if body, err := httpGet(ctx, "http://localhost:6001/v1/vault/health"); err == nil { + var health struct { + Status string `json:"status"` + } + if json.Unmarshal(body, &health) == nil { + r.Status = health.Status + } + } + + return r +} diff --git a/core/pkg/cli/production/setup/command.go b/core/pkg/cli/production/setup/command.go new file mode 100644 index 0000000..6c08aad --- /dev/null +++ b/core/pkg/cli/production/setup/command.go @@ -0,0 +1,369 @@ +// Package setup implements the "orama node setup" command — a single command +// to bootstrap a fresh VPS into a running Orama node. +// +// Flow: +// 1. Create SSH key in rootwallet vault for this node +// 2. 
Install the public key on the VPS (one-time password-based SSH) +// 3. Upload the binary archive +// 4. For genesis: run install without --join +// 5. For joining: request invite token via operator API, run install with --join +package setup + +import ( + "bytes" + "context" + "encoding/json" + "fmt" + "io" + "net/http" + "os" + "os/exec" + "path/filepath" + "sort" + "strings" + "time" + + "github.com/DeBrosOfficial/network/pkg/auth" + "github.com/DeBrosOfficial/network/pkg/cli" + "github.com/DeBrosOfficial/network/pkg/cli/remotessh" + "github.com/DeBrosOfficial/network/pkg/inspector" + "github.com/DeBrosOfficial/network/pkg/rwagent" +) + +// Options holds the flags for the setup command. +type Options struct { + IP string + Env string + Role string // "node" or "nameserver" + User string // SSH user (default: "root") + Password string // One-time password for initial SSH access + BaseDomain string + Gateway string // Gateway URL to use for invite tokens (overrides env config) + Genesis bool // If true, create a new cluster instead of joining + AnyoneRelay bool +} + +// Run executes the node setup. +func Run(opts Options) error { + if opts.IP == "" { + return fmt.Errorf("--ip is required") + } + if opts.User == "" { + opts.User = "root" + } + if opts.Role == "" { + opts.Role = "node" + } + + // 1. Ensure rootwallet agent is running + fmt.Println("Checking rootwallet agent...") + agentClient := rwagent.New(os.Getenv("RW_AGENT_SOCK")) + ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) + defer cancel() + + status, err := agentClient.Status(ctx) + if err != nil { + return fmt.Errorf("rootwallet agent not reachable: %w (is the desktop app running?)", err) + } + if status.Locked { + return fmt.Errorf("rootwallet agent is locked — unlock it in the desktop app first") + } + + // 2. 
Get operator wallet address + addrData, err := agentClient.GetAddress(ctx, "evm") + if err != nil { + return fmt.Errorf("failed to get wallet address: %w", err) + } + fmt.Printf(" Wallet: %s\n", addrData.Address) + + // 3. Create SSH key in rootwallet vault for this node + vaultTarget := fmt.Sprintf("%s/%s", opts.IP, opts.User) + fmt.Printf(" Setting up SSH key for %s...\n", vaultTarget) + + if err := remotessh.EnsureVaultEntry(vaultTarget); err != nil { + return fmt.Errorf("failed to create SSH key in vault: %w", err) + } + + pubKey, err := remotessh.ResolveVaultPublicKey(vaultTarget) + if err != nil { + return fmt.Errorf("failed to get public key: %w", err) + } + + // 4. Install the public key on the VPS via password SSH + if opts.Password != "" { + fmt.Printf(" Installing SSH key on %s...\n", opts.IP) + if err := installPublicKey(opts.IP, opts.User, opts.Password, pubKey); err != nil { + return fmt.Errorf("failed to install SSH key: %w", err) + } + fmt.Println(" SSH key installed") + } else { + fmt.Println(" No --password provided, assuming SSH key is already installed") + } + + // 5. Test SSH with rootwallet key + fmt.Println(" Testing SSH connection...") + node := inspector.Node{ + Host: opts.IP, + User: opts.User, + VaultTarget: vaultTarget, + Environment: opts.Env, + Role: opts.Role, + } + nodes := []inspector.Node{node} + cleanup, err := remotessh.PrepareNodeKeys(nodes) + if err != nil { + return fmt.Errorf("failed to prepare SSH key: %w", err) + } + defer cleanup() + node = nodes[0] // SSHKey is now set + + testResult := inspector.RunSSH(context.Background(), node, "echo ok") + if !testResult.OK() { + return fmt.Errorf("SSH test failed: %s", testResult.Stderr) + } + fmt.Println(" SSH connection OK") + + // 6. 
Check if binary archive needs uploading + if needsArchiveUpload(node) { + archivePath := findNewestArchive() + if archivePath == "" { + return fmt.Errorf("no binary archive found in /tmp/ (run `orama build` first)") + } + fmt.Printf(" Uploading archive (%s)...\n", filepath.Base(archivePath)) + if err := remotessh.UploadFile(node, archivePath, "/tmp/archive.tar.gz"); err != nil { + return fmt.Errorf("failed to upload archive: %w", err) + } + extractCmd := "sudo bash -c 'mkdir -p /opt/orama && tar xzf /tmp/archive.tar.gz -C /opt/orama && rm -f /tmp/archive.tar.gz'" + if err := remotessh.RunSSHStreaming(node, extractCmd); err != nil { + return fmt.Errorf("failed to extract archive: %w", err) + } + fmt.Println(" Archive extracted") + } else { + fmt.Println(" Binary already present on node") + } + + // 7. Build the install command + installCmd, err := buildInstallCommand(opts, node, agentClient) + if err != nil { + return fmt.Errorf("failed to build install command: %w", err) + } + + fmt.Printf("\n Running: %s\n\n", installCmd) + + // 8. Run the install + if err := remotessh.RunSSHStreaming(node, installCmd); err != nil { + return fmt.Errorf("install failed: %w", err) + } + + // 9. After genesis install, update the environment gateway URL to this node's IP. + // This allows subsequent `node setup` calls to find the gateway automatically. 
+ if opts.Genesis && opts.Env != "" { + gatewayURL := fmt.Sprintf("http://%s", opts.IP) + desc := fmt.Sprintf("%s (genesis: %s)", opts.Env, opts.IP) + if err := cli.AddEnvironment(opts.Env, gatewayURL, desc); err != nil { + fmt.Fprintf(os.Stderr, " Warning: failed to update environment: %v\n", err) + } else { + if err := cli.SwitchEnvironment(opts.Env); err != nil { + fmt.Fprintf(os.Stderr, " Warning: failed to switch environment: %v\n", err) + } + fmt.Printf(" Environment %q updated: gateway → %s\n", opts.Env, gatewayURL) + fmt.Printf("\n To join more nodes, first authenticate:\n") + fmt.Printf(" orama auth login\n") + fmt.Printf(" Then:\n") + fmt.Printf(" orama node setup --ip --password '' --env %s --base-domain %s\n", opts.Env, opts.BaseDomain) + } + } + + fmt.Printf("\n Node %s setup complete!\n", opts.IP) + return nil +} + +// installPublicKey installs an SSH public key on a VPS using password authentication. +func installPublicKey(ip, user, password, pubKey string) error { + sshpassBin, err := findBinary("sshpass") + if err != nil { + return fmt.Errorf("sshpass is required for password-based SSH key installation: %w", err) + } + + // Ensure .ssh directory exists and install the key + cmd := fmt.Sprintf( + `mkdir -p ~/.ssh && chmod 700 ~/.ssh && echo '%s' >> ~/.ssh/authorized_keys && chmod 600 ~/.ssh/authorized_keys && echo 'key installed'`, + strings.TrimSpace(pubKey), + ) + + args := []string{ + "-p", password, + "ssh", + "-o", "StrictHostKeyChecking=no", + "-o", "ConnectTimeout=10", + "-o", "PreferredAuthentications=password", + "-o", "PubkeyAuthentication=no", + fmt.Sprintf("%s@%s", user, ip), + cmd, + } + + out, err := runCommand(sshpassBin, args...) + if err != nil { + return fmt.Errorf("sshpass failed: %w (%s)", err, out) + } + if !strings.Contains(out, "key installed") { + return fmt.Errorf("unexpected output: %s", out) + } + return nil +} + +// buildInstallCommand constructs the `sudo orama node install` command. 
+func buildInstallCommand(opts Options, node inspector.Node, agentClient *rwagent.Client) (string, error) { + parts := []string{"sudo /opt/orama/bin/orama node install"} + parts = append(parts, "--vps-ip", opts.IP) + + if opts.BaseDomain != "" { + parts = append(parts, "--base-domain", opts.BaseDomain) + } + + if strings.HasPrefix(opts.Role, "nameserver") { + parts = append(parts, "--nameserver") + if opts.BaseDomain != "" { + parts = append(parts, "--domain", opts.BaseDomain) + } + } + + if opts.AnyoneRelay { + parts = append(parts, "--anyone-relay") + } else { + parts = append(parts, "--anyone-client") + } + + // Pass operator metadata so the node registers with correct values + if opts.User != "" { + parts = append(parts, "--ssh-user", opts.User) + } + if opts.Env != "" { + parts = append(parts, "--environment", opts.Env) + } + + // Get wallet address for operator tagging + ctx := context.Background() + if addrData, err := agentClient.GetAddress(ctx, "evm"); err == nil && addrData.Address != "" { + parts = append(parts, "--operator-wallet", addrData.Address) + } + + if !opts.Genesis { + // Determine gateway URL for invite token request + gatewayURL := opts.Gateway + if gatewayURL == "" { + env := opts.Env + if env == "" { + active, err := cli.GetActiveEnvironment() + if err != nil { + return "", fmt.Errorf("failed to get active environment: %w", err) + } + env = active.Name + } + envConfig, err := cli.GetEnvironmentByName(env) + if err != nil { + return "", fmt.Errorf("environment %q not found (use --gateway to specify directly): %w", env, err) + } + gatewayURL = envConfig.GatewayURL + } + + // Request invite token via operator API + token, err := requestInviteToken(gatewayURL) + if err != nil { + return "", fmt.Errorf("failed to get invite token: %w", err) + } + + parts = append(parts, "--join", gatewayURL, "--token", token) + } + + return strings.Join(parts, " "), nil +} + +// requestInviteToken calls POST /v1/operator/invite to get an invite token. 
+func requestInviteToken(gatewayURL string) (string, error) { + store, err := auth.LoadEnhancedCredentials() + if err != nil { + return "", fmt.Errorf("failed to load credentials: %w", err) + } + creds := store.GetDefaultCredential(gatewayURL) + if creds == nil || creds.APIKey == "" { + return "", fmt.Errorf("no credentials for %s — run 'orama auth login' first", gatewayURL) + } + + body, _ := json.Marshal(map[string]int{"expiry_minutes": 60}) + req, err := http.NewRequest(http.MethodPost, gatewayURL+"/v1/operator/invite", bytes.NewReader(body)) + if err != nil { + return "", err + } + req.Header.Set("Content-Type", "application/json") + req.Header.Set("X-API-Key", creds.APIKey) + + client := &http.Client{Timeout: 15 * time.Second} + resp, err := client.Do(req) + if err != nil { + return "", fmt.Errorf("request failed: %w", err) + } + defer resp.Body.Close() + + respBody, _ := io.ReadAll(io.LimitReader(resp.Body, 4096)) + if resp.StatusCode != http.StatusOK { + return "", fmt.Errorf("HTTP %d: %s", resp.StatusCode, string(respBody)) + } + + var result struct { + Token string `json:"token"` + } + if err := json.Unmarshal(respBody, &result); err != nil { + return "", fmt.Errorf("failed to parse response: %w", err) + } + if result.Token == "" { + return "", fmt.Errorf("empty token in response") + } + return result.Token, nil +} + +// needsArchiveUpload checks if the node already has the orama binary. +func needsArchiveUpload(node inspector.Node) bool { + result := inspector.RunSSH(context.Background(), node, "/opt/orama/bin/orama version 2>/dev/null") + return !result.OK() +} + +// findNewestArchive finds the newest orama binary archive in /tmp/. 
+func findNewestArchive() string { + matches, _ := filepath.Glob("/tmp/orama-*-linux-*.tar.gz") + if len(matches) == 0 { + return "" + } + sort.Slice(matches, func(i, j int) bool { + fi, _ := os.Stat(matches[i]) + fj, _ := os.Stat(matches[j]) + if fi == nil || fj == nil { + return false + } + return fi.ModTime().After(fj.ModTime()) + }) + return matches[0] +} + +func findBinary(name string) (string, error) { + paths := []string{ + "/opt/homebrew/bin/" + name, + "/usr/local/bin/" + name, + "/usr/bin/" + name, + } + for _, p := range paths { + if _, err := os.Stat(p); err == nil { + return p, nil + } + } + return "", fmt.Errorf("%s not found", name) +} + +func runCommand(bin string, args ...string) (string, error) { + cmd := &exec.Cmd{ + Path: bin, + Args: append([]string{bin}, args...), + } + out, err := cmd.CombinedOutput() + return string(out), err +} diff --git a/core/pkg/cli/production/status/command.go b/core/pkg/cli/production/status/command.go index 4120693..c7ea512 100644 --- a/core/pkg/cli/production/status/command.go +++ b/core/pkg/cli/production/status/command.go @@ -17,6 +17,7 @@ func Handle() { "orama-ipfs-cluster", // Note: RQLite is managed by node process, not as separate service "orama-olric", + "orama-vault", "orama-node", // Note: gateway is embedded in orama-node, no separate service } @@ -26,6 +27,7 @@ func Handle() { "orama-ipfs": "IPFS Daemon", "orama-ipfs-cluster": "IPFS Cluster", "orama-olric": "Olric Cache Server", + "orama-vault": "Vault Guardian", "orama-node": "Orama Node (includes RQLite + Gateway)", } diff --git a/core/pkg/cli/production/upgrade/orchestrator.go b/core/pkg/cli/production/upgrade/orchestrator.go index 8c20bdb..38f3319 100644 --- a/core/pkg/cli/production/upgrade/orchestrator.go +++ b/core/pkg/cli/production/upgrade/orchestrator.go @@ -376,6 +376,7 @@ func (o *Orchestrator) stopServices() error { "orama-ipfs-cluster.service", // Depends on IPFS "orama-ipfs.service", // Base IPFS "orama-olric.service", // Independent + 
"orama-vault.service", // Vault guardian "orama-anyone-client.service", // Client mode "orama-anyone-relay.service", // Relay mode } @@ -683,6 +684,7 @@ func (o *Orchestrator) restartServices() error { "orama-olric", // Distributed cache "orama-ipfs", // IPFS daemon "orama-ipfs-cluster", // IPFS cluster + "orama-vault", // Vault guardian "orama-gateway", // Gateway (legacy) "coredns", // DNS server "caddy", // Reverse proxy diff --git a/core/pkg/cli/remotessh/ssh.go b/core/pkg/cli/remotessh/ssh.go index 3ce5157..73bcd4f 100644 --- a/core/pkg/cli/remotessh/ssh.go +++ b/core/pkg/cli/remotessh/ssh.go @@ -42,7 +42,7 @@ func UploadFile(node inspector.Node, localPath, remotePath string, opts ...SSHOp dest := fmt.Sprintf("%s@%s:%s", node.User, node.Host, remotePath) - args := []string{"-o", "ConnectTimeout=10", "-i", node.SSHKey} + args := []string{"-o", "ConnectTimeout=10", "-o", "IdentitiesOnly=yes", "-i", node.SSHKey} if cfg.noHostKeyCheck { args = append([]string{"-o", "StrictHostKeyChecking=no", "-o", "UserKnownHostsFile=/dev/null"}, args...) } else { @@ -73,7 +73,7 @@ func RunSSHStreaming(node inspector.Node, command string, opts ...SSHOption) err o(&cfg) } - args := []string{"-o", "ConnectTimeout=10", "-i", node.SSHKey} + args := []string{"-o", "ConnectTimeout=10", "-o", "IdentitiesOnly=yes", "-i", node.SSHKey} if cfg.noHostKeyCheck { args = append([]string{"-o", "StrictHostKeyChecking=no", "-o", "UserKnownHostsFile=/dev/null"}, args...) 
} else { diff --git a/core/pkg/cli/sandbox/create.go b/core/pkg/cli/sandbox/create.go index 36e9bc3..747c150 100644 --- a/core/pkg/cli/sandbox/create.go +++ b/core/pkg/cli/sandbox/create.go @@ -9,6 +9,7 @@ import ( "strings" "time" + "github.com/DeBrosOfficial/network/pkg/cli" "github.com/DeBrosOfficial/network/pkg/cli/remotessh" "github.com/DeBrosOfficial/network/pkg/inspector" "github.com/DeBrosOfficial/network/pkg/rwagent" @@ -144,6 +145,18 @@ func Create(name string) error { return fmt.Errorf("save final state: %w", err) } + // Register sandbox as an environment and switch to it + gatewayURL := "https://" + cfg.Domain + desc := fmt.Sprintf("Sandbox cluster: %s (%s)", state.Name, cfg.Domain) + if err := cli.AddEnvironment("sandbox", gatewayURL, desc); err != nil { + fmt.Fprintf(os.Stderr, "Warning: failed to register sandbox environment: %v\n", err) + } else if err := cli.SwitchEnvironment("sandbox"); err != nil { + fmt.Fprintf(os.Stderr, "Warning: failed to switch to sandbox environment: %v\n", err) + } + + // Tag all nodes with operator wallet for unified node management + registerNodesWithOperator(state, sshKeyPath) + printCreateSummary(cfg, state) return nil } @@ -633,6 +646,36 @@ func printCreateSummary(cfg *Config, state *SandboxState) { fmt.Println("Destroy: orama sandbox destroy") } +// registerNodesWithOperator tags all sandbox nodes with the operator's wallet +// via a direct RQLite UPDATE on the genesis node. This enables `orama nodes` +// to discover sandbox nodes alongside production nodes. 
+func registerNodesWithOperator(state *SandboxState, sshKeyPath string) { + client := rwagent.New(os.Getenv("RW_AGENT_SOCK")) + ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) + defer cancel() + + addrData, err := client.GetAddress(ctx, "evm") + if err != nil || addrData == nil || addrData.Address == "" { + fmt.Fprintf(os.Stderr, "Warning: could not get operator wallet, nodes not tagged: %v\n", err) + return + } + wallet := addrData.Address + + if len(state.Servers) == 0 { + return + } + genesis := state.Servers[0] + + node := inspector.Node{User: "root", Host: genesis.IP, SSHKey: sshKeyPath} + // Use RQLite's parameterized query to avoid any injection risk. + // The JSON payload has the wallet as a parameter, not interpolated into SQL. + payload := fmt.Sprintf(`[["UPDATE dns_nodes SET operator_wallet = ?, environment = 'sandbox' WHERE operator_wallet IS NULL OR operator_wallet = ''", %q]]`, wallet) + cmd := fmt.Sprintf(`curl -sf -X POST http://localhost:5001/db/execute -H 'Content-Type: application/json' -d '%s'`, payload) + if _, err := runSSHOutput(node, cmd); err != nil { + fmt.Fprintf(os.Stderr, "Warning: failed to tag nodes with operator wallet: %v\n", err) + } +} + // cleanupFailedCreate deletes any servers that were created during a failed provision. func cleanupFailedCreate(client *HetznerClient, state *SandboxState) { if len(state.Servers) == 0 { diff --git a/core/pkg/cli/sandbox/destroy.go b/core/pkg/cli/sandbox/destroy.go index b532a18..8e7ae3d 100644 --- a/core/pkg/cli/sandbox/destroy.go +++ b/core/pkg/cli/sandbox/destroy.go @@ -4,8 +4,11 @@ import ( "bufio" "fmt" "os" + "os/exec" "strings" "sync" + + "github.com/DeBrosOfficial/network/pkg/cli" ) // Destroy tears down a sandbox cluster. 
@@ -100,10 +103,30 @@ func Destroy(name string, force bool) error { return fmt.Errorf("delete state: %w", err) } + // Remove sandbox environment entry, fall back to devnet + if err := cli.RemoveEnvironment("sandbox"); err != nil { + fmt.Fprintf(os.Stderr, "Warning: failed to remove sandbox environment: %v\n", err) + } + + // Clean up SSH known_hosts entries for destroyed server IPs. + // This prevents "REMOTE HOST IDENTIFICATION HAS CHANGED" errors + // when the same IPs are reused by a new sandbox. + cleanupKnownHosts(state) + fmt.Printf("\nSandbox %q destroyed (%d servers deleted)\n", state.Name, len(state.Servers)) return nil } +// cleanupKnownHosts removes SSH known_hosts entries for all sandbox server IPs. +func cleanupKnownHosts(state *SandboxState) { + for _, srv := range state.Servers { + cmd := exec.Command("ssh-keygen", "-R", srv.IP) + cmd.Stdout = nil + cmd.Stderr = nil + cmd.Run() // best-effort, ignore errors + } +} + // resolveSandbox finds a sandbox by name or returns the active one. func resolveSandbox(name string) (*SandboxState, error) { if name != "" { diff --git a/core/pkg/cli/utils/systemd.go b/core/pkg/cli/utils/systemd.go index b4a6ffb..2869f33 100644 --- a/core/pkg/cli/utils/systemd.go +++ b/core/pkg/cli/utils/systemd.go @@ -162,6 +162,7 @@ func GetProductionServices() []string { "orama-olric", "orama-ipfs-cluster", "orama-ipfs", + "orama-vault", "orama-anyone-client", "orama-anyone-relay", } diff --git a/core/pkg/client/interface.go b/core/pkg/client/interface.go index 2c7e40b..1fff4c9 100644 --- a/core/pkg/client/interface.go +++ b/core/pkg/client/interface.go @@ -47,10 +47,29 @@ type DatabaseClient interface { type PubSubClient interface { Subscribe(ctx context.Context, topic string, handler MessageHandler) error Publish(ctx context.Context, topic string, data []byte) error + // PublishBatch publishes multiple messages in parallel, one per topic. + // See pubsub.Manager.PublishBatch for semantics (fail-fast vs. best-effort). 
+ PublishBatch(ctx context.Context, msgs []TopicMessage, opts PublishBatchOptions) error + // PublishSame sends the same payload to every topic in parallel. + PublishSame(ctx context.Context, topics []string, data []byte, opts PublishBatchOptions) error Unsubscribe(ctx context.Context, topic string) error ListTopics(ctx context.Context) ([]string, error) } +// TopicMessage is one entry in a batch publish. +// Mirrors pubsub.TopicMessage to avoid forcing client callers to import pkg/pubsub. +type TopicMessage struct { + Topic string + Data []byte +} + +// PublishBatchOptions controls batch publish behavior. +// Mirrors pubsub.PublishBatchOptions. +type PublishBatchOptions struct { + BestEffort bool + MaxConcurrency int +} + // NetworkInfo provides network status and peer information type NetworkInfo interface { GetPeers(ctx context.Context) ([]PeerInfo, error) diff --git a/core/pkg/client/pubsub_bridge.go b/core/pkg/client/pubsub_bridge.go index 653301e..79780b0 100644 --- a/core/pkg/client/pubsub_bridge.go +++ b/core/pkg/client/pubsub_bridge.go @@ -4,13 +4,13 @@ import ( "context" "fmt" - "github.com/DeBrosOfficial/network/pkg/pubsub" + pkgpubsub "github.com/DeBrosOfficial/network/pkg/pubsub" ) // pubSubBridge bridges between our PubSubClient interface and the pubsub package type pubSubBridge struct { client *Client - adapter *pubsub.ClientAdapter + adapter *pkgpubsub.ClientAdapter } func (p *pubSubBridge) Subscribe(ctx context.Context, topic string, handler MessageHandler) error { @@ -31,6 +31,26 @@ func (p *pubSubBridge) Publish(ctx context.Context, topic string, data []byte) e return p.adapter.Publish(ctx, topic, data) } +func (p *pubSubBridge) PublishBatch(ctx context.Context, msgs []TopicMessage, opts PublishBatchOptions) error { + if err := p.client.requireAccess(ctx); err != nil { + return fmt.Errorf("authentication required: %w - run CLI commands to authenticate automatically", err) + } + pkgMsgs := make([]pkgpubsub.TopicMessage, len(msgs)) + for i, m := 
range msgs { + pkgMsgs[i] = pkgpubsub.TopicMessage{Topic: m.Topic, Data: m.Data} + } + pkgOpts := pkgpubsub.PublishBatchOptions{BestEffort: opts.BestEffort, MaxConcurrency: opts.MaxConcurrency} + return p.adapter.PublishBatch(ctx, pkgMsgs, pkgOpts) +} + +func (p *pubSubBridge) PublishSame(ctx context.Context, topics []string, data []byte, opts PublishBatchOptions) error { + if err := p.client.requireAccess(ctx); err != nil { + return fmt.Errorf("authentication required: %w - run CLI commands to authenticate automatically", err) + } + pkgOpts := pkgpubsub.PublishBatchOptions{BestEffort: opts.BestEffort, MaxConcurrency: opts.MaxConcurrency} + return p.adapter.PublishSame(ctx, topics, data, pkgOpts) +} + func (p *pubSubBridge) Unsubscribe(ctx context.Context, topic string) error { if err := p.client.requireAccess(ctx); err != nil { return fmt.Errorf("authentication required: %w - run CLI commands to authenticate automatically", err) diff --git a/core/pkg/config/node_config.go b/core/pkg/config/node_config.go index a23ffcc..ab070c9 100644 --- a/core/pkg/config/node_config.go +++ b/core/pkg/config/node_config.go @@ -7,4 +7,7 @@ type NodeConfig struct { DataDir string `yaml:"data_dir"` // Data directory MaxConnections int `yaml:"max_connections"` // Maximum peer connections Domain string `yaml:"domain"` // Domain for this node (e.g., node-1.orama.network) + SSHUser string `yaml:"ssh_user,omitempty"` // SSH user for remote management + Environment string `yaml:"environment,omitempty"` // Environment name (devnet, testnet, etc.) 
+ OperatorWallet string `yaml:"operator_wallet,omitempty"` // Operator wallet address } diff --git a/core/pkg/deployments/home_node_test.go b/core/pkg/deployments/home_node_test.go index 8b63ef6..2ee9d97 100644 --- a/core/pkg/deployments/home_node_test.go +++ b/core/pkg/deployments/home_node_test.go @@ -181,6 +181,10 @@ func (m *mockHomeNodeDB) Tx(ctx context.Context, fn func(tx rqlite.Tx) error) er return m.mockRQLiteClient.Tx(ctx, fn) } +func (m *mockHomeNodeDB) Batch(ctx context.Context, ops []rqlite.BatchOp) (*rqlite.BatchResult, error) { + return m.mockRQLiteClient.Batch(ctx, ops) +} + func (m *mockHomeNodeDB) addDeployment(nodeID, deploymentID, status string) { m.deployments[nodeID] = append(m.deployments[nodeID], deploymentData{ id: deploymentID, diff --git a/core/pkg/deployments/port_allocator_test.go b/core/pkg/deployments/port_allocator_test.go index 89d9f23..674130e 100644 --- a/core/pkg/deployments/port_allocator_test.go +++ b/core/pkg/deployments/port_allocator_test.go @@ -149,6 +149,15 @@ func (m *mockRQLiteClient) Tx(ctx context.Context, fn func(tx rqlite.Tx) error) return nil } +func (m *mockRQLiteClient) Batch(ctx context.Context, ops []rqlite.BatchOp) (*rqlite.BatchResult, error) { + return &rqlite.BatchResult{Committed: true, Results: make([]rqlite.OpResult, len(ops))}, nil +} + +func (m *mockRQLiteClient) BatchWithSeq(ctx context.Context, namespace string, ops []rqlite.BatchOp) (*rqlite.BatchResult, int64, error) { + res, err := m.Batch(ctx, ops) + return res, 1, err +} + func TestPortAllocator_AllocatePort(t *testing.T) { logger := zap.NewNop() mockDB := newMockRQLiteClient() diff --git a/core/pkg/environments/production/config.go b/core/pkg/environments/production/config.go index cb80560..2eaa530 100644 --- a/core/pkg/environments/production/config.go +++ b/core/pkg/environments/production/config.go @@ -20,7 +20,10 @@ import ( // ConfigGenerator manages generation of node, gateway, and service configs type ConfigGenerator struct { - oramaDir 
string + oramaDir string + SSHUser string // Operator metadata + Environment string + OperatorWallet string } // NewConfigGenerator creates a new config generator @@ -192,6 +195,11 @@ func (cg *ConfigGenerator) GenerateNodeConfig(peerAddresses []string, vpsIP stri // HTTPS is still used for client-facing gateway traffic via autocert // TLS can be enabled manually later if needed for inter-node encryption + // Operator metadata (set by orama node setup via --ssh-user, --environment, --operator-wallet) + data.SSHUser = cg.SSHUser + data.Environment = cg.Environment + data.OperatorWallet = cg.OperatorWallet + return templates.RenderNodeConfig(data) } diff --git a/core/pkg/environments/production/installers/caddy.go b/core/pkg/environments/production/installers/caddy.go index 5aad389..4e29775 100644 --- a/core/pkg/environments/production/installers/caddy.go +++ b/core/pkg/environments/production/installers/caddy.go @@ -390,7 +390,17 @@ func (ci *CaddyInstaller) generateCaddyfile(domain, email, acmeEndpoint, baseDom sb.WriteString(fmt.Sprintf("\n%s {\n%s\n reverse_proxy localhost:6001\n}\n", baseDomain, tlsBlock)) } - // HTTP fallback (handles plain HTTP and ACME challenges) + // HTTP blocks — serve traffic over plain HTTP so the gateway is reachable + // even when TLS certificates are unavailable (e.g., Let's Encrypt rate limits). + // Without these, Caddy auto-redirects HTTP→HTTPS for the named domain blocks above. 
+ sb.WriteString(fmt.Sprintf("\nhttp://*.%s {\n reverse_proxy localhost:6001\n}\n", domain)) + sb.WriteString(fmt.Sprintf("\nhttp://%s {\n reverse_proxy localhost:6001\n}\n", domain)) + if baseDomain != "" && baseDomain != domain { + sb.WriteString(fmt.Sprintf("\nhttp://*.%s {\n reverse_proxy localhost:6001\n}\n", baseDomain)) + sb.WriteString(fmt.Sprintf("\nhttp://%s {\n reverse_proxy localhost:6001\n}\n", baseDomain)) + } + + // HTTP catch-all fallback (handles remaining plain HTTP traffic) sb.WriteString("\n:80 {\n reverse_proxy localhost:6001\n}\n") return sb.String() diff --git a/core/pkg/environments/production/orchestrator.go b/core/pkg/environments/production/orchestrator.go index 7458c75..4a3ace8 100644 --- a/core/pkg/environments/production/orchestrator.go +++ b/core/pkg/environments/production/orchestrator.go @@ -53,6 +53,11 @@ type ProductionSetup struct { serviceController *SystemdController binaryInstaller *BinaryInstaller NodePeerID string // Captured during Phase3 for later display + + // Operator metadata (from --ssh-user, --environment, --operator-wallet flags) + SSHUser string + Environment string + OperatorWallet string } // ReadBranchPreference reads the stored branch preference from disk @@ -599,6 +604,11 @@ func (ps *ProductionSetup) Phase4GenerateConfigs(peerAddresses []string, vpsIP s ps.logf("Phase 4: Generating configurations...") } + // Propagate operator metadata to config generator + ps.configGenerator.SSHUser = ps.SSHUser + ps.configGenerator.Environment = ps.Environment + ps.configGenerator.OperatorWallet = ps.OperatorWallet + // Node config (unified architecture) nodeConfig, err := ps.configGenerator.GenerateNodeConfig(peerAddresses, vpsIP, joinAddress, domain, baseDomain, enableHTTPS) if err != nil { diff --git a/core/pkg/environments/production/provisioner.go b/core/pkg/environments/production/provisioner.go index 15d8741..0a88537 100644 --- a/core/pkg/environments/production/provisioner.go +++ 
b/core/pkg/environments/production/provisioner.go @@ -86,31 +86,44 @@ func (fp *FilesystemProvisioner) EnsureDirectoryStructure() error { // EnsureOramaUser creates the 'orama' system user and group for running services. // Sets ownership of the orama data directory to the new user. func (fp *FilesystemProvisioner) EnsureOramaUser() error { - // Check if user already exists - if err := exec.Command("id", "orama").Run(); err == nil { - return nil // user already exists - } - - // Create system user with no login shell and home at /opt/orama - cmd := exec.Command("useradd", "--system", "--no-create-home", - "--home-dir", fp.oramaHome, "--shell", "/usr/sbin/nologin", "orama") - if output, err := cmd.CombinedOutput(); err != nil { - return fmt.Errorf("failed to create orama user: %w\n%s", err, string(output)) - } - - // Set ownership of orama directories - chown := exec.Command("chown", "-R", "orama:orama", fp.oramaDir) - if output, err := chown.CombinedOutput(); err != nil { - return fmt.Errorf("failed to chown %s: %w\n%s", fp.oramaDir, err, string(output)) - } - - // Also chown the bin directory - binDir := filepath.Join(fp.oramaHome, "bin") - if _, err := os.Stat(binDir); err == nil { - chown = exec.Command("chown", "-R", "orama:orama", binDir) - if output, err := chown.CombinedOutput(); err != nil { - return fmt.Errorf("failed to chown %s: %w\n%s", binDir, err, string(output)) + // Check if user already exists; create if not + if err := exec.Command("id", "orama").Run(); err != nil { + cmd := exec.Command("useradd", "--system", "--no-create-home", + "--home-dir", fp.oramaHome, "--shell", "/usr/sbin/nologin", "orama") + if output, err := cmd.CombinedOutput(); err != nil { + return fmt.Errorf("failed to create orama user: %w\n%s", err, string(output)) } + + // Set ownership of orama directories (only on first create) + chown := exec.Command("chown", "-R", "orama:orama", fp.oramaDir) + if output, err := chown.CombinedOutput(); err != nil { + return fmt.Errorf("failed 
to chown %s: %w\n%s", fp.oramaDir, err, string(output)) + } + + binDir := filepath.Join(fp.oramaHome, "bin") + if _, err := os.Stat(binDir); err == nil { + chown = exec.Command("chown", "-R", "orama:orama", binDir) + if output, err := chown.CombinedOutput(); err != nil { + return fmt.Errorf("failed to chown %s: %w\n%s", binDir, err, string(output)) + } + } + } + + // Always ensure the sudoers rule is up-to-date (handles upgrades too). + // Resolve systemctl path to avoid hardcoding /bin vs /usr/bin. + systemctlPath, err := exec.LookPath("systemctl") + if err != nil { + systemctlPath = "/bin/systemctl" // fallback + } + + // Grant orama user permission to manage namespace and deployment services. + sudoersRule := fmt.Sprintf( + "orama ALL=(root) NOPASSWD: %[1]s start orama-namespace-*, %[1]s stop orama-namespace-*, %[1]s enable orama-namespace-*, %[1]s disable orama-namespace-*, %[1]s restart orama-namespace-*, %[1]s start orama-deploy-*, %[1]s stop orama-deploy-*, %[1]s enable orama-deploy-*, %[1]s disable orama-deploy-*, %[1]s restart orama-deploy-*, %[1]s daemon-reload\n", + systemctlPath, + ) + sudoersPath := "/etc/sudoers.d/orama-namespaces" + if err := os.WriteFile(sudoersPath, []byte(sudoersRule), 0440); err != nil { + return fmt.Errorf("failed to write sudoers rule: %w", err) } return nil diff --git a/core/pkg/environments/production/services.go b/core/pkg/environments/production/services.go index 4101e0b..2e8728b 100644 --- a/core/pkg/environments/production/services.go +++ b/core/pkg/environments/production/services.go @@ -19,6 +19,18 @@ ProtectKernelTunables=yes ProtectKernelModules=yes RestrictNamespaces=yes` +// oramaNodeHardening is like oramaServiceHardening but WITHOUT NoNewPrivileges. +// The node process (which includes the gateway) needs to use sudo to manage +// namespace systemd services. NoNewPrivileges prevents sudo from working. 
+const oramaNodeHardening = `User=orama +Group=orama +ProtectSystem=strict +ProtectHome=yes +PrivateDevices=yes +ProtectKernelTunables=yes +ProtectKernelModules=yes +RestrictNamespaces=yes` + // SystemdServiceGenerator generates systemd unit files type SystemdServiceGenerator struct { oramaHome string @@ -233,7 +245,7 @@ OOMScoreAdjust=-500 [Install] WantedBy=multi-user.target -`, ssg.oramaHome, ssg.oramaDir, configFile, logFile, oramaServiceHardening) +`, ssg.oramaHome, ssg.oramaDir, configFile, logFile, oramaNodeHardening) } // GenerateVaultService generates the Orama Vault Guardian systemd unit. diff --git a/core/pkg/environments/templates/node.yaml b/core/pkg/environments/templates/node.yaml index e44e9da..8559e0f 100644 --- a/core/pkg/environments/templates/node.yaml +++ b/core/pkg/environments/templates/node.yaml @@ -5,6 +5,15 @@ node: data_dir: "{{.DataDir}}" max_connections: 50 domain: "{{.Domain}}" +{{- if .SSHUser}} + ssh_user: "{{.SSHUser}}" +{{- end}} +{{- if .Environment}} + environment: "{{.Environment}}" +{{- end}} +{{- if .OperatorWallet}} + operator_wallet: "{{.OperatorWallet}}" +{{- end}} database: data_dir: "{{.DataDir}}/rqlite" diff --git a/core/pkg/environments/templates/render.go b/core/pkg/environments/templates/render.go index d867955..135085e 100644 --- a/core/pkg/environments/templates/render.go +++ b/core/pkg/environments/templates/render.go @@ -41,6 +41,11 @@ type NodeConfigData struct { NodeKey string // Path to X.509 private key for node-to-node communication NodeCACert string // Path to CA certificate (optional) NodeNoVerify bool // Skip certificate verification (for self-signed certs) + + // Operator metadata — written to dns_nodes during registration + SSHUser string // SSH user for remote management + Environment string // Environment name (devnet, testnet, etc.) 
+ OperatorWallet string // Operator wallet address } // GatewayConfigData holds parameters for gateway.yaml rendering diff --git a/core/pkg/gateway/auth/jwt.go b/core/pkg/gateway/auth/jwt.go index 7891c3b..4e79fd1 100644 --- a/core/pkg/gateway/auth/jwt.go +++ b/core/pkg/gateway/auth/jwt.go @@ -73,6 +73,10 @@ type JWTClaims struct { Nbf int64 `json:"nbf"` Exp int64 `json:"exp"` Namespace string `json:"namespace"` + // Custom holds app-defined claims (e.g. tier, subscription state). + // Read by serverless functions via the get_caller_claim host call. + // May be nil if the token has no custom claims. + Custom map[string]string `json:"custom,omitempty"` } // ParseAndVerifyJWT verifies a JWT created by this gateway using kid-based key diff --git a/core/pkg/gateway/auth/service_test.go b/core/pkg/gateway/auth/service_test.go index 197451f..434ca05 100644 --- a/core/pkg/gateway/auth/service_test.go +++ b/core/pkg/gateway/auth/service_test.go @@ -416,3 +416,73 @@ func TestJWKSHandler_RSAOnly(t *testing.T) { t.Errorf("expected RS256, got %s", result.Keys[0]["alg"]) } } + +// TestEdDSACrossServiceVerify is the regression test for bug #215. Two Service +// instances configured with the SAME Ed25519 key (the cluster-shared scenario +// produced by deterministic HKDF derivation in pkg/gateway/signing_key.go) +// must be able to verify each other's tokens. Without this guarantee a JWT +// minted on the main gateway is unverifiable on a namespace gateway and host +// functions see an empty caller_jwt_subject. +func TestEdDSACrossServiceVerify(t *testing.T) { + // Shared key — what HKDF would produce from the cluster secret. 
+ _, shared, err := ed25519.GenerateKey(rand.Reader) + if err != nil { + t.Fatalf("generate shared key: %v", err) + } + + makeService := func() *Service { + s := createTestService(t) // RSA + mock client + s.SetEdDSAKey(shared) + return s + } + + signer := makeService() // simulates main gateway + verifier := makeService() // simulates namespace gateway (different process, same shared key) + + // Sanity: both services must agree on edKeyID since it is derived from the + // public key. If they don't, kid-based verification will silently fail. + if signer.edKeyID != verifier.edKeyID { + t.Fatalf("edKeyID mismatch: signer=%q verifier=%q", signer.edKeyID, verifier.edKeyID) + } + + const wantSub = "BNbN2RNQTsYrrywZCLnhV9j3hd38jwcRqfxBecZX7hDE" + const wantNS = "anchat-test" + token, _, err := signer.GenerateJWT(wantNS, wantSub, 15*time.Minute) + if err != nil { + t.Fatalf("signer.GenerateJWT: %v", err) + } + + claims, err := verifier.ParseAndVerifyJWT(token) + if err != nil { + t.Fatalf("cross-service verify failed: %v", err) + } + if claims.Sub != wantSub { + t.Errorf("Sub = %q, want %q", claims.Sub, wantSub) + } + if claims.Namespace != wantNS { + t.Errorf("Namespace = %q, want %q", claims.Namespace, wantNS) + } +} + +// TestEdDSACrossServiceVerify_differentKeysFail proves the verify gate is +// real: when two services have different Ed25519 keys (the broken state +// before bug #215 fix), tokens minted on one MUST NOT validate on the other. +// If verification ever succeeds here (so this test fails), the signature +// check is being silently bypassed somewhere.
+func TestEdDSACrossServiceVerify_differentKeysFail(t *testing.T) { + signer := createTestService(t) + _, signKey, _ := ed25519.GenerateKey(rand.Reader) + signer.SetEdDSAKey(signKey) + + verifier := createTestService(t) + _, verKey, _ := ed25519.GenerateKey(rand.Reader) + verifier.SetEdDSAKey(verKey) + + token, _, err := signer.GenerateJWT("ns", "sub", 15*time.Minute) + if err != nil { + t.Fatalf("GenerateJWT: %v", err) + } + if _, err := verifier.ParseAndVerifyJWT(token); err == nil { + t.Fatal("expected verification to fail with different signing keys, got nil error") + } +} diff --git a/core/pkg/gateway/config.go b/core/pkg/gateway/config.go index 41cdebb..323ae48 100644 --- a/core/pkg/gateway/config.go +++ b/core/pkg/gateway/config.go @@ -56,4 +56,15 @@ type Config struct { SFUPort int // Local SFU signaling port to proxy WebSocket connections to TURNDomain string // TURN server domain for credential generation TURNSecret string // HMAC-SHA1 shared secret for TURN credential generation + + // StealthCDNDomain, when set, makes the WebRTC credentials handler + // advertise turns:{StealthCDNDomain}:443 (served by the SNI router). + StealthCDNDomain string + + // Push notification defaults. The subsystem starts whenever ClusterSecret + // is set; these only supply default providers. Tokens in the push_devices + // table are encrypted at rest via pkg/secrets using the cluster secret. + NtfyBaseURL string // ntfy server URL (e.g.
"http://localhost:8080") + NtfyAuthToken string // optional bearer token for ntfy + ExpoAccessToken string // optional Expo access token } diff --git a/core/pkg/gateway/dependencies.go b/core/pkg/gateway/dependencies.go index eaad2dd..dd6c048 100644 --- a/core/pkg/gateway/dependencies.go +++ b/core/pkg/gateway/dependencies.go @@ -19,10 +19,15 @@ import ( "github.com/DeBrosOfficial/network/pkg/logging" "github.com/DeBrosOfficial/network/pkg/olric" "github.com/DeBrosOfficial/network/pkg/pubsub" + "github.com/DeBrosOfficial/network/pkg/push" + pushexpo "github.com/DeBrosOfficial/network/pkg/push/providers/expo" + pushntfy "github.com/DeBrosOfficial/network/pkg/push/providers/ntfy" "github.com/DeBrosOfficial/network/pkg/rqlite" "github.com/DeBrosOfficial/network/pkg/serverless" "github.com/DeBrosOfficial/network/pkg/serverless/hostfunctions" + "github.com/DeBrosOfficial/network/pkg/serverless/persistent" "github.com/DeBrosOfficial/network/pkg/serverless/triggers" + "github.com/DeBrosOfficial/network/pkg/serverless/wsbridge" "github.com/multiformats/go-multiaddr" olriclib "github.com/olric-data/olric" "go.uber.org/zap" @@ -63,6 +68,34 @@ type Dependencies struct { // PubSub trigger dispatcher (used to wire into PubSubHandlers) PubSubDispatcher *triggers.PubSubDispatcher + // Cron trigger store + scheduler. The scheduler is started by gateway + // lifecycle code after Dependencies is constructed; Stop is called + // during shutdown. + CronTriggerStore *triggers.CronTriggerStore + CronScheduler *triggers.CronScheduler + + // PersistentWSManager tracks long-lived WS function instances. + // Used by the WS handler when fn.WSPersistent=true; nil = disabled. + PersistentWSManager *persistent.Manager + + // WSBridge wires PubSub topics directly to WS clients on this gateway. + // Used by the ws_pubsub_bridge host function. Nil = disabled. + WSBridge *wsbridge.Bridge + + // Push notification dispatcher (legacy single-tier; nil when push + // isn't configured at all). 
When PushManager is also set, send paths + // route through the manager instead so per-namespace config wins. + PushDispatcher *push.PushDispatcher + PushDeviceStore push.PushDeviceStore + + // PushManager wraps the device store + per-namespace config store so + // tenants self-serve their push provider config via PUT /v1/push/config. + // Nil when push subsystem isn't initialized (cluster secret missing). + // When set, this is the canonical send path; PushDispatcher is the + // fallback used only if Manager is somehow missing. + PushManager *push.Manager + PushConfigStore push.ConfigStore + // Authentication service AuthService *auth.Service } @@ -140,11 +173,7 @@ func initializeRQLite(logger *logging.ColoredLogger, cfg *Config, deps *Dependen // Inject basic auth credentials into DSN if available dsn = injectRQLiteAuth(dsn, cfg.RQLiteUsername, cfg.RQLitePassword) - if strings.Contains(dsn, "?") { - dsn += "&disableClusterDiscovery=true&level=none" - } else { - dsn += "?disableClusterDiscovery=true&level=none" - } + dsn = appendRQLiteQueryParams(dsn) db, err := sql.Open("rqlite", dsn) if err != nil { return fmt.Errorf("failed to open rqlite sql db: %w", err) @@ -157,7 +186,17 @@ func initializeRQLite(logger *logging.ColoredLogger, cfg *Config, deps *Dependen db.SetConnMaxIdleTime(2 * time.Minute) // Maximum idle time before closing deps.SQLDB = db - orm := rqlite.NewClient(db) + // Use the DSN-aware constructor so the ORM client also has a native + // *gorqlite.Connection for atomic Batch operations. If the native dial + // fails, fall back to the stdlib-only client (Batch will be unavailable + // but everything else works). 
+ orm, ormErr := rqlite.NewClientWithDSN(db, dsn) + if ormErr != nil { + logger.ComponentWarn(logging.ComponentGeneral, + "native gorqlite dial failed, atomic Batch will be unavailable", + zap.Error(ormErr)) + orm = rqlite.NewClient(db) + } deps.ORMClient = orm deps.ORMHTTP = rqlite.NewHTTPGateway(orm, "/v1/db") // Set a reasonable timeout for HTTP requests (30 seconds) @@ -172,14 +211,32 @@ func initializeRQLite(logger *logging.ColoredLogger, cfg *Config, deps *Dependen // Apply embedded migrations to ensure schema is up-to-date. // This is critical for namespace gateways whose RQLite instances // don't get migrations from the main cluster RQLiteManager. + // + // Failures here are FATAL: a gateway that can't bring its schema up + // to the version its binary expects will silently corrupt deploys + // later (e.g. INSERTing into missing columns and surfacing as a + // cryptic SQL error to end users). Better to refuse to start with + // a clear actionable error. migCtx, migCancel := context.WithTimeout(context.Background(), 30*time.Second) defer migCancel() if err := rqlite.ApplyEmbeddedMigrations(migCtx, db, migrations.FS, logger.Logger); err != nil { - logger.ComponentWarn(logging.ComponentGeneral, "Failed to apply embedded migrations to gateway RQLite", - zap.Error(err)) - } else { - logger.ComponentInfo(logging.ComponentGeneral, "Embedded migrations applied to gateway RQLite") + return fmt.Errorf("apply embedded migrations failed: %w "+ + "(hint: this gateway can't safely run without its required schema; "+ + "check the underlying RQLite cluster health and re-run startup)", err) } + logger.ComponentInfo(logging.ComponentGeneral, "Embedded migrations applied to gateway RQLite") + + // Schema-version contract: even if the apply call returned nil, verify + // that the highest migration the binary embeds is recorded as applied. 
+ // Catches: + // - silent partial-apply states where the marker row was never written + // - clusters where the binary was upgraded but RQLite has stale schema + // - operator manually deleted rows from schema_migrations + if err := migrations.AssertSchema(migCtx, db); err != nil { + return fmt.Errorf("schema contract violation: %w", err) + } + logger.ComponentInfo(logging.ComponentGeneral, "Schema contract satisfied", + zap.Int("required_version", migrations.RequiredVersion())) return nil } @@ -412,11 +469,39 @@ func initializeServerless(logger *logging.ColoredLogger, cfg *Config, deps *Depe secretsMgr = smImpl } + // Initialize push notification subsystem. + // + // Bug #220 follow-up: the subsystem now ALWAYS initializes when the + // cluster secret is available (so tenants can register devices and + // PUT their per-namespace push config), regardless of whether the + // gateway YAML has a default provider configured. The Manager wraps + // the device store + per-namespace ConfigStore; Send paths route + // through Manager so per-namespace config takes effect. + // + // PushDispatcher (legacy) is set only when YAML defaults exist — + // kept for back-compat with code that hasn't migrated to Manager. + pushDispatcher, pushStore, pushManager, pushCfgStore, err := buildPushDispatcher(cfg, deps.ORMClient, logger) + if err != nil { + // Non-fatal: log and continue. Functions calling push_send will get nil + // (silent no-op) and HTTP /v1/push/* endpoints return 503. 
+ logger.ComponentWarn(logging.ComponentGeneral, + "push notifications disabled (init failed)", zap.Error(err)) + } + deps.PushDispatcher = pushDispatcher + deps.PushDeviceStore = pushStore + deps.PushManager = pushManager + deps.PushConfigStore = pushCfgStore + // Create host functions provider (allows functions to call Orama services) hostFuncsCfg := hostfunctions.HostFunctionsConfig{ IPFSAPIURL: cfg.IPFSAPIURL, HTTPTimeout: 30 * time.Second, } + // WS-PubSub bridge: wire PubSub topics directly to WS clients without + // per-event WASM invocation. The bridge is a thin layer over the + // pubsub adapter + WSManager. + deps.WSBridge = wsbridge.New(pubsubAdapter, deps.ServerlessWSMgr, logger.Logger) + hostFuncs := hostfunctions.NewHostFunctions( deps.ORMClient, olricClient, @@ -424,12 +509,21 @@ func initializeServerless(logger *logging.ColoredLogger, cfg *Config, deps *Depe pubsubAdapter, // pubsub adapter for serverless functions deps.ServerlessWSMgr, secretsMgr, + pushDispatcher, // legacy — fallback when manager isn't wired + pushManager, // bug #220 follow-up — per-namespace config + deps.WSBridge, // may be nil; WSPubSubBridge returns explicit error hostFuncsCfg, logger.Logger, ) - // Create WASM engine with rate limiter - rateLimiter := serverless.NewTokenBucketLimiter(engineCfg.GlobalRateLimitPerMinute) + // Create WASM engine with multi-tier rate limiter (per-(ns, fn, wallet, ip), + // per-(ns, wallet), per-(ns)). The legacy global limit is honored as + // the per-namespace ceiling so no behavior regresses for existing deployments. 
+ rlCfg := serverless.DefaultLimiterConfig() + if engineCfg.GlobalRateLimitPerMinute > 0 { + rlCfg.PerNamespacePerMinute = engineCfg.GlobalRateLimitPerMinute + } + rateLimiter := serverless.NewMultiTierLimiter(rlCfg) engine, err := serverless.NewEngine(engineCfg, registry, hostFuncs, logger.Logger, serverless.WithInvocationLogger(registry), serverless.WithRateLimiter(rateLimiter), @@ -442,6 +536,11 @@ func initializeServerless(logger *logging.ColoredLogger, cfg *Config, deps *Depe // Create invoker deps.ServerlessInvoker = serverless.NewInvoker(engine, registry, hostFuncs, logger.Logger) + // Wire the invoker back into hostFuncs so the function_invoke host + // function can dispatch sub-invocations from inside a WASM function + // (e.g. rpc-router routing client RPCs to per-op handlers). + hostFuncs.SetInvoker(deps.ServerlessInvoker) + // Create PubSub trigger store and dispatcher triggerStore := triggers.NewPubSubTriggerStore(deps.ORMClient, logger.Logger) @@ -456,13 +555,34 @@ func initializeServerless(logger *logging.ColoredLogger, cfg *Config, deps *Depe logger.Logger, ) + // Cron trigger store + scheduler. The scheduler polls + // function_cron_triggers and invokes due rows via the same + // ServerlessInvoker used for PubSub triggers. It is only constructed + // here: gateway.New calls Start, and Stop belongs to gateway shutdown. + cronStore := triggers.NewCronTriggerStore(deps.ORMClient, logger.Logger) + deps.CronTriggerStore = cronStore + deps.CronScheduler = triggers.NewCronScheduler( + cronStore, + deps.ServerlessInvoker, + logger.Logger, + 30*time.Second, + ) + + // Persistent WS instance manager. The cap is hard-coded for now (TODO: + // surface it as a gateway config knob); 5000 is the default per plan 06.
+ deps.PersistentWSManager = persistent.NewManager(5000, logger.Logger) + // Create HTTP handlers deps.ServerlessHandlers = serverlesshandlers.NewServerlessHandlers( deps.ServerlessInvoker, + deps.ServerlessEngine, registry, deps.ServerlessWSMgr, triggerStore, + cronStore, deps.PubSubDispatcher, + deps.PersistentWSManager, + deps.WSBridge, secretsMgr, logger.Logger, ) @@ -477,8 +597,21 @@ func initializeServerless(logger *logging.ColoredLogger, cfg *Config, deps *Depe return fmt.Errorf("failed to initialize auth service: %w", err) } - // Load or create EdDSA key for new JWT tokens - edKey, err := loadOrCreateEdSigningKey(cfg.DataDir, logger) + // Load or create EdDSA key for new JWT tokens. Bug #215 fix: when + // cfg.ClusterSecret is set, the key is derived deterministically from + // it via HKDF, so every gateway in the cluster shares the same Ed25519 + // keypair and JWTs verify cross-node. With an empty ClusterSecret the + // per-node legacy behaviour is retained (single-node test deployments). + if cfg.ClusterSecret == "" { + // Loud warning: a multi-node cluster booted without a cluster + // secret reproduces bug #215 (per-gateway random keys, JWTs + // unverifiable cross-node). Single-node test rigs are the only + // legitimate case. + logger.ComponentWarn(logging.ComponentGeneral, + "ClusterSecret is empty; JWT signing keys will be random per-node. 
"+ + "Multi-node clusters MUST set ClusterSecret or JWTs will not verify across gateways (bug #215).") + } + edKey, err := loadOrCreateEdSigningKey(cfg.DataDir, cfg.ClusterSecret, logger) if err != nil { logger.ComponentWarn(logging.ComponentGeneral, "Failed to load EdDSA signing key; new JWTs will use RS256", zap.Error(err)) @@ -686,3 +819,119 @@ func injectRQLiteAuth(dsn, username, password string) string { } return dsn } + +// appendRQLiteQueryParams adds the standard query parameters to a RQLite DSN: +// +// - `disableClusterDiscovery=true` — gorqlite's discovery /nodes call is +// unreliable when peers are unreachable; we manage topology ourselves. +// - `level=weak` — Bug #235. Reads route to the leader (the only node +// guaranteed to have all committed writes), so a SELECT after an UPDATE +// in the same serverless invocation sees the new state. Previously +// `level=none`, which read from the local follower's possibly-stale +// snapshot. gorqlite's upstream default is `weak`; we were overriding +// to `none` and that hid this bug. +// +// The cost of `weak` over `none` is one HTTP hop to the leader (~1-2ms over +// the WireGuard mesh) and applies only to reads. Writes are unaffected +// because rqlite always redirects them to the leader regardless of `level`. +func appendRQLiteQueryParams(dsn string) string { + const params = "disableClusterDiscovery=true&level=weak" + if strings.Contains(dsn, "?") { + return dsn + "&" + params + } + return dsn + "?" + params +} + +// buildPushDispatcher constructs the push subsystem. +// +// As of bug #220 follow-up, push always initializes when ClusterSecret is +// available, regardless of whether any YAML provider config is set: +// +// - Device store + ConfigStore always build (tenants need to register +// devices and set per-namespace push config even on gateways with no +// YAML defaults). +// - Manager wraps the stores + a YAML-derived Defaults fallback. 
Each +// namespace can override any default via PUT /v1/push/config. +// - The legacy single-tier dispatcher is built only when YAML defaults +// are non-empty — kept for back-compat with code paths that haven't +// migrated to Manager. +// +// Returns (nil, nil, nil, nil, nil) when ClusterSecret is missing +// (push subsystem disabled — credentials can't be encrypted safely). +// Returns hard error only on store-init failure. +func buildPushDispatcher( + cfg *Config, + db rqlite.Client, + logger *logging.ColoredLogger, +) (*push.PushDispatcher, push.PushDeviceStore, *push.Manager, push.ConfigStore, error) { + if cfg.ClusterSecret == "" { + // Without the cluster secret we can't encrypt credentials at rest. + // Disable the whole push subsystem; HTTP routes return 503. + return nil, nil, nil, nil, nil + } + + store, err := push.NewRqliteDeviceStore(db, cfg.ClusterSecret, logger.Logger) + if err != nil { + return nil, nil, nil, nil, fmt.Errorf("init push device store: %w", err) + } + + cfgStore, err := push.NewRqliteConfigStore(db, cfg.ClusterSecret, logger.Logger) + if err != nil { + return nil, nil, nil, nil, fmt.Errorf("init push config store: %w", err) + } + + // ProviderFactory turns a resolved Config into the right set of + // provider instances. Lives here in dependencies.go because this is + // the only place that imports both the manager package and the + // concrete provider sub-packages — keeps push core dep-cycle-free. 
+ factory := func(c push.Config) []push.PushProvider { + var ps []push.PushProvider + if c.NtfyBaseURL != "" { + ps = append(ps, pushntfy.New(pushntfy.Config{ + BaseURL: c.NtfyBaseURL, + AuthToken: c.NtfyAuthToken, + }, logger.Logger)) + } + if c.ExpoAccessToken != "" { + ps = append(ps, pushexpo.New(pushexpo.Config{ + AccessToken: c.ExpoAccessToken, + }, logger.Logger)) + } + return ps + } + + defaults := push.Defaults{ + NtfyBaseURL: cfg.NtfyBaseURL, + NtfyAuthToken: cfg.NtfyAuthToken, + ExpoAccessToken: cfg.ExpoAccessToken, + } + manager := push.NewManager(store, cfgStore, defaults, factory, logger.Logger) + + // Legacy single-tier dispatcher kept ONLY when YAML defaults exist — + // some non-Manager code paths (notably the WASM push_send hostfunc + // before its migration to Manager) still expect a populated + // PushDispatcher. New code routes via Manager. + var legacy *push.PushDispatcher + if !defaults.IsEmpty() { + legacy = push.New(store, logger.Logger) + for _, p := range factory(push.Config{ + NtfyBaseURL: defaults.NtfyBaseURL, + NtfyAuthToken: defaults.NtfyAuthToken, + ExpoAccessToken: defaults.ExpoAccessToken, + }) { + legacy.Register(p) + } + } + + if defaults.NtfyBaseURL != "" { + logger.ComponentInfo(logging.ComponentGeneral, "push default provider: ntfy", + zap.String("base_url", defaults.NtfyBaseURL)) + } + if defaults.ExpoAccessToken != "" { + logger.ComponentInfo(logging.ComponentGeneral, "push default provider: expo configured") + } + logger.ComponentInfo(logging.ComponentGeneral, + "push subsystem initialized; tenants can self-serve via PUT /v1/push/config") + + return legacy, store, manager, cfgStore, nil +} diff --git a/core/pkg/gateway/dependencies_dsn_test.go b/core/pkg/gateway/dependencies_dsn_test.go new file mode 100644 index 0000000..4e3834e --- /dev/null +++ b/core/pkg/gateway/dependencies_dsn_test.go @@ -0,0 +1,59 @@ +package gateway + +import ( + "strings" + "testing" +) + +// TestAppendRQLiteQueryParams_consistencyLevelWeak is the 
regression guard +// for bug #235. The DSN passed to gorqlite MUST encode `level=weak` so reads +// route to the leader and see all committed writes from earlier in the same +// serverless invocation. `level=none` (the previous default) read from the +// local follower's possibly-stale state and broke `INSERT → UPDATE → SELECT` +// patterns inside host functions. +func TestAppendRQLiteQueryParams_consistencyLevelWeak(t *testing.T) { + got := appendRQLiteQueryParams("http://localhost:5001") + if !strings.Contains(got, "level=weak") { + t.Errorf("DSN missing level=weak (bug #235 regression):\n%s", got) + } + if strings.Contains(got, "level=none") { + t.Errorf("DSN must NOT carry level=none (bug #235):\n%s", got) + } + if !strings.Contains(got, "disableClusterDiscovery=true") { + t.Errorf("DSN missing disableClusterDiscovery=true:\n%s", got) + } +} + +// TestAppendRQLiteQueryParams_existingQueryString — when the inbound DSN +// already has a `?param=value` segment (e.g. authentication appended +// upstream), the new params must be `&`-joined, not start a fresh `?`. +func TestAppendRQLiteQueryParams_existingQueryString(t *testing.T) { + got := appendRQLiteQueryParams("http://localhost:5001?foo=bar") + if strings.Count(got, "?") != 1 { + t.Errorf("expected exactly one '?' in DSN, got: %s", got) + } + if !strings.Contains(got, "?foo=bar&disableClusterDiscovery=true&level=weak") { + t.Errorf("DSN didn't append params with '&' join:\n%s", got) + } +} + +// TestAppendRQLiteQueryParams_noExistingQueryString — when no `?` is present, +// the params must be introduced with a `?` not an `&`. 
+func TestAppendRQLiteQueryParams_noExistingQueryString(t *testing.T) { + got := appendRQLiteQueryParams("http://localhost:5001") + if !strings.HasSuffix(got, "?disableClusterDiscovery=true&level=weak") { + t.Errorf("DSN didn't introduce query string with '?':\n%s", got) + } +} + +// TestAppendRQLiteQueryParams_preservesAuthCredentials — credentials injected +// upstream by injectRQLiteAuth must survive the param append unchanged. +func TestAppendRQLiteQueryParams_preservesAuthCredentials(t *testing.T) { + got := appendRQLiteQueryParams("http://orama:secret@localhost:5001") + if !strings.Contains(got, "orama:secret@localhost:5001") { + t.Errorf("auth credentials lost:\n%s", got) + } + if !strings.Contains(got, "level=weak") { + t.Errorf("level=weak missing after auth-injected DSN:\n%s", got) + } +} diff --git a/core/pkg/gateway/gateway.go b/core/pkg/gateway/gateway.go index 389ab00..d132657 100644 --- a/core/pkg/gateway/gateway.go +++ b/core/pkg/gateway/gateway.go @@ -28,10 +28,12 @@ import ( "github.com/DeBrosOfficial/network/pkg/gateway/handlers/cache" deploymentshandlers "github.com/DeBrosOfficial/network/pkg/gateway/handlers/deployments" pubsubhandlers "github.com/DeBrosOfficial/network/pkg/gateway/handlers/pubsub" + pushhandlers "github.com/DeBrosOfficial/network/pkg/gateway/handlers/push" serverlesshandlers "github.com/DeBrosOfficial/network/pkg/gateway/handlers/serverless" enrollhandlers "github.com/DeBrosOfficial/network/pkg/gateway/handlers/enroll" joinhandlers "github.com/DeBrosOfficial/network/pkg/gateway/handlers/join" webrtchandlers "github.com/DeBrosOfficial/network/pkg/gateway/handlers/webrtc" + operatorhandlers "github.com/DeBrosOfficial/network/pkg/gateway/handlers/operator" vaulthandlers "github.com/DeBrosOfficial/network/pkg/gateway/handlers/vault" wireguardhandlers "github.com/DeBrosOfficial/network/pkg/gateway/handlers/wireguard" sqlitehandlers "github.com/DeBrosOfficial/network/pkg/gateway/handlers/sqlite" @@ -42,6 +44,8 @@ import ( 
"github.com/DeBrosOfficial/network/pkg/olric" "github.com/DeBrosOfficial/network/pkg/rqlite" "github.com/DeBrosOfficial/network/pkg/serverless" + "github.com/DeBrosOfficial/network/pkg/serverless/persistent" + "github.com/DeBrosOfficial/network/pkg/serverless/triggers" _ "github.com/mattn/go-sqlite3" "go.uber.org/zap" ) @@ -82,13 +86,17 @@ type Gateway struct { mu sync.RWMutex presenceMu sync.RWMutex pubsubHandlers *pubsubhandlers.PubSubHandlers + pushHandlers *pushhandlers.Handlers // Serverless function engine - serverlessEngine *serverless.Engine - serverlessRegistry *serverless.Registry - serverlessInvoker *serverless.Invoker - serverlessWSMgr *serverless.WSManager - serverlessHandlers *serverlesshandlers.ServerlessHandlers + serverlessEngine *serverless.Engine + serverlessRegistry *serverless.Registry + serverlessInvoker *serverless.Invoker + serverlessWSMgr *serverless.WSManager + serverlessHandlers *serverlesshandlers.ServerlessHandlers + pubsubDispatcher *triggers.PubSubDispatcher + persistentWSManager *persistent.Manager + cronScheduler *triggers.CronScheduler // Authentication service authService *auth.Service @@ -168,7 +176,8 @@ type Gateway struct { proxyTransport *http.Transport // Vault proxy handlers - vaultHandlers *vaulthandlers.Handlers + vaultHandlers *vaulthandlers.Handlers + operatorHandler *operatorhandlers.Handler // Namespace health state (local service probes + hourly reconciliation) nsHealth *namespaceHealthState @@ -340,10 +349,39 @@ func New(logger *logging.ColoredLogger, cfg *Config) (*Gateway, error) { // Wire PubSub trigger dispatch if serverless is available if deps.PubSubDispatcher != nil { + gw.pubsubDispatcher = deps.PubSubDispatcher gw.pubsubHandlers.SetOnPublish(func(ctx context.Context, namespace, topic string, data []byte) { deps.PubSubDispatcher.Dispatch(ctx, namespace, topic, data, 0) }) } + if deps.PersistentWSManager != nil { + gw.persistentWSManager = deps.PersistentWSManager + } + if deps.CronScheduler != nil { + 
+ gw.cronScheduler = deps.CronScheduler + // Background goroutine — Stop is called from gateway.Close. + gw.cronScheduler.Start(context.Background()) + } + + // Push notification handlers — disabled when ClusterSecret is missing or push init failed. + // The handlers themselves return 503 if dispatcher/store is nil; we + // register them unconditionally so the routes always exist with a + // predictable shape. + // + // Prefer the Manager-backed constructor (bug #220 follow-up) so + // tenants can self-serve their push config via PUT /v1/push/config. + // The legacy-constructor branch is defensive only: buildPushDispatcher + // returns all-nil without ClusterSecret, never a dispatcher on its own. + if deps.PushManager != nil { + gw.pushHandlers = pushhandlers.NewHandlersWithManager( + deps.PushManager, + deps.PushConfigStore, + deps.PushDeviceStore, + logger, + ) + } else if deps.PushDispatcher != nil { + gw.pushHandlers = pushhandlers.NewHandlers(deps.PushDispatcher, deps.PushDeviceStore, logger) + } if cfg.WebRTCEnabled && cfg.SFUPort > 0 { gw.webrtcHandlers = webrtchandlers.NewWebRTCHandlers( @@ -405,6 +443,7 @@ func New(logger *logging.ColoredLogger, cfg *Config) (*Gateway, error) { gw.joinHandler = joinhandlers.NewHandler(logger.Logger, deps.ORMClient, cfg.DataDir) gw.enrollHandler = enrollhandlers.NewHandler(logger.Logger, deps.ORMClient, cfg.DataDir) gw.vaultHandlers = vaulthandlers.NewHandlers(logger, deps.Client) + gw.operatorHandler = operatorhandlers.NewHandler(logger.Logger, deps.ORMClient) } // Initialize deployment system diff --git a/core/pkg/gateway/handlers/deployments/mocks_test.go b/core/pkg/gateway/handlers/deployments/mocks_test.go index 491048d..eb81040 100644 --- a/core/pkg/gateway/handlers/deployments/mocks_test.go +++ b/core/pkg/gateway/handlers/deployments/mocks_test.go @@ -162,6 +162,15 @@ func (m *mockRQLiteClient) Tx(ctx context.Context, fn func(tx rqlite.Tx) error) return nil } +func (m *mockRQLiteClient) Batch(ctx context.Context, ops []rqlite.BatchOp)
(*rqlite.BatchResult, error) { + return &rqlite.BatchResult{Committed: true, Results: make([]rqlite.OpResult, len(ops))}, nil +} + +func (m *mockRQLiteClient) BatchWithSeq(ctx context.Context, namespace string, ops []rqlite.BatchOp) (*rqlite.BatchResult, int64, error) { + res, err := m.Batch(ctx, ops) + return res, 1, err +} + // mockProcessManager implements a mock process manager for testing type mockProcessManager struct { StartFunc func(ctx context.Context, deployment *deployments.Deployment, workDir string) error diff --git a/core/pkg/gateway/handlers/join/handler.go b/core/pkg/gateway/handlers/join/handler.go index 678c82f..dd79485 100644 --- a/core/pkg/gateway/handlers/join/handler.go +++ b/core/pkg/gateway/handlers/join/handler.go @@ -129,6 +129,9 @@ func (h *Handler) HandleJoin(w http.ResponseWriter, r *http.Request) { return } + // 1b. Look up the operator wallet from the consumed token (may be empty for legacy tokens) + operatorWallet := h.tokenOperatorWallet(ctx, req.Token) + // 2. Clean up stale WG entries for this public IP (from previous installs). // This prevents ghost peers: old rows with different node_id/wg_key that // the sync loop would keep trying to reach. @@ -150,8 +153,8 @@ func (h *Handler) HandleJoin(w http.ResponseWriter, r *http.Request) { // 4. 
Register WG peer in database nodeID := fmt.Sprintf("node-%s", wgIP) // temporary ID based on WG IP _, err = h.rqliteClient.Exec(ctx, - "INSERT OR REPLACE INTO wireguard_peers (node_id, wg_ip, public_key, public_ip, wg_port) VALUES (?, ?, ?, ?, ?)", - nodeID, wgIP, req.WGPublicKey, req.PublicIP, 51820) + "INSERT OR REPLACE INTO wireguard_peers (node_id, wg_ip, public_key, public_ip, wg_port, operator_wallet) VALUES (?, ?, ?, ?, ?, ?)", + nodeID, wgIP, req.WGPublicKey, req.PublicIP, 51820, operatorWallet) if err != nil { h.logger.Error("failed to register WG peer", zap.Error(err)) http.Error(w, "failed to register peer", http.StatusInternalServerError) @@ -307,6 +310,22 @@ func (h *Handler) consumeToken(ctx context.Context, token, usedByIP string) erro return nil } +// tokenOperatorWallet looks up the operator_wallet recorded for an invite token in invite_tokens. +// Returns empty string if the token has no operator (legacy tokens), if no row matches the token, or if the query fails (the error is not logged). +func (h *Handler) tokenOperatorWallet(ctx context.Context, token string) string { + var rows []struct { + Wallet string `db:"operator_wallet"` + } + if err := h.rqliteClient.Query(ctx, &rows, + "SELECT COALESCE(operator_wallet, '') AS operator_wallet FROM invite_tokens WHERE token = ?", token); err != nil { + return "" + } + if len(rows) > 0 { + return rows[0].Wallet + } + return "" +} + // assignWGIP finds the next available 10.0.0.x IP by querying all peers and // finding the numerically highest IP. This avoids lexicographic comparison issues // where MAX("10.0.0.9") > MAX("10.0.0.10") in SQL string comparison. diff --git a/core/pkg/gateway/handlers/operator/handler.go b/core/pkg/gateway/handlers/operator/handler.go new file mode 100644 index 0000000..d11d2e1 --- /dev/null +++ b/core/pkg/gateway/handlers/operator/handler.go @@ -0,0 +1,99 @@ +// Package operator provides HTTP handlers for node operator management. +// +// Operators authenticate via wallet JWT (same auth flow as namespaces). 
+// Each operator's nodes are tracked by their wallet address in the +// dns_nodes and wireguard_peers tables. +package operator + +import ( + "context" + "encoding/json" + "net/http" + "strings" + + "github.com/DeBrosOfficial/network/pkg/gateway/auth" + "github.com/DeBrosOfficial/network/pkg/gateway/ctxkeys" + "github.com/DeBrosOfficial/network/pkg/rqlite" + "go.uber.org/zap" +) + +// Handler provides HTTP handlers for operator node management. +type Handler struct { + logger *zap.Logger + rqliteClient rqlite.Client +} + +// NewHandler creates an operator handler. +func NewHandler(logger *zap.Logger, rqliteClient rqlite.Client) *Handler { + return &Handler{ + logger: logger, + rqliteClient: rqliteClient, + } +} + +// walletFromRequest extracts the operator's wallet address from the request. +// Supports both JWT auth (wallet in Sub claim) and API key auth (wallet resolved +// via namespace_ownership; see resolveWalletFromAPIKey). Returns "" when neither source yields a wallet. +func (h *Handler) walletFromRequest(r *http.Request) string { + // 1. Try JWT claims first (wallet JWT auth sets Sub = "0x...") + if claims, ok := r.Context().Value(ctxkeys.JWT).(*auth.JWTClaims); ok && claims != nil { + sub := strings.TrimSpace(claims.Sub) + if strings.HasPrefix(strings.ToLower(sub), "0x") { + return sub + } + // JWT with API key subject + if strings.HasPrefix(strings.ToLower(sub), "ak_") { + return h.resolveWalletFromAPIKey(r.Context(), sub) + } + } + + // 2. Try API key from context (X-API-Key header, no JWT) + if apiKey, ok := r.Context().Value(ctxkeys.APIKey).(string); ok && apiKey != "" { + return h.resolveWalletFromAPIKey(r.Context(), apiKey) + } + + return "" +} + +// resolveWalletFromAPIKey looks up the wallet address linked to an API key. +// It queries namespace_ownership for a wallet-type owner of the namespace. 
+func (h *Handler) resolveWalletFromAPIKey(ctx context.Context, apiKeySub string) string { + if h.rqliteClient == nil { + return "" + } + ns := extractNamespace(apiKeySub) + if ns == "" { + return "" + } + var rows []struct { + OwnerID string `db:"owner_id"` + } + if err := h.rqliteClient.Query(ctx, &rows, + `SELECT no.owner_id FROM namespace_ownership no + JOIN namespaces n ON no.namespace_id = n.id + WHERE n.name = ? AND no.owner_type = 'wallet' + LIMIT 1`, + ns); err != nil || len(rows) == 0 { + return "" + } + return rows[0].OwnerID +} + +// extractNamespace extracts the namespace from an API key subject like "ak_xxx:namespace"; a subject without a ":" separator is returned unchanged. +func extractNamespace(apiKeySub string) string { + parts := strings.SplitN(apiKeySub, ":", 2) + if len(parts) == 2 { + return parts[1] + } + return apiKeySub +} + +func writeJSON(w http.ResponseWriter, status int, v interface{}) { + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(status) + json.NewEncoder(w).Encode(v) // encode error is discarded; the status header has already been written +} + +func writeError(w http.ResponseWriter, status int, msg string) { + writeJSON(w, status, map[string]string{"error": msg}) +} diff --git a/core/pkg/gateway/handlers/operator/handler_test.go b/core/pkg/gateway/handlers/operator/handler_test.go new file mode 100644 index 0000000..b136c9a --- /dev/null +++ b/core/pkg/gateway/handlers/operator/handler_test.go @@ -0,0 +1,242 @@ +package operator + +import ( + "context" + "net/http" + "net/http/httptest" + "strings" + "testing" + + "github.com/DeBrosOfficial/network/pkg/gateway/auth" + "github.com/DeBrosOfficial/network/pkg/gateway/ctxkeys" +) + +func TestWalletFromRequest_withClaims(t *testing.T) { + h := NewHandler(nil, nil) + r := httptest.NewRequest(http.MethodGet, "/", nil) + claims := &auth.JWTClaims{Sub: "0xabc123"} + ctx := context.WithValue(r.Context(), ctxkeys.JWT, claims) + r = r.WithContext(ctx) + + wallet := h.walletFromRequest(r) + if wallet != "0xabc123" { + t.Errorf("wallet = %q, want %q", wallet, "0xabc123") + } +} + +func 
TestWalletFromRequest_noClaims(t *testing.T) { + h := NewHandler(nil, nil) + r := httptest.NewRequest(http.MethodGet, "/", nil) + + wallet := h.walletFromRequest(r) + if wallet != "" { + t.Errorf("wallet = %q, want empty", wallet) + } +} + +func TestWalletFromRequest_nilClaims(t *testing.T) { + h := NewHandler(nil, nil) + r := httptest.NewRequest(http.MethodGet, "/", nil) + ctx := context.WithValue(r.Context(), ctxkeys.JWT, (*auth.JWTClaims)(nil)) + r = r.WithContext(ctx) + + wallet := h.walletFromRequest(r) + if wallet != "" { + t.Errorf("wallet = %q, want empty", wallet) + } +} + +func TestWalletFromRequest_apiKeyContext(t *testing.T) { + // When auth middleware sets ctxkeys.APIKey (no JWT), walletFromRequest + // should try to resolve via the API key. With nil rqliteClient it returns + // empty (can't query DB), but it shouldn't panic. + h := NewHandler(nil, nil) + r := httptest.NewRequest(http.MethodGet, "/", nil) + ctx := context.WithValue(r.Context(), ctxkeys.APIKey, "ak_test:myns") + r = r.WithContext(ctx) + + // Should not panic — returns empty because no DB to query + wallet := h.walletFromRequest(r) + if wallet != "" { + t.Errorf("wallet = %q, want empty (no DB)", wallet) + } +} + +func TestExtractNamespace(t *testing.T) { + tests := []struct { + input string + want string + }{ + {"ak_abc123:myns", "myns"}, + {"ak_abc123", "ak_abc123"}, + {"", ""}, + } + for _, tt := range tests { + got := extractNamespace(tt.input) + if got != tt.want { + t.Errorf("extractNamespace(%q) = %q, want %q", tt.input, got, tt.want) + } + } +} + +func TestDecodeJSON_valid(t *testing.T) { + body := strings.NewReader(`{"node_id":"test-node","environment":"devnet"}`) + r := httptest.NewRequest(http.MethodPost, "/", body) + + var req RegisterRequest + if err := decodeJSON(r, &req); err != nil { + t.Fatalf("decodeJSON: %v", err) + } + if req.NodeID != "test-node" { + t.Errorf("NodeID = %q, want %q", req.NodeID, "test-node") + } + if req.Environment != "devnet" { + 
t.Errorf("Environment = %q, want %q", req.Environment, "devnet") + } +} + +func TestDecodeJSON_invalid(t *testing.T) { + body := strings.NewReader(`not-json`) + r := httptest.NewRequest(http.MethodPost, "/", body) + + var req RegisterRequest + if err := decodeJSON(r, &req); err == nil { + t.Error("expected error for invalid JSON") + } +} + +func TestHandleInvite_noAuth(t *testing.T) { + h := NewHandler(nil, nil) + w := httptest.NewRecorder() + r := httptest.NewRequest(http.MethodPost, "/v1/operator/invite", nil) + + h.HandleInvite(w, r) + + if w.Code != http.StatusUnauthorized { + t.Errorf("status = %d, want %d", w.Code, http.StatusUnauthorized) + } +} + +func TestHandleInvite_wrongMethod(t *testing.T) { + h := NewHandler(nil, nil) + w := httptest.NewRecorder() + r := httptest.NewRequest(http.MethodGet, "/v1/operator/invite", nil) + + h.HandleInvite(w, r) + + if w.Code != http.StatusMethodNotAllowed { + t.Errorf("status = %d, want %d", w.Code, http.StatusMethodNotAllowed) + } +} + +func TestHandleListNodes_noAuth(t *testing.T) { + h := NewHandler(nil, nil) + w := httptest.NewRecorder() + r := httptest.NewRequest(http.MethodGet, "/v1/operator/nodes", nil) + + h.HandleListNodes(w, r) + + if w.Code != http.StatusUnauthorized { + t.Errorf("status = %d, want %d", w.Code, http.StatusUnauthorized) + } +} + +func TestHandleListNodes_wrongMethod(t *testing.T) { + h := NewHandler(nil, nil) + w := httptest.NewRecorder() + r := httptest.NewRequest(http.MethodPost, "/v1/operator/nodes", nil) + + h.HandleListNodes(w, r) + + if w.Code != http.StatusMethodNotAllowed { + t.Errorf("status = %d, want %d", w.Code, http.StatusMethodNotAllowed) + } +} + +func TestHandleRegister_noAuth(t *testing.T) { + h := NewHandler(nil, nil) + w := httptest.NewRecorder() + r := httptest.NewRequest(http.MethodPost, "/v1/operator/node/register", strings.NewReader(`{"node_id":"test"}`)) + + h.HandleRegister(w, r) + + if w.Code != http.StatusUnauthorized { + t.Errorf("status = %d, want %d", w.Code, 
http.StatusUnauthorized) + } +} + +func TestHandleRegister_missingFields(t *testing.T) { + h := NewHandler(nil, nil) + w := httptest.NewRecorder() + r := httptest.NewRequest(http.MethodPost, "/v1/operator/node/register", strings.NewReader(`{}`)) + claims := &auth.JWTClaims{Sub: "0xabc"} + r = r.WithContext(context.WithValue(r.Context(), ctxkeys.JWT, claims)) + + h.HandleRegister(w, r) + + if w.Code != http.StatusBadRequest { + t.Errorf("status = %d, want %d", w.Code, http.StatusBadRequest) + } +} + +func TestHandleRegister_invalidEnvironment(t *testing.T) { + h := NewHandler(nil, nil) + w := httptest.NewRecorder() + r := httptest.NewRequest(http.MethodPost, "/v1/operator/node/register", + strings.NewReader(`{"node_id":"test","environment":""}`)) + claims := &auth.JWTClaims{Sub: "0xabc"} + r = r.WithContext(context.WithValue(r.Context(), ctxkeys.JWT, claims)) + + h.HandleRegister(w, r) + + if w.Code != http.StatusBadRequest { + t.Errorf("status = %d, want %d", w.Code, http.StatusBadRequest) + } +} + +func TestHandleRegister_invalidRole(t *testing.T) { + h := NewHandler(nil, nil) + w := httptest.NewRecorder() + r := httptest.NewRequest(http.MethodPost, "/v1/operator/node/register", + strings.NewReader(`{"node_id":"test","role":"admin"}`)) + claims := &auth.JWTClaims{Sub: "0xabc"} + r = r.WithContext(context.WithValue(r.Context(), ctxkeys.JWT, claims)) + + h.HandleRegister(w, r) + + if w.Code != http.StatusBadRequest { + t.Errorf("status = %d, want %d", w.Code, http.StatusBadRequest) + } +} + +func TestAllowedEnvironments(t *testing.T) { + valid := []string{"devnet", "testnet", "sandbox", "production", "mainnet"} + invalid := []string{"staging", "local", "