feat(gateway): implement self-service tenant push notifications

- Add `namespace_push_config` table for per-namespace provider settings
- Introduce `cluster_secret_path` to enable deterministic JWT signing and
  AES-256-GCM encryption for push credentials
- Update gateway config to support per-namespace overrides of push
  notification providers (ntfy/Expo)
- Bump version to 0.122.3
This commit is contained in:
anonpenguin23 2026-05-08 11:23:53 +03:00
parent b5f6fb4497
commit f55c7269cd
41 changed files with 3319 additions and 189 deletions

View File

@ -63,7 +63,7 @@ test-e2e-quick:
.PHONY: build clean test deps tidy fmt vet lint install-hooks push-devnet push-testnet rollout-devnet rollout-testnet release .PHONY: build clean test deps tidy fmt vet lint install-hooks push-devnet push-testnet rollout-devnet rollout-testnet release
VERSION := 0.122.2 VERSION := 0.122.3
COMMIT ?= $(shell git rev-parse --short HEAD 2>/dev/null || echo unknown) COMMIT ?= $(shell git rev-parse --short HEAD 2>/dev/null || echo unknown)
DATE ?= $(shell date -u +%Y-%m-%dT%H:%M:%SZ) DATE ?= $(shell date -u +%Y-%m-%dT%H:%M:%SZ)
LDFLAGS := -X 'main.version=$(VERSION)' -X 'main.commit=$(COMMIT)' -X 'main.date=$(DATE)' LDFLAGS := -X 'main.version=$(VERSION)' -X 'main.commit=$(COMMIT)' -X 'main.date=$(DATE)'

BIN
core/cli Executable file

Binary file not shown.

View File

@ -77,21 +77,26 @@ func parseGatewayConfig(logger *logging.ColoredLogger) *gateway.Config {
} }
type yamlCfg struct { type yamlCfg struct {
ListenAddr string `yaml:"listen_addr"` ListenAddr string `yaml:"listen_addr"`
ClientNamespace string `yaml:"client_namespace"` ClientNamespace string `yaml:"client_namespace"`
RQLiteDSN string `yaml:"rqlite_dsn"` RQLiteDSN string `yaml:"rqlite_dsn"`
GlobalRQLiteDSN string `yaml:"global_rqlite_dsn"` GlobalRQLiteDSN string `yaml:"global_rqlite_dsn"`
Peers []string `yaml:"bootstrap_peers"` Peers []string `yaml:"bootstrap_peers"`
EnableHTTPS bool `yaml:"enable_https"` EnableHTTPS bool `yaml:"enable_https"`
DomainName string `yaml:"domain_name"` DomainName string `yaml:"domain_name"`
TLSCacheDir string `yaml:"tls_cache_dir"` TLSCacheDir string `yaml:"tls_cache_dir"`
OlricServers []string `yaml:"olric_servers"` OlricServers []string `yaml:"olric_servers"`
OlricTimeout string `yaml:"olric_timeout"` OlricTimeout string `yaml:"olric_timeout"`
IPFSClusterAPIURL string `yaml:"ipfs_cluster_api_url"` IPFSClusterAPIURL string `yaml:"ipfs_cluster_api_url"`
IPFSAPIURL string `yaml:"ipfs_api_url"` IPFSAPIURL string `yaml:"ipfs_api_url"`
IPFSTimeout string `yaml:"ipfs_timeout"` IPFSTimeout string `yaml:"ipfs_timeout"`
IPFSReplicationFactor int `yaml:"ipfs_replication_factor"` IPFSReplicationFactor int `yaml:"ipfs_replication_factor"`
WebRTC yamlWebRTCCfg `yaml:"webrtc"` WebRTC yamlWebRTCCfg `yaml:"webrtc"`
// ClusterSecretPath: see GatewayYAMLConfig docstring. Optional;
// when set, the standalone gateway reads the file at this path
// and populates cfg.ClusterSecret so JWT signing keys can be
// derived deterministically (bug #215 fix).
ClusterSecretPath string `yaml:"cluster_secret_path"`
} }
data, err := os.ReadFile(configPath) data, err := os.ReadFile(configPath)
@ -200,6 +205,30 @@ func parseGatewayConfig(logger *logging.ColoredLogger) *gateway.Config {
cfg.IPFSReplicationFactor = y.IPFSReplicationFactor cfg.IPFSReplicationFactor = y.IPFSReplicationFactor
} }
// Cluster secret — bug #215 fix. The host-managed gateway in
// pkg/node/gateway.go reads this from a known on-disk path; the
// standalone binary (used by namespace gateways via systemd) needs the
// same access so it can derive the cluster-wide Ed25519 JWT signing
// key. Without this, namespace gateways had per-node random keys and
// JWTs minted on one node were unverifiable on another, leaving
// `caller_jwt_subject` empty in serverless host functions.
if path := strings.TrimSpace(y.ClusterSecretPath); path != "" {
secretBytes, err := os.ReadFile(path)
if err != nil {
logger.ComponentError(logging.ComponentGeneral,
"cluster_secret_path is set but the file is unreadable; "+
"JWTs will use a per-node random signing key and will not "+
"verify cross-node — bug #215 will reproduce",
zap.String("path", path),
zap.Error(err))
} else {
cfg.ClusterSecret = strings.TrimSpace(string(secretBytes))
logger.ComponentInfo(logging.ComponentGeneral,
"Loaded cluster secret for cluster-wide JWT signing key derivation",
zap.String("path", path))
}
}
// WebRTC configuration // WebRTC configuration
cfg.WebRTCEnabled = y.WebRTC.Enabled cfg.WebRTCEnabled = y.WebRTC.Enabled
if y.WebRTC.SFUPort > 0 { if y.WebRTC.SFUPort > 0 {

View File

@ -489,25 +489,59 @@ sudo cp caddy-root-ca.crt /usr/local/share/ca-certificates/caddy-root-ca.crt
sudo update-ca-certificates sudo update-ca-certificates
``` ```
## Push notifications (optional, per gateway) ## Push notifications
Push routes (`/v1/push/devices`, `/v1/push/send`) are always registered but Push provider configuration is **tenant-self-service** as of bug #220
return `503 SERVICE_UNAVAILABLE` until at least one provider is configured follow-up. Tenants set their own ntfy / Expo credentials via authenticated
in the gateway config: HTTP — operators no longer need to edit YAML and restart for every namespace
that wants push.
```yaml ### Tenant flow (no operator involvement)
# In your namespace gateway config:
push: ```bash
ntfy_base_url: "http://localhost:8080" # self-hosted ntfy (preferred for privacy) # Set per-namespace config
expo_access_token: "..." # Expo push (alternative; client SDK lock-in) curl -X PUT https://ns-anchat-test.orama-devnet.network/v1/push/config \
-H 'Authorization: Bearer <user-jwt>' \
-H 'Content-Type: application/json' \
-d '{"ntfy_base_url": "https://ntfy.sh"}'
# Read current config (secrets redacted to booleans)
curl https://ns-anchat-test.orama-devnet.network/v1/push/config \
-H 'Authorization: Bearer <user-jwt>'
# Clear (push reverts to gateway YAML defaults, or 503 if no defaults)
curl -X DELETE https://ns-anchat-test.orama-devnet.network/v1/push/config \
-H 'Authorization: Bearer <user-jwt>'
``` ```
Restart the gateway after editing. A `GET /v1/push/devices` call (with valid Per-namespace config takes effect on the NEXT push send (the cached
auth) will then return `200` instead of `503`. dispatcher is invalidated on PUT/DELETE). No restart needed.
The 503 body explicitly names the config knobs operators need to set, so ### Operator flow (cluster-wide defaults — optional)
tenants getting "Push not configured" can self-diagnose without filing a
ticket. (Bug #220 audit fix.) Operators can still seed defaults in the gateway YAML. Per-namespace config
OVERRIDES the defaults; namespaces with no row inherit them.
```yaml
# Cluster-wide push defaults (optional; tenants override per-namespace)
push:
ntfy_base_url: "https://ntfy.sh" # default for namespaces with no override
expo_access_token: "..." # default Expo token
```
### Encryption
Sensitive credentials (`ntfy_auth_token`, `expo_access_token`) are
AES-256-GCM-encrypted at rest in the `namespace_push_config` table using
a key derived from the cluster secret. The GET endpoint returns boolean
`has_X` flags only — credentials are NEVER echoed back over HTTP.
### Disabling push entirely
If `cluster_secret` isn't configured on the gateway, the push subsystem
is disabled and `/v1/push/*` returns 503. To enable: set the cluster secret
and restart. (This is the only operator-side restart still required, and
it's a one-time action at gateway provisioning.)
## Project Structure ## Project Structure

View File

@ -194,6 +194,49 @@ The legacy `db_execute` is kept indefinitely so existing functions don't break.
| `log_info(message)` | Log info-level message (captured in invocation logs). | | `log_info(message)` | Log info-level message (captured in invocation logs). |
| `log_error(message)` | Log error-level message. | | `log_error(message)` | Log error-level message. |
## Configuring Push Notifications (per-namespace)
Push providers (ntfy / Expo) are configured **per namespace** by the tenant —
no operator involvement, no SSH access required. Set, read, or clear via:
```bash
# Set / update (sensitive credentials are encrypted at rest)
curl -X PUT https://ns-myapp.example.com/v1/push/config \
-H 'Authorization: Bearer <user-jwt>' \
-H 'Content-Type: application/json' \
-d '{
"ntfy_base_url": "https://ntfy.sh",
"ntfy_auth_token": "tk_…"
}'
# Read (sensitive fields redacted to booleans)
curl https://ns-myapp.example.com/v1/push/config \
-H 'Authorization: Bearer <user-jwt>'
# Clear (push reverts to gateway-wide defaults if any, else 503)
curl -X DELETE https://ns-myapp.example.com/v1/push/config \
-H 'Authorization: Bearer <user-jwt>'
```
### Field semantics
| Field | Sensitive? | Notes |
|---|---|---|
| `ntfy_base_url` | No | URL of the ntfy server. `https://ntfy.sh` works for testing. |
| `ntfy_auth_token` | Yes | Optional bearer token sent to ntfy. Encrypted at rest. |
| `expo_access_token` | Yes | Expo Push API access token. Encrypted at rest. |
PUT semantics are **field-level** — a `null` (or omitted) field leaves the
existing value alone; an explicit empty string clears just that field. To
clear EVERYTHING use DELETE.
After a PUT the next `push_send` (host call) or `POST /v1/push/send` uses
the new providers — the cached dispatcher is invalidated automatically.
If no per-namespace config is set AND the gateway has no YAML defaults, the
push endpoints return **503 SERVICE_UNAVAILABLE** with a message naming the
exact config to set.
## Managing Secrets ## Managing Secrets
Secrets are encrypted at rest (AES-256-GCM) and scoped to your namespace. Functions read them via `get_secret("name")` at runtime. Secrets are encrypted at rest (AES-256-GCM) and scoped to your namespace. Functions read them via `get_secret("name")` at runtime.

View File

@ -0,0 +1,26 @@
-- =============================================================================
-- 026_namespace_push_config.sql
--
-- Per-namespace push notification provider configuration. Tenants set their
-- own ntfy / expo credentials via PUT /v1/push/config without operator
-- involvement (bug #220 follow-up — self-service tenant config).
--
-- Sensitive credentials (auth tokens) are AES-256-GCM ciphertext via
-- pkg/secrets, prefix 'enc:'. Non-secret URLs (ntfy_base_url) stored
-- plaintext — they leak no security material.
--
-- The gateway YAML config remains as a global fallback / default. A row
-- in this table OVERRIDES the YAML for that namespace; absence falls back.
-- =============================================================================
CREATE TABLE IF NOT EXISTS namespace_push_config (
namespace TEXT PRIMARY KEY,
-- ntfy provider config (URL is non-secret; auth token is)
ntfy_base_url TEXT,
ntfy_auth_token_encrypted TEXT,
-- expo provider config (the access token IS sensitive)
expo_access_token_encrypted TEXT,
-- Audit metadata: who set this, and when (last update wins).
updated_at INTEGER NOT NULL,
updated_by TEXT
);

View File

@ -98,7 +98,7 @@ Examples:
return pushDirect(nodes, archivePath) return pushDirect(nodes, archivePath)
} }
// Multi-node with --fanout: use agent forwarding // Multi-node with --fanout: upload to hub, fan out server-to-server
return pushFanout(nodes, archivePath) return pushFanout(nodes, archivePath)
}, },
} }
@ -135,27 +135,35 @@ func pushDirect(nodes []inspector.Node, archivePath string) error {
return nil return nil
} }
// pushFanout uploads the archive to the first node, then fans out server-to-server // pushFanout uploads the archive to the first node (hub), then fans out
// using SSH agent forwarding. // server-to-server: the hub SCPs the archive to all other nodes in parallel
// and SSHes in to extract it.
//
// Key design — no SSH agent forwarding:
//
// The previous implementation loaded all N node keys into the system ssh-agent
// and used agent forwarding (-A) so the hub could reach targets. That caused
// "Too many authentication failures": when the hub connected to a target, the
// forwarded agent offered all N keys sequentially; if N exceeds the server's
// MaxAuthTries (default 6 on most distros), the server disconnects before the
// correct key is tried.
//
// Fix: PrepareNodeKeys (called by the parent command) already fetched and wrote
// each node's private key to a temp file (node.SSHKey). We base64-encode each
// key and embed it directly in the fanout bash script. On the hub, the script
// writes each key to its own mktemp file, uses -o IdentitiesOnly=yes -i $K
// (only ONE key offered per connection), and deletes the temp file immediately.
// No ssh-agent involved on either end; MaxAuthTries is irrelevant.
func pushFanout(nodes []inspector.Node, archivePath string) error { func pushFanout(nodes []inspector.Node, archivePath string) error {
fmt.Printf("Pushing to %d node(s) (fanout)...\n\n", len(nodes)) fmt.Printf("Pushing to %d node(s) (fanout)...\n\n", len(nodes))
hub := nodes[0] hub := nodes[0]
targets := nodes[1:] targets := nodes[1:]
remotePath := "/tmp/" + filepath.Base(archivePath) remotePath := "/tmp/" + filepath.Base(archivePath)
// Hub extraction keeps the archive so it can be fanned out to targets.
// The cleanup at the end removes it.
hubExtractCmd := fmt.Sprintf("mkdir -p /opt/orama && tar xzf %s -C /opt/orama", remotePath)
// Target extraction deletes the archive after extracting.
targetExtractCmd := fmt.Sprintf("mkdir -p /opt/orama && tar xzf %s -C /opt/orama && rm -f %s", remotePath, remotePath)
// Load SSH keys into the system ssh-agent for agent forwarding // Hub keeps the archive on disk after extracting — targets SCP it from here.
fmt.Println(" Loading SSH keys into agent...") // The archive is removed from the hub at the end of this function.
if err := remotessh.LoadAgentKeys(nodes); err != nil { hubExtractCmd := fmt.Sprintf("mkdir -p /opt/orama && tar xzf %s -C /opt/orama", remotePath)
fmt.Printf(" Warning: failed to load agent keys: %v\n", err)
fmt.Println(" Falling back to direct push...")
return pushDirect(nodes, archivePath)
}
// Upload archive to hub // Upload archive to hub
fmt.Printf(" %s (hub): uploading...", hub.Host) fmt.Printf(" %s (hub): uploading...", hub.Host)
@ -168,27 +176,53 @@ func pushFanout(nodes []inspector.Node, archivePath string) error {
} }
fmt.Println(" OK") fmt.Println(" OK")
// Build the fanout command — hub SCPs to all targets in parallel // Build the fanout script. Each target gets its own shell subshell that:
// 1. Writes the target's SSH private key to a mktemp file ($K).
// 2. SCPs the archive from the hub's local path to the target.
// 3. SSHes into the target to extract it.
// 4. Deletes $K — key material never lingers on the hub.
//
// All subshells run in parallel (&); a final `wait` collects them.
// The entire script is base64-encoded before transmission to avoid shell
// quoting conflicts (the script contains both single and double quotes).
var fanoutParts []string var fanoutParts []string
for _, t := range targets { for _, t := range targets {
scpCmd := fmt.Sprintf( keyBytes, err := os.ReadFile(t.SSHKey)
"scp -o StrictHostKeyChecking=accept-new -o IdentitiesOnly=no %s %s@%s:%s && ssh -o StrictHostKeyChecking=accept-new %s@%s 'sudo bash -c \"%s\"' && echo '%s: done'", if err != nil {
// SSHKey was populated by PrepareNodeKeys; this should never
// happen unless the temp file was somehow deleted mid-run.
fmt.Printf(" Warning: could not read key for %s: %v (skipping)\n", t.Host, err)
continue
}
// base64 alphabet is [A-Za-z0-9+/=] — no shell metacharacters —
// safe to embed in single-quoted strings inside the script.
keyB64 := base64.StdEncoding.EncodeToString(keyBytes)
part := fmt.Sprintf(
"(K=$(mktemp) && echo '%s' | base64 -d >\"$K\" && chmod 600 \"$K\" && "+
"scp -o StrictHostKeyChecking=accept-new -o ConnectTimeout=30 -o IdentitiesOnly=yes -i \"$K\" %s %s@%s:%s && "+
"ssh -o StrictHostKeyChecking=accept-new -o ConnectTimeout=30 -o IdentitiesOnly=yes -i \"$K\" %s@%s "+
"'sudo bash -c \"mkdir -p /opt/orama && tar xzf %s -C /opt/orama && rm -f %s\"' && "+
"rm -f \"$K\" && echo '%s: done' || (rm -f \"$K\" 2>/dev/null; echo '%s: FAILED')) &",
keyB64,
remotePath, t.User, t.Host, remotePath, remotePath, t.User, t.Host, remotePath,
t.User, t.Host, targetExtractCmd, t.User, t.Host,
t.Host, remotePath, remotePath,
t.Host, t.Host,
) )
fanoutParts = append(fanoutParts, "("+scpCmd+") &") fanoutParts = append(fanoutParts, part)
} }
fanoutParts = append(fanoutParts, "wait", "echo 'Fanout complete'") fanoutParts = append(fanoutParts, "wait", "echo 'Fanout complete'")
fanoutScript := strings.Join(fanoutParts, "\n") fanoutScript := strings.Join(fanoutParts, "\n")
// Base64-encode the script to avoid shell quoting conflicts — the script
// contains single quotes (ssh '...') that would break a bash -c '...' wrapper.
encoded := base64.StdEncoding.EncodeToString([]byte(fanoutScript)) encoded := base64.StdEncoding.EncodeToString([]byte(fanoutScript))
runCmd := fmt.Sprintf("echo %s | base64 -d | bash", encoded) runCmd := fmt.Sprintf("echo %s | base64 -d | bash", encoded)
fmt.Printf(" Fanning out to %d nodes from %s...\n", len(targets), hub.Host) fmt.Printf(" Fanning out to %d nodes from %s...\n", len(targets), hub.Host)
if err := remotessh.RunSSHStreaming(hub, runCmd, remotessh.WithAgentForward()); err != nil { // No agent forwarding — hub authenticates to each target using only
// that target's key (embedded above). IdentitiesOnly=yes ensures the
// hub's own host key is never accidentally offered to other nodes.
if err := remotessh.RunSSHStreaming(hub, runCmd); err != nil {
fmt.Printf(" Fanout failed: %v\n", err) fmt.Printf(" Fanout failed: %v\n", err)
fmt.Println(" Some nodes may not have been updated") fmt.Println(" Some nodes may not have been updated")
} }

View File

@ -6,6 +6,7 @@ import (
"fmt" "fmt"
"io" "io"
"text/tabwriter" "text/tabwriter"
"time"
"github.com/spf13/cobra" "github.com/spf13/cobra"
) )
@ -126,32 +127,83 @@ func runTriggersList(cmd *cobra.Command, args []string) error {
} }
w := tabwriter.NewWriter(cmd.OutOrStdout(), 0, 0, 2, ' ', 0) w := tabwriter.NewWriter(cmd.OutOrStdout(), 0, 0, 2, ' ', 0)
fmt.Fprintln(w, "ID\tTOPIC\tENABLED") // Bug #65 audit: the previous CLI rendered only ID/TOPIC/ENABLED, so cron
// triggers appeared as mystery blank-topic rows. The handler returns a
// `kind` discriminator plus pubsub-only `topic` or cron-only
// `cron_expression` / `next_run_at` / `last_run_at`; the CLI now renders
// both kinds in a single unified table.
fmt.Fprintln(w, "ID\tKIND\tSCHEDULE/TOPIC\tNEXT RUN\tLAST RUN\tENABLED")
for _, t := range triggers { for _, t := range triggers {
tr, ok := t.(map[string]interface{}) tr, ok := t.(map[string]interface{})
if !ok { if !ok {
continue continue
} }
id, _ := tr["ID"].(string) id := stringField(tr, "id", "ID")
if id == "" { kind := stringField(tr, "kind", "Kind")
id, _ = tr["id"].(string) // Backward compat: pre-#65 servers returned only `topic` with no
// `kind` field. Treat those as pubsub.
if kind == "" {
kind = "pubsub"
} }
topic, _ := tr["Topic"].(string)
if topic == "" { var what, nextRun, lastRun string
topic, _ = tr["topic"].(string) switch kind {
case "cron":
what = stringField(tr, "cron_expression", "CronExpression")
nextRun = formatCronTimestamp(tr["next_run_at"])
lastRun = formatCronTimestamp(tr["last_run_at"])
default: // pubsub or unknown
what = stringField(tr, "topic", "Topic")
nextRun = "-"
lastRun = "-"
} }
enabled := true enabled := true
if e, ok := tr["Enabled"].(bool); ok { if e, ok := tr["Enabled"].(bool); ok {
enabled = e enabled = e
} else if e, ok := tr["enabled"].(bool); ok { } else if e, ok := tr["enabled"].(bool); ok {
enabled = e enabled = e
} }
fmt.Fprintf(w, "%s\t%s\t%v\n", id, topic, enabled) fmt.Fprintf(w, "%s\t%s\t%s\t%s\t%s\t%v\n", id, kind, what, nextRun, lastRun, enabled)
} }
w.Flush() w.Flush()
return nil return nil
} }
// stringField pulls a string from a JSON-decoded map under any of the
// supplied keys, in order. The handler emits snake_case (`cron_expression`)
// while older Go-tagged structs may surface PascalCase — try both.
func stringField(m map[string]interface{}, keys ...string) string {
for _, k := range keys {
if v, ok := m[k].(string); ok && v != "" {
return v
}
}
return ""
}
// formatCronTimestamp renders a JSON timestamp from the handler in a compact
// human-readable form. Returns "-" for nil / unparseable values so the CLI
// table stays aligned for never-run / pubsub rows.
func formatCronTimestamp(v interface{}) string {
if v == nil {
return "-"
}
s, ok := v.(string)
if !ok || s == "" {
return "-"
}
// Try RFC3339 first (Go's default time.Time JSON encoding); fall back to
// the raw string so unexpected formats don't disappear silently.
if ts, err := time.Parse(time.RFC3339, s); err == nil {
return ts.UTC().Format("2006-01-02 15:04:05 UTC")
}
if ts, err := time.Parse(time.RFC3339Nano, s); err == nil {
return ts.UTC().Format("2006-01-02 15:04:05 UTC")
}
return s
}
func runTriggersDelete(cmd *cobra.Command, args []string) error { func runTriggersDelete(cmd *cobra.Command, args []string) error {
funcName := args[0] funcName := args[0]
triggerID := args[1] triggerID := args[1]

View File

@ -0,0 +1,114 @@
package functions
import (
"testing"
)
// ----------------------------------------------------------------------------
// stringField — pulls value from JSON-decoded map under any of the given keys
// ----------------------------------------------------------------------------
func TestStringField_prefersFirstKey(t *testing.T) {
m := map[string]interface{}{
"id": "first",
"ID": "second",
}
if got := stringField(m, "id", "ID"); got != "first" {
t.Errorf("stringField = %q, want %q", got, "first")
}
}
func TestStringField_fallsThroughWhenFirstMissing(t *testing.T) {
m := map[string]interface{}{
"ID": "second",
}
if got := stringField(m, "id", "ID"); got != "second" {
t.Errorf("stringField = %q, want %q", got, "second")
}
}
func TestStringField_emptyValueSkipped(t *testing.T) {
// An empty string under the first key MUST fall through to subsequent
// keys, otherwise empty pubsub `topic` fields would shadow valid
// PascalCase `Topic`.
m := map[string]interface{}{
"id": "",
"ID": "fallback",
}
if got := stringField(m, "id", "ID"); got != "fallback" {
t.Errorf("stringField = %q, want %q", got, "fallback")
}
}
func TestStringField_nonStringValueSkipped(t *testing.T) {
m := map[string]interface{}{
"id": 42,
"ID": "ok",
}
if got := stringField(m, "id", "ID"); got != "ok" {
t.Errorf("stringField = %q, want %q", got, "ok")
}
}
func TestStringField_allMissingReturnsEmpty(t *testing.T) {
m := map[string]interface{}{"other": "value"}
if got := stringField(m, "id", "ID"); got != "" {
t.Errorf("stringField = %q, want empty", got)
}
}
// ----------------------------------------------------------------------------
// formatCronTimestamp — RFC3339 -> UTC display, "-" for missing/unparseable
// ----------------------------------------------------------------------------
func TestFormatCronTimestamp_nilReturnsDash(t *testing.T) {
if got := formatCronTimestamp(nil); got != "-" {
t.Errorf("formatCronTimestamp(nil) = %q, want %q", got, "-")
}
}
func TestFormatCronTimestamp_emptyStringReturnsDash(t *testing.T) {
if got := formatCronTimestamp(""); got != "-" {
t.Errorf("formatCronTimestamp(\"\") = %q, want %q", got, "-")
}
}
func TestFormatCronTimestamp_nonStringReturnsDash(t *testing.T) {
if got := formatCronTimestamp(42); got != "-" {
t.Errorf("formatCronTimestamp(42) = %q, want %q", got, "-")
}
}
func TestFormatCronTimestamp_rfc3339(t *testing.T) {
got := formatCronTimestamp("2025-05-08T03:00:00Z")
want := "2025-05-08 03:00:00 UTC"
if got != want {
t.Errorf("formatCronTimestamp = %q, want %q", got, want)
}
}
func TestFormatCronTimestamp_rfc3339Nano(t *testing.T) {
got := formatCronTimestamp("2025-05-08T03:00:00.123456789Z")
want := "2025-05-08 03:00:00 UTC"
if got != want {
t.Errorf("formatCronTimestamp nano = %q, want %q", got, want)
}
}
func TestFormatCronTimestamp_rfc3339WithOffset(t *testing.T) {
// Non-UTC offsets must be normalised to UTC for the display.
got := formatCronTimestamp("2025-05-08T05:00:00+02:00")
want := "2025-05-08 03:00:00 UTC"
if got != want {
t.Errorf("formatCronTimestamp offset = %q, want %q", got, want)
}
}
func TestFormatCronTimestamp_unparseableFallsBackToRaw(t *testing.T) {
// If the server returns an unexpected timestamp shape, surface it
// rather than silently dropping to "-" — operator visibility wins.
got := formatCronTimestamp("not-a-timestamp")
if got != "not-a-timestamp" {
t.Errorf("formatCronTimestamp unparseable = %q, want raw passthrough", got)
}
}

View File

@ -416,3 +416,73 @@ func TestJWKSHandler_RSAOnly(t *testing.T) {
t.Errorf("expected RS256, got %s", result.Keys[0]["alg"]) t.Errorf("expected RS256, got %s", result.Keys[0]["alg"])
} }
} }
// TestEdDSACrossServiceVerify is the regression test for bug #215. Two Service
// instances configured with the SAME Ed25519 key (the cluster-shared scenario
// produced by deterministic HKDF derivation in pkg/gateway/signing_key.go)
// must be able to verify each other's tokens. Without this guarantee a JWT
// minted on the main gateway is unverifiable on a namespace gateway and host
// functions see an empty caller_jwt_subject.
func TestEdDSACrossServiceVerify(t *testing.T) {
// Shared key — what HKDF would produce from the cluster secret.
_, shared, err := ed25519.GenerateKey(rand.Reader)
if err != nil {
t.Fatalf("generate shared key: %v", err)
}
makeService := func() *Service {
s := createTestService(t) // RSA + mock client
s.SetEdDSAKey(shared)
return s
}
signer := makeService() // simulates main gateway
verifier := makeService() // simulates namespace gateway (different process, same shared key)
// Sanity: both services must agree on edKeyID since it is derived from the
// public key. If they don't, kid-based verification will silently fail.
if signer.edKeyID != verifier.edKeyID {
t.Fatalf("edKeyID mismatch: signer=%q verifier=%q", signer.edKeyID, verifier.edKeyID)
}
const wantSub = "BNbN2RNQTsYrrywZCLnhV9j3hd38jwcRqfxBecZX7hDE"
const wantNS = "anchat-test"
token, _, err := signer.GenerateJWT(wantNS, wantSub, 15*time.Minute)
if err != nil {
t.Fatalf("signer.GenerateJWT: %v", err)
}
claims, err := verifier.ParseAndVerifyJWT(token)
if err != nil {
t.Fatalf("cross-service verify failed: %v", err)
}
if claims.Sub != wantSub {
t.Errorf("Sub = %q, want %q", claims.Sub, wantSub)
}
if claims.Namespace != wantNS {
t.Errorf("Namespace = %q, want %q", claims.Namespace, wantNS)
}
}
// TestEdDSACrossServiceVerify_differentKeysFail proves the verify gate is
// real: when two services have different Ed25519 keys (the broken state
// before bug #215 fix), tokens minted on one MUST NOT validate on the other.
// If this test ever passes, the deterministic-derivation guarantee is
// silently bypassed somewhere.
func TestEdDSACrossServiceVerify_differentKeysFail(t *testing.T) {
signer := createTestService(t)
_, signKey, _ := ed25519.GenerateKey(rand.Reader)
signer.SetEdDSAKey(signKey)
verifier := createTestService(t)
_, verKey, _ := ed25519.GenerateKey(rand.Reader)
verifier.SetEdDSAKey(verKey)
token, _, err := signer.GenerateJWT("ns", "sub", 15*time.Minute)
if err != nil {
t.Fatalf("GenerateJWT: %v", err)
}
if _, err := verifier.ParseAndVerifyJWT(token); err == nil {
t.Fatal("expected verification to fail with different signing keys, got nil error")
}
}

View File

@ -82,11 +82,20 @@ type Dependencies struct {
// Used by the ws_pubsub_bridge host function. Nil = disabled. // Used by the ws_pubsub_bridge host function. Nil = disabled.
WSBridge *wsbridge.Bridge WSBridge *wsbridge.Bridge
// Push notification dispatcher (nil when push isn't configured — // Push notification dispatcher (legacy single-tier; nil when push
// hostfunc + HTTP handlers degrade to no-op / 503). // isn't configured at all). When PushManager is also set, send paths
// route through the manager instead so per-namespace config wins.
PushDispatcher *push.PushDispatcher PushDispatcher *push.PushDispatcher
PushDeviceStore push.PushDeviceStore PushDeviceStore push.PushDeviceStore
// PushManager wraps the device store + per-namespace config store so
// tenants self-serve their push provider config via PUT /v1/push/config.
// Nil when push subsystem isn't initialized (cluster secret missing).
// When set, this is the canonical send path; PushDispatcher is the
// fallback used only if Manager is somehow missing.
PushManager *push.Manager
PushConfigStore push.ConfigStore
// Authentication service // Authentication service
AuthService *auth.Service AuthService *auth.Service
} }
@ -464,10 +473,18 @@ func initializeServerless(logger *logging.ColoredLogger, cfg *Config, deps *Depe
secretsMgr = smImpl secretsMgr = smImpl
} }
// Initialize push notification dispatcher if any provider is configured. // Initialize push notification subsystem.
// Devices are stored encrypted in RQLite (see migration 023). Providers //
// are registered based on gateway config; missing config = provider absent. // Bug #220 follow-up: the subsystem now ALWAYS initializes when the
pushDispatcher, pushStore, err := buildPushDispatcher(cfg, deps.ORMClient, logger) // cluster secret is available (so tenants can register devices and
// PUT their per-namespace push config), regardless of whether the
// gateway YAML has a default provider configured. The Manager wraps
// the device store + per-namespace ConfigStore; Send paths route
// through Manager so per-namespace config takes effect.
//
// PushDispatcher (legacy) is set only when YAML defaults exist —
// kept for back-compat with code that hasn't migrated to Manager.
pushDispatcher, pushStore, pushManager, pushCfgStore, err := buildPushDispatcher(cfg, deps.ORMClient, logger)
if err != nil { if err != nil {
// Non-fatal: log and continue. Functions calling push_send will get nil // Non-fatal: log and continue. Functions calling push_send will get nil
// (silent no-op) and HTTP /v1/push/* endpoints return 503. // (silent no-op) and HTTP /v1/push/* endpoints return 503.
@ -476,6 +493,8 @@ func initializeServerless(logger *logging.ColoredLogger, cfg *Config, deps *Depe
} }
deps.PushDispatcher = pushDispatcher deps.PushDispatcher = pushDispatcher
deps.PushDeviceStore = pushStore deps.PushDeviceStore = pushStore
deps.PushManager = pushManager
deps.PushConfigStore = pushCfgStore
// Create host functions provider (allows functions to call Orama services) // Create host functions provider (allows functions to call Orama services)
hostFuncsCfg := hostfunctions.HostFunctionsConfig{ hostFuncsCfg := hostfunctions.HostFunctionsConfig{
@ -494,7 +513,8 @@ func initializeServerless(logger *logging.ColoredLogger, cfg *Config, deps *Depe
pubsubAdapter, // pubsub adapter for serverless functions pubsubAdapter, // pubsub adapter for serverless functions
deps.ServerlessWSMgr, deps.ServerlessWSMgr,
secretsMgr, secretsMgr,
pushDispatcher, // may be nil — PushSend hostfunc handles that pushDispatcher, // legacy — fallback when manager isn't wired
pushManager, // bug #220 follow-up — per-namespace config
deps.WSBridge, // may be nil; WSPubSubBridge returns explicit error deps.WSBridge, // may be nil; WSPubSubBridge returns explicit error
hostFuncsCfg, hostFuncsCfg,
logger.Logger, logger.Logger,
@ -581,8 +601,21 @@ func initializeServerless(logger *logging.ColoredLogger, cfg *Config, deps *Depe
return fmt.Errorf("failed to initialize auth service: %w", err) return fmt.Errorf("failed to initialize auth service: %w", err)
} }
// Load or create EdDSA key for new JWT tokens // Load or create EdDSA key for new JWT tokens. Bug #215 fix: when
edKey, err := loadOrCreateEdSigningKey(cfg.DataDir, logger) // cfg.ClusterSecret is set, the key is derived deterministically from
// it via HKDF, so every gateway in the cluster shares the same Ed25519
// keypair and JWTs verify cross-node. With an empty ClusterSecret the
// per-node legacy behaviour is retained (single-node test deployments).
if cfg.ClusterSecret == "" {
// Loud warning: a multi-node cluster booted without a cluster
// secret reproduces bug #215 (per-gateway random keys, JWTs
// unverifiable cross-node). Single-node test rigs are the only
// legitimate case.
logger.ComponentWarn(logging.ComponentGeneral,
"ClusterSecret is empty; JWT signing keys will be random per-node. "+
"Multi-node clusters MUST set ClusterSecret or JWTs will not verify across gateways (bug #215).")
}
edKey, err := loadOrCreateEdSigningKey(cfg.DataDir, cfg.ClusterSecret, logger)
if err != nil { if err != nil {
logger.ComponentWarn(logging.ComponentGeneral, "Failed to load EdDSA signing key; new JWTs will use RS256", logger.ComponentWarn(logging.ComponentGeneral, "Failed to load EdDSA signing key; new JWTs will use RS256",
zap.Error(err)) zap.Error(err))
@ -791,39 +824,96 @@ func injectRQLiteAuth(dsn, username, password string) string {
return dsn return dsn
} }
// buildPushDispatcher constructs a push.PushDispatcher + device store with // buildPushDispatcher constructs the push subsystem.
// all enabled providers. Returns (nil, nil, nil) when no provider is //
// configured — that's a supported state, not an error. Returns (nil, nil, // As of bug #220 follow-up, push always initializes when ClusterSecret is
// err) on hard init failures (e.g. cluster secret missing for the // available, regardless of whether any YAML provider config is set:
// encrypted device store). //
func buildPushDispatcher(cfg *Config, db rqlite.Client, logger *logging.ColoredLogger) (*push.PushDispatcher, push.PushDeviceStore, error) { // - Device store + ConfigStore always build (tenants need to register
if cfg.NtfyBaseURL == "" && cfg.ExpoAccessToken == "" { // devices and set per-namespace push config even on gateways with no
// No providers configured — push is disabled. // YAML defaults).
return nil, nil, nil // - Manager wraps the stores + a YAML-derived Defaults fallback. Each
} // namespace can override any default via PUT /v1/push/config.
// - The legacy single-tier dispatcher is built only when YAML defaults
// are non-empty — kept for back-compat with code paths that haven't
// migrated to Manager.
//
// Returns (nil, nil, nil, nil, nil) when ClusterSecret is missing
// (push subsystem disabled — credentials can't be encrypted safely).
// Returns hard error only on store-init failure.
func buildPushDispatcher(
cfg *Config,
db rqlite.Client,
logger *logging.ColoredLogger,
) (*push.PushDispatcher, push.PushDeviceStore, *push.Manager, push.ConfigStore, error) {
if cfg.ClusterSecret == "" { if cfg.ClusterSecret == "" {
// Devices are encrypted at rest using a cluster-secret-derived key. // Without the cluster secret we can't encrypt credentials at rest.
// Without it we can't store anything safely. // Disable the whole push subsystem; HTTP routes return 503.
return nil, nil, fmt.Errorf("push enabled but ClusterSecret is empty") return nil, nil, nil, nil, nil
} }
store, err := push.NewRqliteDeviceStore(db, cfg.ClusterSecret, logger.Logger) store, err := push.NewRqliteDeviceStore(db, cfg.ClusterSecret, logger.Logger)
if err != nil { if err != nil {
return nil, nil, fmt.Errorf("init push device store: %w", err) return nil, nil, nil, nil, fmt.Errorf("init push device store: %w", err)
} }
d := push.New(store, logger.Logger)
if cfg.NtfyBaseURL != "" { cfgStore, err := push.NewRqliteConfigStore(db, cfg.ClusterSecret, logger.Logger)
d.Register(pushntfy.New(pushntfy.Config{ if err != nil {
BaseURL: cfg.NtfyBaseURL, return nil, nil, nil, nil, fmt.Errorf("init push config store: %w", err)
AuthToken: cfg.NtfyAuthToken,
}, logger.Logger))
logger.ComponentInfo(logging.ComponentGeneral, "push provider registered: ntfy",
zap.String("base_url", cfg.NtfyBaseURL))
} }
if cfg.ExpoAccessToken != "" {
d.Register(pushexpo.New(pushexpo.Config{ // ProviderFactory turns a resolved Config into the right set of
AccessToken: cfg.ExpoAccessToken, // provider instances. Lives here in dependencies.go because this is
}, logger.Logger)) // the only place that imports both the manager package and the
logger.ComponentInfo(logging.ComponentGeneral, "push provider registered: expo") // concrete provider sub-packages — keeps push core dep-cycle-free.
factory := func(c push.Config) []push.PushProvider {
var ps []push.PushProvider
if c.NtfyBaseURL != "" {
ps = append(ps, pushntfy.New(pushntfy.Config{
BaseURL: c.NtfyBaseURL,
AuthToken: c.NtfyAuthToken,
}, logger.Logger))
}
if c.ExpoAccessToken != "" {
ps = append(ps, pushexpo.New(pushexpo.Config{
AccessToken: c.ExpoAccessToken,
}, logger.Logger))
}
return ps
} }
return d, store, nil
defaults := push.Defaults{
NtfyBaseURL: cfg.NtfyBaseURL,
NtfyAuthToken: cfg.NtfyAuthToken,
ExpoAccessToken: cfg.ExpoAccessToken,
}
manager := push.NewManager(store, cfgStore, defaults, factory, logger.Logger)
// Legacy single-tier dispatcher kept ONLY when YAML defaults exist —
// some non-Manager code paths (notably the WASM push_send hostfunc
// before its migration to Manager) still expect a populated
// PushDispatcher. New code routes via Manager.
var legacy *push.PushDispatcher
if !defaults.IsEmpty() {
legacy = push.New(store, logger.Logger)
for _, p := range factory(push.Config{
NtfyBaseURL: defaults.NtfyBaseURL,
NtfyAuthToken: defaults.NtfyAuthToken,
ExpoAccessToken: defaults.ExpoAccessToken,
}) {
legacy.Register(p)
}
}
if defaults.NtfyBaseURL != "" {
logger.ComponentInfo(logging.ComponentGeneral, "push default provider: ntfy",
zap.String("base_url", defaults.NtfyBaseURL))
}
if defaults.ExpoAccessToken != "" {
logger.ComponentInfo(logging.ComponentGeneral, "push default provider: expo configured")
}
logger.ComponentInfo(logging.ComponentGeneral,
"push subsystem initialized; tenants can self-serve via PUT /v1/push/config")
return legacy, store, manager, cfgStore, nil
} }

View File

@ -367,7 +367,19 @@ func New(logger *logging.ColoredLogger, cfg *Config) (*Gateway, error) {
// The handlers themselves return 503 if dispatcher/store is nil; we // The handlers themselves return 503 if dispatcher/store is nil; we
// register them unconditionally so the routes always exist with a // register them unconditionally so the routes always exist with a
// predictable shape. // predictable shape.
if deps.PushDispatcher != nil { //
// Prefer the Manager-backed constructor (bug #220 follow-up) so
// tenants can self-serve their push config via PUT /v1/push/config.
// Fall back to the legacy constructor when only the YAML-derived
// dispatcher is available (older deployments without ClusterSecret).
if deps.PushManager != nil {
gw.pushHandlers = pushhandlers.NewHandlersWithManager(
deps.PushManager,
deps.PushConfigStore,
deps.PushDeviceStore,
logger,
)
} else if deps.PushDispatcher != nil {
gw.pushHandlers = pushhandlers.NewHandlers(deps.PushDispatcher, deps.PushDeviceStore, logger) gw.pushHandlers = pushhandlers.NewHandlers(deps.PushDispatcher, deps.PushDeviceStore, logger)
} }

View File

@ -0,0 +1,233 @@
package push
// config_handler.go — tenant-self-service push provider configuration.
//
// Endpoints (mounted under /v1/push/config; namespace-ownership middleware applies):
//
// GET /v1/push/config → current config (secrets redacted: only "has_X" booleans)
// PUT /v1/push/config → set/update fields; sensitive credentials encrypted at rest
// DELETE /v1/push/config → clear the namespace's row (push reverts to gateway YAML defaults)
//
// Bug #220 follow-up. Eliminates the "tenant must file an ops ticket"
// workflow: once this lands, AnChat (and every future tenant) self-serves
// their push provider config via authenticated HTTP, no operator
// involvement.
import (
"encoding/json"
"errors"
"net/http"
"strings"
"time"
"github.com/DeBrosOfficial/network/pkg/push"
"go.uber.org/zap"
)
// configManager is the subset of *push.Manager the config handlers need —
// kept narrow for testability.
type configManager interface {
IsConfigured(ctx contextLike, namespace string) bool
Invalidate(namespace string)
}
// contextLike avoids importing context everywhere — the handler is
// already in package serverless which has request contexts.
type contextLike = interface {
Done() <-chan struct{}
}
// PutConfigRequest is the body of PUT /v1/push/config.
//
// Field semantics:
// - Unset fields (zero value) leave the existing value alone.
// - Empty-string fields explicitly clear the value.
// - To clear the entire row use DELETE — that's clearer than empty PUT.
type PutConfigRequest struct {
NtfyBaseURL *string `json:"ntfy_base_url,omitempty"`
NtfyAuthToken *string `json:"ntfy_auth_token,omitempty"`
ExpoAccessToken *string `json:"expo_access_token,omitempty"`
}
// MaxConfigBodyBytes caps the PUT body size. Push tokens are typically
// well under 1 KB but we leave headroom.
const MaxConfigBodyBytes = 16 * 1024
// pushConfigManager is the concrete dependency the Handlers struct holds
// — a *push.Manager. We extract it via a small interface for tests.
type pushConfigManager interface {
IsConfigured(ctx interface{ Done() <-chan struct{} }, namespace string) bool
Invalidate(namespace string)
}
// GetConfigHandler — GET /v1/push/config. Returns the namespace's current
// push provider config with sensitive fields REDACTED to boolean flags.
//
// Always 200 + canonical envelope-free body when the request is well-formed
// (clients can rely on the shape: an absent provider just shows
// `has_ntfy_auth_token: false`). 503 only when the config store itself
// isn't available on this gateway (e.g. push subsystem disabled).
func (h *Handlers) GetConfigHandler(w http.ResponseWriter, r *http.Request) {
if h.configStore == nil {
writeError(w, http.StatusServiceUnavailable,
"push config store not available on this gateway")
return
}
if r.Method != http.MethodGet {
writeError(w, http.StatusMethodNotAllowed, "method not allowed")
return
}
ns := resolveNamespace(r)
if ns == "" {
writeError(w, http.StatusForbidden, "namespace not resolved")
return
}
cfg, err := h.configStore.Get(boundCtx(r), ns)
if err != nil && !errors.Is(err, push.ErrConfigNotFound) {
h.logger.ComponentWarn("push", "config GET failed",
zap.String("namespace", ns), zap.Error(err))
writeError(w, http.StatusInternalServerError, "failed to load config")
return
}
// Not found → return empty redacted config. Clients distinguish
// "configured" via the boolean fields.
if cfg == nil {
writeJSON(w, http.StatusOK, push.RedactedConfig{Namespace: ns})
return
}
writeJSON(w, http.StatusOK, cfg.Redacted())
}
// PutConfigHandler — PUT /v1/push/config. Updates the namespace's push
// provider config. Field-level semantics: nil JSON values leave the
// existing field untouched; explicit empty-strings clear it.
//
// On success returns the redacted config (same shape as GET) so clients
// can confirm what's now in place without echoing back the credentials.
//
// Invalidates the manager's cached dispatcher for this namespace so the
// next push send rebuilds with the fresh config.
func (h *Handlers) PutConfigHandler(w http.ResponseWriter, r *http.Request) {
if h.configStore == nil {
writeError(w, http.StatusServiceUnavailable,
"push config store not available on this gateway")
return
}
if r.Method != http.MethodPut && r.Method != http.MethodPost {
writeError(w, http.StatusMethodNotAllowed, "method not allowed (use PUT)")
return
}
ns := resolveNamespace(r)
if ns == "" {
writeError(w, http.StatusForbidden, "namespace not resolved")
return
}
caller := resolveCallerUserID(r)
if caller == "" {
writeError(w, http.StatusUnauthorized, "user authentication required")
return
}
r.Body = http.MaxBytesReader(w, r.Body, MaxConfigBodyBytes)
var body PutConfigRequest
if err := json.NewDecoder(r.Body).Decode(&body); err != nil {
writeError(w, http.StatusBadRequest, "invalid body: expected JSON")
return
}
// Validate URL fields look reasonable. We don't do hostname resolution
// here (slow, flaky); just reject obviously-wrong schemes.
if body.NtfyBaseURL != nil && *body.NtfyBaseURL != "" {
if !strings.HasPrefix(*body.NtfyBaseURL, "http://") &&
!strings.HasPrefix(*body.NtfyBaseURL, "https://") {
writeError(w, http.StatusBadRequest,
"ntfy_base_url must start with http:// or https://")
return
}
}
// Read existing for merge — PUT semantics are field-level, not
// whole-document replace.
existing, err := h.configStore.Get(boundCtx(r), ns)
if err != nil && !errors.Is(err, push.ErrConfigNotFound) {
h.logger.ComponentWarn("push", "config GET-before-PUT failed",
zap.String("namespace", ns), zap.Error(err))
writeError(w, http.StatusInternalServerError, "failed to load config")
return
}
cfg := push.Config{Namespace: ns, UpdatedAt: time.Now().Unix(), UpdatedBy: caller}
if existing != nil {
cfg.NtfyBaseURL = existing.NtfyBaseURL
cfg.NtfyAuthToken = existing.NtfyAuthToken
cfg.ExpoAccessToken = existing.ExpoAccessToken
}
if body.NtfyBaseURL != nil {
cfg.NtfyBaseURL = *body.NtfyBaseURL
}
if body.NtfyAuthToken != nil {
cfg.NtfyAuthToken = *body.NtfyAuthToken
}
if body.ExpoAccessToken != nil {
cfg.ExpoAccessToken = *body.ExpoAccessToken
}
if err := h.configStore.Upsert(boundCtx(r), cfg); err != nil {
h.logger.ComponentWarn("push", "config PUT failed",
zap.String("namespace", ns), zap.Error(err))
writeError(w, http.StatusInternalServerError, "failed to save config")
return
}
if h.manager != nil {
h.manager.Invalidate(ns)
}
h.logger.ComponentInfo("push", "config updated",
zap.String("namespace", ns),
zap.String("updated_by", caller),
zap.Bool("has_ntfy_url", cfg.NtfyBaseURL != ""),
zap.Bool("has_ntfy_auth_token", cfg.NtfyAuthToken != ""),
zap.Bool("has_expo_access_token", cfg.ExpoAccessToken != ""),
)
writeJSON(w, http.StatusOK, cfg.Redacted())
}
// DeleteConfigHandler — DELETE /v1/push/config. Clears the namespace's
// row entirely; push reverts to gateway YAML defaults (or 503 "not
// configured" if no defaults).
func (h *Handlers) DeleteConfigHandler(w http.ResponseWriter, r *http.Request) {
if h.configStore == nil {
writeError(w, http.StatusServiceUnavailable,
"push config store not available on this gateway")
return
}
if r.Method != http.MethodDelete {
writeError(w, http.StatusMethodNotAllowed, "method not allowed")
return
}
ns := resolveNamespace(r)
if ns == "" {
writeError(w, http.StatusForbidden, "namespace not resolved")
return
}
caller := resolveCallerUserID(r)
if caller == "" {
writeError(w, http.StatusUnauthorized, "user authentication required")
return
}
if err := h.configStore.Delete(boundCtx(r), ns); err != nil {
h.logger.ComponentWarn("push", "config DELETE failed",
zap.String("namespace", ns), zap.Error(err))
writeError(w, http.StatusInternalServerError, "failed to delete config")
return
}
if h.manager != nil {
h.manager.Invalidate(ns)
}
h.logger.ComponentInfo("push", "config cleared",
zap.String("namespace", ns),
zap.String("cleared_by", caller),
)
writeJSON(w, http.StatusOK, map[string]string{"status": "cleared"})
}

View File

@ -2,6 +2,7 @@ package push
import ( import (
"encoding/json" "encoding/json"
"errors"
"net/http" "net/http"
"strings" "strings"
"time" "time"
@ -221,7 +222,9 @@ func (h *Handlers) DeleteDeviceHandler(w http.ResponseWriter, r *http.Request) {
// exposing this in production.** The WASM hostfunc bypasses this issue // exposing this in production.** The WASM hostfunc bypasses this issue
// because trigger registration already gates which functions exist. // because trigger registration already gates which functions exist.
func (h *Handlers) SendHandler(w http.ResponseWriter, r *http.Request) { func (h *Handlers) SendHandler(w http.ResponseWriter, r *http.Request) {
if h.dispatcher == nil { // Either the per-namespace manager (preferred) or the legacy single
// dispatcher must be present. Both nil = push not configured at all.
if h.manager == nil && h.dispatcher == nil {
writeError(w, http.StatusServiceUnavailable, "push: dispatcher not configured") writeError(w, http.StatusServiceUnavailable, "push: dispatcher not configured")
return return
} }
@ -261,14 +264,27 @@ func (h *Handlers) SendHandler(w http.ResponseWriter, r *http.Request) {
Sound: body.Sound, Sound: body.Sound,
Data: body.Data, Data: body.Data,
} }
if err := h.dispatcher.SendToUser(boundCtx(r), ns, body.UserID, msg); err != nil { // Prefer the per-namespace Manager when present so per-namespace
// config (set via PUT /v1/push/config) takes effect. Fall back to the
// legacy single dispatcher only when no Manager is wired.
var sendErr error
if h.manager != nil {
sendErr = h.manager.SendToUser(boundCtx(r), ns, body.UserID, msg)
if errors.Is(sendErr, push.ErrPushNotConfigured) {
writeError(w, http.StatusServiceUnavailable, sendErr.Error())
return
}
} else {
sendErr = h.dispatcher.SendToUser(boundCtx(r), ns, body.UserID, msg)
}
if sendErr != nil {
// Treat as non-fatal: some devices may have failed but others may // Treat as non-fatal: some devices may have failed but others may
// have succeeded. Surface as 502 to signal partial trouble; logs // have succeeded. Surface as 502 to signal partial trouble; logs
// have the per-device detail. // have the per-device detail.
h.logger.ComponentWarn("push", "send to user partially failed", h.logger.ComponentWarn("push", "send to user partially failed",
zap.String("namespace", ns), zap.String("namespace", ns),
zap.String("user_id", body.UserID), zap.String("user_id", body.UserID),
zap.Error(err)) zap.Error(sendErr))
writeError(w, http.StatusBadGateway, "one or more devices failed") writeError(w, http.StatusBadGateway, "one or more devices failed")
return return
} }

View File

@ -26,14 +26,26 @@ import (
// Handlers serves the /v1/push/* HTTP endpoints. Construct via NewHandlers; // Handlers serves the /v1/push/* HTTP endpoints. Construct via NewHandlers;
// it's safe for concurrent use. // it's safe for concurrent use.
//
// dispatcher is the legacy single-tier dispatcher (kept for the device
// register/list/delete + send paths). manager is the per-namespace
// dispatcher built on top of ConfigStore (new in bug #220 follow-up);
// when both are present, send paths route through manager so per-namespace
// config wins.
//
// configStore + manager may be nil on gateways with push fully disabled —
// the corresponding endpoints return 503.
type Handlers struct { type Handlers struct {
dispatcher *push.PushDispatcher dispatcher *push.PushDispatcher
store push.PushDeviceStore manager *push.Manager
logger *logging.ColoredLogger store push.PushDeviceStore
configStore push.ConfigStore
logger *logging.ColoredLogger
} }
// NewHandlers constructs a Handlers. Either argument may be nil — in which // NewHandlers constructs a Handlers with the legacy single-namespace
// case the corresponding endpoints return 503 Service Unavailable. // dispatcher only. Use NewHandlersWithManager for per-namespace config
// support (bug #220 follow-up).
func NewHandlers(dispatcher *push.PushDispatcher, store push.PushDeviceStore, logger *logging.ColoredLogger) *Handlers { func NewHandlers(dispatcher *push.PushDispatcher, store push.PushDeviceStore, logger *logging.ColoredLogger) *Handlers {
return &Handlers{ return &Handlers{
dispatcher: dispatcher, dispatcher: dispatcher,
@ -42,6 +54,23 @@ func NewHandlers(dispatcher *push.PushDispatcher, store push.PushDeviceStore, lo
} }
} }
// NewHandlersWithManager constructs Handlers wired to a Manager + ConfigStore
// for tenant-self-service per-namespace configuration. Send paths use the
// manager when present so per-namespace ntfy/expo settings take effect.
func NewHandlersWithManager(
manager *push.Manager,
configStore push.ConfigStore,
deviceStore push.PushDeviceStore,
logger *logging.ColoredLogger,
) *Handlers {
return &Handlers{
manager: manager,
configStore: configStore,
store: deviceStore,
logger: logger,
}
}
// RegisterDeviceRequest is the body of POST /v1/push/devices. // RegisterDeviceRequest is the body of POST /v1/push/devices.
// //
// `device_id` is an app-supplied stable identifier (e.g. the OS-assigned // `device_id` is an app-supplied stable identifier (e.g. the OS-assigned

View File

@ -173,6 +173,43 @@ func (h *ServerlessHandlers) DeployFunction(w http.ResponseWriter, r *http.Reque
} }
} }
// Register Cron triggers from definition. Mirrors the PubSub branch above:
// stale rows (from a previous deploy whose manifest had different cron
// schedules) are cleared first, then the manifest's expressions are
// re-added. Without this, manifest-driven cron schedules silently never
// fired (feature #65 audit).
//
// We always run the RemoveByFunction even when CronExpressions is empty,
// otherwise editing a manifest from `cron_expressions: ["0 3 * * *"]` to
// `cron_expressions: []` would leave the old schedule in place.
if h.cronStore != nil && fn != nil {
if err := h.cronStore.RemoveByFunction(ctx, fn.ID); err != nil {
h.logger.Warn("Failed to clear stale cron triggers",
zap.String("function", def.Name),
zap.Error(err))
}
// Dedupe identical expressions so a manifest accident
// (`cron_expressions: ["0 3 * * *", "0 3 * * *"]`) doesn't fire the
// function twice every tick.
seen := make(map[string]struct{}, len(def.CronExpressions))
for _, expr := range def.CronExpressions {
if _, dup := seen[expr]; dup {
continue
}
seen[expr] = struct{}{}
if _, err := h.cronStore.Add(ctx, fn.ID, expr); err != nil {
// Bad expression in a manifest is a user error worth surfacing
// but not blocking the deploy — the function itself is fine,
// only the schedule is dropped. Logged at WARN so operators
// see it; the deploy response still reports success.
h.logger.Warn("Failed to register cron trigger from manifest",
zap.String("function", def.Name),
zap.String("cron_expression", expr),
zap.Error(err))
}
}
}
writeJSON(w, http.StatusCreated, map[string]interface{}{ writeJSON(w, http.StatusCreated, map[string]interface{}{
"message": "Function deployed successfully", "message": "Function deployed successfully",
"function": fn, "function": fn,

View File

@ -110,21 +110,29 @@ type GatewayYAMLWebRTC struct {
// This must match the yamlCfg struct in cmd/gateway/config.go exactly // This must match the yamlCfg struct in cmd/gateway/config.go exactly
// because the gateway uses strict YAML decoding that rejects unknown fields // because the gateway uses strict YAML decoding that rejects unknown fields
type GatewayYAMLConfig struct { type GatewayYAMLConfig struct {
ListenAddr string `yaml:"listen_addr"` ListenAddr string `yaml:"listen_addr"`
ClientNamespace string `yaml:"client_namespace"` ClientNamespace string `yaml:"client_namespace"`
RQLiteDSN string `yaml:"rqlite_dsn"` RQLiteDSN string `yaml:"rqlite_dsn"`
GlobalRQLiteDSN string `yaml:"global_rqlite_dsn,omitempty"` GlobalRQLiteDSN string `yaml:"global_rqlite_dsn,omitempty"`
BootstrapPeers []string `yaml:"bootstrap_peers,omitempty"` BootstrapPeers []string `yaml:"bootstrap_peers,omitempty"`
EnableHTTPS bool `yaml:"enable_https,omitempty"` EnableHTTPS bool `yaml:"enable_https,omitempty"`
DomainName string `yaml:"domain_name,omitempty"` DomainName string `yaml:"domain_name,omitempty"`
TLSCacheDir string `yaml:"tls_cache_dir,omitempty"` TLSCacheDir string `yaml:"tls_cache_dir,omitempty"`
OlricServers []string `yaml:"olric_servers"` OlricServers []string `yaml:"olric_servers"`
OlricTimeout string `yaml:"olric_timeout,omitempty"` OlricTimeout string `yaml:"olric_timeout,omitempty"`
IPFSClusterAPIURL string `yaml:"ipfs_cluster_api_url,omitempty"` IPFSClusterAPIURL string `yaml:"ipfs_cluster_api_url,omitempty"`
IPFSAPIURL string `yaml:"ipfs_api_url,omitempty"` IPFSAPIURL string `yaml:"ipfs_api_url,omitempty"`
IPFSTimeout string `yaml:"ipfs_timeout,omitempty"` IPFSTimeout string `yaml:"ipfs_timeout,omitempty"`
IPFSReplicationFactor int `yaml:"ipfs_replication_factor,omitempty"` IPFSReplicationFactor int `yaml:"ipfs_replication_factor,omitempty"`
WebRTC GatewayYAMLWebRTC `yaml:"webrtc,omitempty"` WebRTC GatewayYAMLWebRTC `yaml:"webrtc,omitempty"`
// ClusterSecretPath points to the host's cluster-secret file. Bug #215
// follow-up: namespace gateways spawned by systemd previously had no
// way to access the cluster secret, so they fell back to per-node
// random JWT signing keys and JWTs were unverifiable cross-node within
// the namespace cluster. Setting this path lets the standalone gateway
// binary read the secret on startup and derive the canonical Ed25519
// key shared with every other gateway in the cluster.
ClusterSecretPath string `yaml:"cluster_secret_path,omitempty"`
} }
// NewInstanceSpawner creates a new Gateway instance spawner // NewInstanceSpawner creates a new Gateway instance spawner

View File

@ -0,0 +1,87 @@
package gateway
import (
"strings"
"testing"
"gopkg.in/yaml.v3"
)
// TestGatewayYAMLConfig_clusterSecretPathRoundTrip is the regression test for
// bug #215 reopen. The first fix derived JWT keys from cfg.ClusterSecret, but
// namespace gateways spawned via systemd had cfg.ClusterSecret == "" because
// the YAML schema lacked any field to carry it. This test guards the YAML tag
// so every namespace gateway YAML written by SystemdSpawner.SpawnGateway
// carries the path the standalone binary needs.
func TestGatewayYAMLConfig_clusterSecretPathRoundTrip(t *testing.T) {
cfg := GatewayYAMLConfig{
ListenAddr: ":6001",
ClientNamespace: "anchat-test",
RQLiteDSN: "http://localhost:10000",
OlricServers: []string{"localhost:3320"},
ClusterSecretPath: "/opt/orama/.orama/secrets/cluster-secret",
}
out, err := yaml.Marshal(cfg)
if err != nil {
t.Fatalf("marshal: %v", err)
}
if !strings.Contains(string(out), "cluster_secret_path: /opt/orama/.orama/secrets/cluster-secret") {
t.Fatalf("YAML output missing expected cluster_secret_path line:\n%s", out)
}
// Round-trip into a struct shaped like cmd/gateway/config.go's yamlCfg
// (internal type, duplicated here intentionally so this test catches
// drift between the two declarations).
type webrtc struct {
Enabled bool `yaml:"enabled"`
SFUPort int `yaml:"sfu_port"`
TURNDomain string `yaml:"turn_domain"`
TURNSecret string `yaml:"turn_secret"`
}
type yamlCfgMirror struct {
ListenAddr string `yaml:"listen_addr"`
ClientNamespace string `yaml:"client_namespace"`
RQLiteDSN string `yaml:"rqlite_dsn"`
GlobalRQLiteDSN string `yaml:"global_rqlite_dsn"`
Peers []string `yaml:"bootstrap_peers"`
EnableHTTPS bool `yaml:"enable_https"`
DomainName string `yaml:"domain_name"`
TLSCacheDir string `yaml:"tls_cache_dir"`
OlricServers []string `yaml:"olric_servers"`
OlricTimeout string `yaml:"olric_timeout"`
IPFSClusterAPIURL string `yaml:"ipfs_cluster_api_url"`
IPFSAPIURL string `yaml:"ipfs_api_url"`
IPFSTimeout string `yaml:"ipfs_timeout"`
IPFSReplicationFactor int `yaml:"ipfs_replication_factor"`
WebRTC webrtc `yaml:"webrtc"`
ClusterSecretPath string `yaml:"cluster_secret_path"`
}
var parsed yamlCfgMirror
if err := yaml.Unmarshal(out, &parsed); err != nil {
t.Fatalf("unmarshal: %v", err)
}
if parsed.ClusterSecretPath != cfg.ClusterSecretPath {
t.Errorf("round-trip mismatch: got %q, want %q", parsed.ClusterSecretPath, cfg.ClusterSecretPath)
}
}
// TestGatewayYAMLConfig_omitWhenEmpty: when the host has no cluster secret,
// the field is omitted from the YAML so legacy single-node test rigs don't
// see a stray "cluster_secret_path: " line that operators might mistake for
// a configuration directive.
func TestGatewayYAMLConfig_omitWhenEmpty(t *testing.T) {
cfg := GatewayYAMLConfig{
ListenAddr: ":6001",
ClientNamespace: "ns",
RQLiteDSN: "http://localhost:10000",
OlricServers: []string{"localhost:3320"},
// ClusterSecretPath intentionally empty.
}
out, err := yaml.Marshal(cfg)
if err != nil {
t.Fatalf("marshal: %v", err)
}
if strings.Contains(string(out), "cluster_secret_path") {
t.Errorf("empty ClusterSecretPath should be omitted from YAML; got:\n%s", out)
}
}

View File

@ -0,0 +1,194 @@
package gateway
import (
"net/http"
"reflect"
"testing"
"github.com/DeBrosOfficial/network/pkg/gateway/auth"
)
// TestSetInternalAuthJWTHeaders_setsSubAndCustom verifies that the proxy
// helper writes the validated JWT subject and base64(JSON) custom claims to
// outbound headers (bug #215 wiring).
func TestSetInternalAuthJWTHeaders_setsSubAndCustom(t *testing.T) {
h := http.Header{}
claims := &auth.JWTClaims{
Sub: "BNbN2RNQTsYrrywZCLnhV9j3hd38jwcRqfxBecZX7hDE",
Custom: map[string]string{
"tier": "pro",
"subscription": "active",
},
}
setInternalAuthJWTHeaders(h, claims)
if got := h.Get(HeaderInternalAuthJWTSub); got != claims.Sub {
t.Errorf("Sub header = %q, want %q", got, claims.Sub)
}
if h.Get(HeaderInternalAuthJWTCustom) == "" {
t.Error("Custom header is empty, expected base64 JSON blob")
}
}
// TestSetInternalAuthJWTHeaders_nilClaimsNoOp guards the API-key path: when
// no JWT was used to authenticate, no JWT headers should be set.
func TestSetInternalAuthJWTHeaders_nilClaimsNoOp(t *testing.T) {
h := http.Header{}
setInternalAuthJWTHeaders(h, nil)
if got := h.Get(HeaderInternalAuthJWTSub); got != "" {
t.Errorf("Sub header = %q, want empty (nil claims)", got)
}
if got := h.Get(HeaderInternalAuthJWTCustom); got != "" {
t.Errorf("Custom header = %q, want empty (nil claims)", got)
}
}
// TestSetInternalAuthJWTHeaders_emptySubNotForwarded keeps malformed claims
// from polluting the outbound request.
func TestSetInternalAuthJWTHeaders_emptySubNotForwarded(t *testing.T) {
h := http.Header{}
setInternalAuthJWTHeaders(h, &auth.JWTClaims{Sub: " "})
if got := h.Get(HeaderInternalAuthJWTSub); got != "" {
t.Errorf("Sub header = %q, want empty (whitespace-only sub)", got)
}
}
// TestSetInternalAuthJWTHeaders_emptyCustomMapNotForwarded — empty maps are
// skipped to keep headers small and avoid sending `e30=` (base64 of "{}").
func TestSetInternalAuthJWTHeaders_emptyCustomMapNotForwarded(t *testing.T) {
h := http.Header{}
setInternalAuthJWTHeaders(h, &auth.JWTClaims{Sub: "0xabc", Custom: map[string]string{}})
if got := h.Get(HeaderInternalAuthJWTCustom); got != "" {
t.Errorf("Custom header = %q, want empty (empty custom map)", got)
}
}
// TestClaimsFromInternalAuthHeaders_roundTrip is the end-to-end guarantee:
// what the main gateway sets via setInternalAuthJWTHeaders MUST be exactly
// what the namespace gateway recovers via claimsFromInternalAuthHeaders.
func TestClaimsFromInternalAuthHeaders_roundTrip(t *testing.T) {
original := &auth.JWTClaims{
Sub: "BNbN2RNQTsYrrywZCLnhV9j3hd38jwcRqfxBecZX7hDE",
Custom: map[string]string{
"tier": "pro",
"subscription": "active",
},
}
h := http.Header{}
setInternalAuthJWTHeaders(h, original)
got := claimsFromInternalAuthHeaders(h, "anchat-test")
if got == nil {
t.Fatal("recovered claims is nil")
}
if got.Sub != original.Sub {
t.Errorf("Sub = %q, want %q", got.Sub, original.Sub)
}
if got.Namespace != "anchat-test" {
t.Errorf("Namespace = %q, want %q", got.Namespace, "anchat-test")
}
if !reflect.DeepEqual(got.Custom, original.Custom) {
t.Errorf("Custom = %#v, want %#v", got.Custom, original.Custom)
}
}
// TestClaimsFromInternalAuthHeaders_noSubReturnsNil — when no JWT was
// forwarded (caller used API key), recovery returns nil so the namespace
// gateway leaves ctxKeyJWT unset.
func TestClaimsFromInternalAuthHeaders_noSubReturnsNil(t *testing.T) {
h := http.Header{}
if got := claimsFromInternalAuthHeaders(h, "ns"); got != nil {
t.Errorf("expected nil claims when no Sub header present, got %#v", got)
}
}
// TestClaimsFromInternalAuthHeaders_invalidCustomIgnored — corrupt custom
// blob must not block recovery of the subject.
func TestClaimsFromInternalAuthHeaders_invalidCustomIgnored(t *testing.T) {
h := http.Header{}
h.Set(HeaderInternalAuthJWTSub, "0xabc")
h.Set(HeaderInternalAuthJWTCustom, "not-valid-base64!!!")
got := claimsFromInternalAuthHeaders(h, "ns")
if got == nil {
t.Fatal("recovered claims is nil despite valid Sub header")
}
if got.Sub != "0xabc" {
t.Errorf("Sub = %q, want %q", got.Sub, "0xabc")
}
if got.Custom != nil {
t.Errorf("Custom = %#v, want nil (invalid blob should be silently dropped)", got.Custom)
}
}
// TestStripInboundInternalAuthHeaders_removesAllForgedHeaders is the regression
// test for the security audit's CRITICAL finding (bug #215 follow-up). Without
// this strip, an external attacker could forge X-Internal-Auth-JWT-Sub on a
// public path and impersonate any wallet on the namespace gateway, which gates
// on source IP only.
func TestStripInboundInternalAuthHeaders_removesAllForgedHeaders(t *testing.T) {
h := http.Header{}
h.Set(HeaderInternalAuthValidated, "true")
h.Set(HeaderInternalAuthNamespace, "victim-namespace")
h.Set(HeaderInternalAuthJWTSub, "0xVICTIM_WALLET")
h.Set(HeaderInternalAuthJWTCustom, "ZXZpbA==") // base64("evil")
// Unrelated headers MUST survive the strip.
h.Set("X-Forwarded-For", "1.2.3.4")
h.Set("Authorization", "Bearer some-token")
stripInboundInternalAuthHeaders(h)
for _, name := range []string{
HeaderInternalAuthValidated,
HeaderInternalAuthNamespace,
HeaderInternalAuthJWTSub,
HeaderInternalAuthJWTCustom,
} {
if got := h.Get(name); got != "" {
t.Errorf("after strip, %s = %q, want empty", name, got)
}
}
if got := h.Get("X-Forwarded-For"); got != "1.2.3.4" {
t.Errorf("X-Forwarded-For lost: %q", got)
}
if got := h.Get("Authorization"); got != "Bearer some-token" {
t.Errorf("Authorization lost: %q", got)
}
}
// TestStripThenSet_attackerSubReplaced exercises the proxy-hop sequence:
// strip-then-set. An attacker-controlled JWT-Sub header MUST be replaced by
// the gateway-validated value (or removed entirely when the caller used an
// API key and validatedClaims is nil).
func TestStripThenSet_attackerSubReplaced(t *testing.T) {
t.Run("validated_jwt_overwrites_attacker_sub", func(t *testing.T) {
h := http.Header{}
h.Set(HeaderInternalAuthJWTSub, "0xATTACKER")
stripInboundInternalAuthHeaders(h)
// Mimic the proxy hop: gateway authenticated user as 0xLEGIT.
h.Set(HeaderInternalAuthValidated, "true")
h.Set(HeaderInternalAuthNamespace, "ns")
setInternalAuthJWTHeaders(h, &auth.JWTClaims{Sub: "0xLEGIT"})
if got := h.Get(HeaderInternalAuthJWTSub); got != "0xLEGIT" {
t.Errorf("Sub = %q, want %q (attacker value should be overwritten)", got, "0xLEGIT")
}
})
t.Run("api_key_path_drops_attacker_sub", func(t *testing.T) {
h := http.Header{}
h.Set(HeaderInternalAuthJWTSub, "0xATTACKER")
stripInboundInternalAuthHeaders(h)
// Mimic the proxy hop: gateway authenticated user via API key
// (validatedClaims is nil).
h.Set(HeaderInternalAuthValidated, "true")
h.Set(HeaderInternalAuthNamespace, "ns")
setInternalAuthJWTHeaders(h, nil)
if got := h.Get(HeaderInternalAuthJWTSub); got != "" {
t.Errorf("Sub = %q, want empty (attacker value must not survive API-key auth)", got)
}
})
}

View File

@ -2,6 +2,8 @@ package gateway
import ( import (
"context" "context"
"encoding/base64"
"encoding/json"
"errors" "errors"
"fmt" "fmt"
"hash/fnv" "hash/fnv"
@ -70,27 +72,114 @@ const (
HeaderInternalAuthNamespace = "X-Internal-Auth-Namespace" HeaderInternalAuthNamespace = "X-Internal-Auth-Namespace"
// HeaderInternalAuthValidated indicates the request was pre-authenticated by main gateway // HeaderInternalAuthValidated indicates the request was pre-authenticated by main gateway
HeaderInternalAuthValidated = "X-Internal-Auth-Validated" HeaderInternalAuthValidated = "X-Internal-Auth-Validated"
// HeaderInternalAuthJWTSub carries the validated JWT `sub` claim across the
// internal proxy hop. Bug #215: without this, namespace gateways received
// only the namespace and host functions saw empty `caller_jwt_subject`.
// Trusted only when X-Internal-Auth-Validated=true AND the source IP is
// internal (WireGuard mesh / loopback) — same trust model as the
// namespace header.
HeaderInternalAuthJWTSub = "X-Internal-Auth-JWT-Sub"
// HeaderInternalAuthJWTCustom carries the validated JWT custom-claims map
// as a base64(JSON) blob. Optional; absent means the original token had
// no custom claims. Same trust gate as HeaderInternalAuthJWTSub.
HeaderInternalAuthJWTCustom = "X-Internal-Auth-JWT-Custom"
) )
// setInternalAuthJWTHeaders writes the validated JWT subject and custom claims
// onto an outbound proxy request so the destination gateway can hydrate
// ctxKeyJWT. Called only after auth has been validated AND we're proxying over
// the WireGuard mesh (the destination still gates trust by source IP).
//
// No-op when claims is nil (caller authenticated with API key, not JWT).
func setInternalAuthJWTHeaders(h http.Header, claims *auth.JWTClaims) {
if claims == nil {
return
}
if sub := strings.TrimSpace(claims.Sub); sub != "" {
h.Set(HeaderInternalAuthJWTSub, sub)
}
if len(claims.Custom) > 0 {
buf, err := json.Marshal(claims.Custom)
if err == nil {
h.Set(HeaderInternalAuthJWTCustom, base64.StdEncoding.EncodeToString(buf))
}
}
}
// stripInboundInternalAuthHeaders deletes the X-Internal-Auth-* headers from
// the supplied header set. MUST be called on every outbound request the main
// gateway proxies to a namespace gateway BEFORE we (conditionally) re-set
// them with the validated values.
//
// SECURITY (bug #215 follow-up — critical): without this, an external client
// could send `X-Internal-Auth-Validated: true` + `X-Internal-Auth-JWT-Sub:
// <victim-wallet>` directly. The header-copy loop at the proxy hop would
// forward those headers verbatim. The namespace gateway, seeing the request
// arrive from the main gateway's WireGuard IP, would trust them and hydrate
// ctxKeyJWT with attacker-controlled subject — full cross-namespace identity
// spoofing on every public path.
//
// Stripping FIRST and conditionally setting AFTER closes that hole regardless
// of which auth path won (JWT, API key, or no creds on a public path).
func stripInboundInternalAuthHeaders(h http.Header) {
h.Del(HeaderInternalAuthValidated)
h.Del(HeaderInternalAuthNamespace)
h.Del(HeaderInternalAuthJWTSub)
h.Del(HeaderInternalAuthJWTCustom)
}
// claimsFromInternalAuthHeaders rebuilds a *auth.JWTClaims from the trusted
// internal-auth headers. Returns nil if no JWT subject was forwarded (the
// caller used an API key, or the request didn't carry validated JWT data).
//
// SECURITY: this MUST only be called after the caller has confirmed
// HeaderInternalAuthValidated == "true" AND the source IP is internal AND
// the proxy hop has stripped any inbound copies of these headers (see
// stripInboundInternalAuthHeaders). Skipping any of those would let any
// client forge JWT identities.
func claimsFromInternalAuthHeaders(h http.Header, namespace string) *auth.JWTClaims {
sub := strings.TrimSpace(h.Get(HeaderInternalAuthJWTSub))
if sub == "" {
return nil
}
claims := &auth.JWTClaims{
Sub: sub,
Namespace: namespace,
}
if raw := strings.TrimSpace(h.Get(HeaderInternalAuthJWTCustom)); raw != "" {
if decoded, err := base64.StdEncoding.DecodeString(raw); err == nil {
var custom map[string]string
if json.Unmarshal(decoded, &custom) == nil && len(custom) > 0 {
claims.Custom = custom
}
}
}
return claims
}
// validateAuthForNamespaceProxy validates the request's auth credentials against the MAIN // validateAuthForNamespaceProxy validates the request's auth credentials against the MAIN
// cluster RQLite and returns the namespace the credentials belong to. // cluster RQLite and returns the namespace the credentials belong to plus the
// JWT claims when the caller authenticated with a Bearer JWT.
// This is used by handleNamespaceGatewayRequest to pre-authenticate before proxying to // This is used by handleNamespaceGatewayRequest to pre-authenticate before proxying to
// namespace gateways (which have isolated RQLites without API keys). // namespace gateways (which have isolated RQLites without API keys).
// //
// Returns: // Returns:
// - (namespace, "") if auth is valid // - (namespace, claims, "") if auth is valid via JWT — claims may be used to
// - ("", errorMessage) if auth is invalid // populate internal-auth headers so the namespace gateway can hydrate
// - ("", "") if no auth credentials provided (for public paths) // `caller_jwt_subject` for serverless host functions (bug #215).
func (g *Gateway) validateAuthForNamespaceProxy(r *http.Request) (namespace string, errMsg string) { // - (namespace, nil, "") if auth is valid via API key.
// - ("", nil, errorMessage) if auth is invalid.
// - ("", nil, "") if no auth credentials provided (for public paths).
func (g *Gateway) validateAuthForNamespaceProxy(r *http.Request) (namespace string, claims *auth.JWTClaims, errMsg string) {
// 1) Try JWT Bearer first // 1) Try JWT Bearer first
if auth := r.Header.Get("Authorization"); auth != "" { if authHeader := r.Header.Get("Authorization"); authHeader != "" {
lower := strings.ToLower(auth) lower := strings.ToLower(authHeader)
if strings.HasPrefix(lower, "bearer ") { if strings.HasPrefix(lower, "bearer ") {
tok := strings.TrimSpace(auth[len("Bearer "):]) tok := strings.TrimSpace(authHeader[len("Bearer "):])
if strings.Count(tok, ".") == 2 { if strings.Count(tok, ".") == 2 {
if claims, err := g.authService.ParseAndVerifyJWT(tok); err == nil { if c, err := g.authService.ParseAndVerifyJWT(tok); err == nil {
if ns := strings.TrimSpace(claims.Namespace); ns != "" { if ns := strings.TrimSpace(c.Namespace); ns != "" {
return ns, "" return ns, c, ""
} }
} }
// JWT verification failed - fall through to API key check // JWT verification failed - fall through to API key check
@ -101,14 +190,14 @@ func (g *Gateway) validateAuthForNamespaceProxy(r *http.Request) (namespace stri
// 2) Try API key // 2) Try API key
key := extractAPIKey(r) key := extractAPIKey(r)
if key == "" { if key == "" {
return "", "" // No credentials provided return "", nil, "" // No credentials provided
} }
ns, err := g.lookupAPIKeyNamespace(r.Context(), key, g.client) ns, err := g.lookupAPIKeyNamespace(r.Context(), key, g.client)
if err != nil { if err != nil {
return "", "invalid API key" return "", nil, "invalid API key"
} }
return ns, "" return ns, nil, ""
} }
// lookupAPIKeyNamespace resolves an API key to its namespace using cache and DB. // lookupAPIKeyNamespace resolves an API key to its namespace using cache and DB.
@ -325,6 +414,17 @@ func (g *Gateway) authMiddleware(next http.Handler) http.Handler {
if ns != "" { if ns != "" {
// Pre-authenticated by main gateway - trust the namespace // Pre-authenticated by main gateway - trust the namespace
reqCtx := context.WithValue(r.Context(), CtxKeyNamespaceOverride, ns) reqCtx := context.WithValue(r.Context(), CtxKeyNamespaceOverride, ns)
// Bug #215: also rebuild ctxKeyJWT from the trusted
// internal-auth headers (subject + custom claims) so
// serverless host functions see a non-empty
// caller_jwt_subject. We deliberately do NOT re-verify
// the JWT here — namespace gateways may not share the
// signing key with the main gateway, and the main gateway
// already verified before forwarding. Trust gate is the
// source-IP check above.
if claims := claimsFromInternalAuthHeaders(r.Header, ns); claims != nil {
reqCtx = context.WithValue(reqCtx, ctxKeyJWT, claims)
}
next.ServeHTTP(w, r.WithContext(reqCtx)) next.ServeHTTP(w, r.WithContext(reqCtx))
return return
} }
@ -916,7 +1016,7 @@ func (g *Gateway) domainRoutingMiddleware(next http.Handler) http.Handler {
func (g *Gateway) handleNamespaceGatewayRequest(w http.ResponseWriter, r *http.Request, namespaceName string) { func (g *Gateway) handleNamespaceGatewayRequest(w http.ResponseWriter, r *http.Request, namespaceName string) {
// Validate auth against main cluster RQLite BEFORE proxying // Validate auth against main cluster RQLite BEFORE proxying
// This ensures API keys work even though they're not in the namespace's RQLite // This ensures API keys work even though they're not in the namespace's RQLite
validatedNamespace, authErr := g.validateAuthForNamespaceProxy(r) validatedNamespace, validatedClaims, authErr := g.validateAuthForNamespaceProxy(r)
if authErr != "" && !isPublicPath(r.URL.Path) { if authErr != "" && !isPublicPath(r.URL.Path) {
w.Header().Set("WWW-Authenticate", "Bearer error=\"invalid_token\"") w.Header().Set("WWW-Authenticate", "Bearer error=\"invalid_token\"")
writeError(w, http.StatusUnauthorized, authErr) writeError(w, http.StatusUnauthorized, authErr)
@ -1078,10 +1178,19 @@ func (g *Gateway) handleNamespaceGatewayRequest(w http.ResponseWriter, r *http.R
r.Header.Set("X-Forwarded-For", getClientIP(r)) r.Header.Set("X-Forwarded-For", getClientIP(r))
r.Header.Set("X-Forwarded-Proto", "https") r.Header.Set("X-Forwarded-Proto", "https")
r.Header.Set("X-Forwarded-Host", r.Host) r.Header.Set("X-Forwarded-Host", r.Host)
// SECURITY (bug #215 follow-up): drop any X-Internal-Auth-* headers
// the external client may have set BEFORE applying the trusted
// values from this gateway. Prevents identity spoofing on the
// namespace gateway, which gates on source IP only.
stripInboundInternalAuthHeaders(r.Header)
// Set internal auth headers if auth was validated // Set internal auth headers if auth was validated
if validatedNamespace != "" { if validatedNamespace != "" {
r.Header.Set(HeaderInternalAuthValidated, "true") r.Header.Set(HeaderInternalAuthValidated, "true")
r.Header.Set(HeaderInternalAuthNamespace, validatedNamespace) r.Header.Set(HeaderInternalAuthNamespace, validatedNamespace)
// Bug #215: forward validated JWT subject + custom claims so the
// namespace gateway's auth middleware can hydrate ctxKeyJWT and
// host functions see a non-empty caller_jwt_subject.
setInternalAuthJWTHeaders(r.Header, validatedClaims)
} }
r.URL.Scheme = "http" r.URL.Scheme = "http"
r.URL.Host = targetHost r.URL.Host = targetHost
@ -1120,11 +1229,21 @@ func (g *Gateway) handleNamespaceGatewayRequest(w http.ResponseWriter, r *http.R
proxyReq.Header.Set("X-Forwarded-Host", r.Host) proxyReq.Header.Set("X-Forwarded-Host", r.Host)
proxyReq.Header.Set("X-Original-Host", r.Host) proxyReq.Header.Set("X-Original-Host", r.Host)
// SECURITY (bug #215 follow-up): drop any X-Internal-Auth-* headers
// the header-copy loop above may have inherited from the inbound
// request BEFORE writing the trusted values. The namespace gateway
// gates on source IP only; without this strip, an external attacker
// could forge X-Internal-Auth-JWT-Sub and impersonate any wallet.
stripInboundInternalAuthHeaders(proxyReq.Header)
// Set internal auth headers if auth was validated by main gateway // Set internal auth headers if auth was validated by main gateway
// This allows the namespace gateway to trust the authentication // This allows the namespace gateway to trust the authentication
if validatedNamespace != "" { if validatedNamespace != "" {
proxyReq.Header.Set(HeaderInternalAuthValidated, "true") proxyReq.Header.Set(HeaderInternalAuthValidated, "true")
proxyReq.Header.Set(HeaderInternalAuthNamespace, validatedNamespace) proxyReq.Header.Set(HeaderInternalAuthNamespace, validatedNamespace)
// Bug #215: forward validated JWT subject + custom claims so the
// namespace gateway's auth middleware can hydrate ctxKeyJWT and
// host functions see a non-empty caller_jwt_subject.
setInternalAuthJWTHeaders(proxyReq.Header, validatedClaims)
} }
// Pick the proxy timeout based on the path's expected work bound. // Pick the proxy timeout based on the path's expected work bound.
@ -1353,6 +1472,11 @@ serveLocal:
targetHost := "localhost:" + strconv.Itoa(deployment.Port) targetHost := "localhost:" + strconv.Itoa(deployment.Port)
target := "http://" + targetHost target := "http://" + targetHost
// SECURITY (bug #215 follow-up): drop X-Internal-Auth-* headers before
// forwarding to a customer-deployed app on localhost. The app has no
// reason to see Orama's internal-auth metadata, and stripping prevents
// any accidental leakage if the customer ever adds header-based trust.
stripInboundInternalAuthHeaders(r.Header)
// Set proxy headers // Set proxy headers
r.Header.Set("X-Forwarded-For", getClientIP(r)) r.Header.Set("X-Forwarded-For", getClientIP(r))
r.Header.Set("X-Forwarded-Proto", "https") r.Header.Set("X-Forwarded-Proto", "https")
@ -1465,6 +1589,12 @@ func (g *Gateway) proxyCrossNode(w http.ResponseWriter, r *http.Request, deploym
// Handle WebSocket upgrade requests specially // Handle WebSocket upgrade requests specially
if isWebSocketUpgrade(r) { if isWebSocketUpgrade(r) {
// SECURITY (bug #215 follow-up): drop X-Internal-Auth-* headers from
// the inbound request before forwarding to another node. The target
// node's authMiddleware gates internal-auth trust on source IP only;
// without this strip, an external attacker could forge JWT-Sub here
// and have the home node trust it after the cross-node hop.
stripInboundInternalAuthHeaders(r.Header)
r.Header.Set("X-Forwarded-For", getClientIP(r)) r.Header.Set("X-Forwarded-For", getClientIP(r))
r.Header.Set("X-Orama-Proxy-Node", g.nodePeerID) r.Header.Set("X-Orama-Proxy-Node", g.nodePeerID)
r.URL.Scheme = "http" r.URL.Scheme = "http"
@ -1490,6 +1620,9 @@ func (g *Gateway) proxyCrossNode(w http.ResponseWriter, r *http.Request, deploym
proxyReq.Header.Add(key, value) proxyReq.Header.Add(key, value)
} }
} }
// SECURITY (bug #215 follow-up): drop any X-Internal-Auth-* headers the
// inbound request may have carried. See WS branch above.
stripInboundInternalAuthHeaders(proxyReq.Header)
proxyReq.Host = r.Host // Keep original host for domain routing on target node proxyReq.Host = r.Host // Keep original host for domain routing on target node
proxyReq.Header.Set("X-Forwarded-For", getClientIP(r)) proxyReq.Header.Set("X-Forwarded-For", getClientIP(r))
proxyReq.Header.Set("X-Orama-Proxy-Node", g.nodePeerID) // Prevent loops proxyReq.Header.Set("X-Orama-Proxy-Node", g.nodePeerID) // Prevent loops
@ -1590,6 +1723,8 @@ func (g *Gateway) proxyCrossNodeToIP(w http.ResponseWriter, r *http.Request, dep
// Handle WebSocket upgrade requests specially // Handle WebSocket upgrade requests specially
if isWebSocketUpgrade(r) { if isWebSocketUpgrade(r) {
// SECURITY (bug #215 follow-up): see proxyCrossNode for rationale.
stripInboundInternalAuthHeaders(r.Header)
r.Header.Set("X-Forwarded-For", getClientIP(r)) r.Header.Set("X-Forwarded-For", getClientIP(r))
r.Header.Set("X-Orama-Proxy-Node", g.nodePeerID) r.Header.Set("X-Orama-Proxy-Node", g.nodePeerID)
r.URL.Scheme = "http" r.URL.Scheme = "http"
@ -1613,6 +1748,8 @@ func (g *Gateway) proxyCrossNodeToIP(w http.ResponseWriter, r *http.Request, dep
proxyReq.Header.Add(key, value) proxyReq.Header.Add(key, value)
} }
} }
// SECURITY (bug #215 follow-up): see proxyCrossNode for rationale.
stripInboundInternalAuthHeaders(proxyReq.Header)
proxyReq.Host = r.Host proxyReq.Host = r.Host
proxyReq.Header.Set("X-Forwarded-For", getClientIP(r)) proxyReq.Header.Set("X-Forwarded-For", getClientIP(r))
proxyReq.Header.Set("X-Orama-Proxy-Node", g.nodePeerID) proxyReq.Header.Set("X-Orama-Proxy-Node", g.nodePeerID)

View File

@ -62,3 +62,27 @@ func (g *Gateway) pushSendHandler(w http.ResponseWriter, r *http.Request) {
} }
g.pushHandlers.SendHandler(w, r) g.pushHandlers.SendHandler(w, r)
} }
// pushConfigHandler dispatches GET / PUT / DELETE on /v1/push/config — the
// tenant-self-service entrypoint for per-namespace push provider config
// (bug #220 follow-up). When push is fully disabled returns 503 with the
// same actionable message as the device endpoints.
func (g *Gateway) pushConfigHandler(w http.ResponseWriter, r *http.Request) {
if g.pushHandlers == nil {
httputil.WriteRPCError(w, http.StatusServiceUnavailable,
httputil.ErrCodeServiceUnavailable, pushNotConfiguredMessage)
return
}
switch r.Method {
case http.MethodGet:
g.pushHandlers.GetConfigHandler(w, r)
case http.MethodPut, http.MethodPost:
g.pushHandlers.PutConfigHandler(w, r)
case http.MethodDelete:
g.pushHandlers.DeleteConfigHandler(w, r)
default:
httputil.WriteRPCError(w, http.StatusMethodNotAllowed,
httputil.ErrCodeValidationFailed,
"method not allowed: use GET to read, PUT to update, or DELETE to clear")
}
}

View File

@ -139,6 +139,11 @@ func (g *Gateway) Routes() http.Handler {
mux.HandleFunc("/v1/push/devices/", g.pushDevicesByIDHandler) mux.HandleFunc("/v1/push/devices/", g.pushDevicesByIDHandler)
mux.HandleFunc("/v1/push/send", g.pushSendHandler) mux.HandleFunc("/v1/push/send", g.pushSendHandler)
// Per-namespace push provider configuration (bug #220 follow-up):
// GET / PUT / DELETE — tenants self-serve their ntfy/expo credentials
// instead of filing an ops ticket. Method dispatched in the handler.
mux.HandleFunc("/v1/push/config", g.pushConfigHandler)
// operator node management (wallet JWT auth via middleware) // operator node management (wallet JWT auth via middleware)
if g.operatorHandler != nil { if g.operatorHandler != nil {
mux.HandleFunc("/v1/operator/invite", g.operatorHandler.HandleInvite) mux.HandleFunc("/v1/operator/invite", g.operatorHandler.HandleInvite)

View File

@ -4,19 +4,27 @@ import (
"crypto/ed25519" "crypto/ed25519"
"crypto/rand" "crypto/rand"
"crypto/rsa" "crypto/rsa"
"crypto/sha256"
"crypto/x509" "crypto/x509"
"encoding/pem" "encoding/pem"
"fmt" "fmt"
"io"
"os" "os"
"path/filepath" "path/filepath"
"github.com/DeBrosOfficial/network/pkg/logging" "github.com/DeBrosOfficial/network/pkg/logging"
"go.uber.org/zap" "go.uber.org/zap"
"golang.org/x/crypto/hkdf"
) )
const jwtKeyFileName = "jwt-signing-key.pem" const jwtKeyFileName = "jwt-signing-key.pem"
const eddsaKeyFileName = "jwt-eddsa-key.pem" const eddsaKeyFileName = "jwt-eddsa-key.pem"
// jwtEdDSADerivePurpose is the HKDF info string used to derive the cluster-wide
// Ed25519 JWT signing seed from the cluster secret. Bumping the version label
// (e.g. "...-v2") on disk = full re-derive + invalidates old tokens.
const jwtEdDSADerivePurpose = "orama-jwt-eddsa-v1"
// loadOrCreateSigningKey loads the JWT signing key from disk, or generates a new one // loadOrCreateSigningKey loads the JWT signing key from disk, or generates a new one
// if none exists. This ensures JWTs survive gateway restarts. // if none exists. This ensures JWTs survive gateway restarts.
func loadOrCreateSigningKey(dataDir string, logger *logging.ColoredLogger) ([]byte, error) { func loadOrCreateSigningKey(dataDir string, logger *logging.ColoredLogger) ([]byte, error) {
@ -65,10 +73,37 @@ func loadOrCreateSigningKey(dataDir string, logger *logging.ColoredLogger) ([]by
} }
// loadOrCreateEdSigningKey loads or generates an Ed25519 private key for EdDSA JWT signing. // loadOrCreateEdSigningKey loads or generates an Ed25519 private key for EdDSA JWT signing.
// The key is stored as a PKCS8-encoded PEM file alongside the RSA key. //
func loadOrCreateEdSigningKey(dataDir string, logger *logging.ColoredLogger) (ed25519.PrivateKey, error) { // Bug #215 fix: when a non-empty clusterSecret is provided, the key is derived
// DETERMINISTICALLY from the cluster secret using HKDF-SHA256. This means every
// gateway in the cluster ends up with the same Ed25519 keypair, so a JWT signed
// by one gateway can be verified by any other gateway. Without this, each
// gateway minted its own random key on first boot, JWTs were unverifiable
// cross-gateway, and host functions saw empty `caller_jwt_subject` whenever
// invoke landed on a different node than `/v1/auth/login`.
//
// When clusterSecret is empty (single-node test rigs, no shared secret),
// falls back to the legacy random-per-node behaviour.
//
// On-disk PEM is the source of truth at runtime; if it doesn't match the
// derivation (e.g. left over from before this fix, or cluster secret rotated
// the label), the file is rewritten with the canonical key. Old tokens become
// unverifiable but JWT lifetime is short (15 min) so the disruption is bounded.
func loadOrCreateEdSigningKey(dataDir, clusterSecret string, logger *logging.ColoredLogger) (ed25519.PrivateKey, error) {
keyPath := filepath.Join(dataDir, "secrets", eddsaKeyFileName) keyPath := filepath.Join(dataDir, "secrets", eddsaKeyFileName)
// Compute the canonical key for this cluster (if a secret is available).
// Empty clusterSecret = legacy mode, no canonical key, just use whatever
// is on disk or freshly generated.
var canonical ed25519.PrivateKey
if clusterSecret != "" {
seed, err := deriveEd25519Seed(clusterSecret)
if err != nil {
return nil, fmt.Errorf("derive Ed25519 seed from cluster secret: %w", err)
}
canonical = ed25519.NewKeyFromSeed(seed)
}
// Try to load existing key // Try to load existing key
if keyPEM, err := os.ReadFile(keyPath); err == nil && len(keyPEM) > 0 { if keyPEM, err := os.ReadFile(keyPath); err == nil && len(keyPEM) > 0 {
block, _ := pem.Decode(keyPEM) block, _ := pem.Decode(keyPEM)
@ -76,20 +111,38 @@ func loadOrCreateEdSigningKey(dataDir string, logger *logging.ColoredLogger) (ed
parsed, err := x509.ParsePKCS8PrivateKey(block.Bytes) parsed, err := x509.ParsePKCS8PrivateKey(block.Bytes)
if err == nil { if err == nil {
if edKey, ok := parsed.(ed25519.PrivateKey); ok { if edKey, ok := parsed.(ed25519.PrivateKey); ok {
logger.ComponentInfo(logging.ComponentGeneral, "Loaded existing EdDSA signing key", // If we have a canonical cluster-derived key, the on-disk
zap.String("path", keyPath)) // key MUST match it. Otherwise we'd silently keep using a
return edKey, nil // per-node random key and JWTs wouldn't verify cross-node.
if canonical != nil && !ed25519.PrivateKey(edKey).Equal(canonical) {
logger.ComponentWarn(logging.ComponentGeneral,
"On-disk EdDSA key does not match cluster-derived key; rewriting from cluster secret",
zap.String("path", keyPath))
// Fall through to write canonical below.
} else {
logger.ComponentInfo(logging.ComponentGeneral, "Loaded existing EdDSA signing key",
zap.String("path", keyPath))
return edKey, nil
}
} }
} }
} }
logger.ComponentWarn(logging.ComponentGeneral, "Existing EdDSA signing key is invalid, generating new one", if canonical == nil {
zap.String("path", keyPath)) logger.ComponentWarn(logging.ComponentGeneral, "Existing EdDSA signing key is invalid, generating new one",
zap.String("path", keyPath))
}
} }
// Generate new Ed25519 key // Either nothing on disk, key was unparseable, or it didn't match the
_, priv, err := ed25519.GenerateKey(rand.Reader) // canonical cluster-derived key. Use canonical when available; otherwise
if err != nil { // generate a random one (legacy single-node fallback).
return nil, fmt.Errorf("generate Ed25519 key: %w", err) priv := canonical
if priv == nil {
_, generated, err := ed25519.GenerateKey(rand.Reader)
if err != nil {
return nil, fmt.Errorf("generate Ed25519 key: %w", err)
}
priv = generated
} }
pkcs8Bytes, err := x509.MarshalPKCS8PrivateKey(priv) pkcs8Bytes, err := x509.MarshalPKCS8PrivateKey(priv)
@ -112,7 +165,27 @@ func loadOrCreateEdSigningKey(dataDir string, logger *logging.ColoredLogger) (ed
return nil, fmt.Errorf("write EdDSA signing key: %w", err) return nil, fmt.Errorf("write EdDSA signing key: %w", err)
} }
logger.ComponentInfo(logging.ComponentGeneral, "Generated and saved new EdDSA signing key", source := "random per-node"
zap.String("path", keyPath)) if canonical != nil {
source = "cluster-derived (HKDF)"
}
logger.ComponentInfo(logging.ComponentGeneral, "Saved EdDSA signing key",
zap.String("path", keyPath),
zap.String("source", source))
return priv, nil return priv, nil
} }
// deriveEd25519Seed derives a deterministic 32-byte seed for Ed25519 from the
// cluster secret using HKDF-SHA256 with a stable purpose label. Same secret +
// same label = same seed = same keypair on every gateway in the cluster.
func deriveEd25519Seed(clusterSecret string) ([]byte, error) {
if clusterSecret == "" {
return nil, fmt.Errorf("cluster secret is empty")
}
reader := hkdf.New(sha256.New, []byte(clusterSecret), nil, []byte(jwtEdDSADerivePurpose))
seed := make([]byte, ed25519.SeedSize)
if _, err := io.ReadFull(reader, seed); err != nil {
return nil, fmt.Errorf("HKDF read failed: %w", err)
}
return seed, nil
}

View File

@ -0,0 +1,147 @@
package gateway
import (
"crypto/ed25519"
"os"
"path/filepath"
"testing"
"github.com/DeBrosOfficial/network/pkg/logging"
)
// TestDeriveEd25519Seed_deterministic guards bug #215: every gateway in a
// cluster must end up with the same Ed25519 keypair so JWTs verify on any
// node. Same cluster secret + same purpose label MUST produce the same seed.
func TestDeriveEd25519Seed_deterministic(t *testing.T) {
a, err := deriveEd25519Seed("super-secret-cluster-key")
if err != nil {
t.Fatalf("derive #1: %v", err)
}
b, err := deriveEd25519Seed("super-secret-cluster-key")
if err != nil {
t.Fatalf("derive #2: %v", err)
}
if len(a) != ed25519.SeedSize {
t.Errorf("seed size = %d, want %d", len(a), ed25519.SeedSize)
}
if string(a) != string(b) {
t.Error("seed not deterministic for same secret")
}
}
// TestDeriveEd25519Seed_differentSecretsDifferentSeeds rules out a trivial
// implementation that ignores the input.
func TestDeriveEd25519Seed_differentSecretsDifferentSeeds(t *testing.T) {
a, err := deriveEd25519Seed("secret-a")
if err != nil {
t.Fatalf("derive a: %v", err)
}
b, err := deriveEd25519Seed("secret-b")
if err != nil {
t.Fatalf("derive b: %v", err)
}
if string(a) == string(b) {
t.Error("different secrets produced identical seed")
}
}
func TestDeriveEd25519Seed_emptySecret(t *testing.T) {
if _, err := deriveEd25519Seed(""); err == nil {
t.Error("expected error for empty cluster secret, got nil")
}
}
// TestLoadOrCreateEdSigningKey_clusterSecretSharedAcrossNodes simulates two
// gateways with separate dataDirs but the same cluster secret. They MUST end
// up with the same Ed25519 private key so JWTs verify cross-node.
func TestLoadOrCreateEdSigningKey_clusterSecretSharedAcrossNodes(t *testing.T) {
dirA := t.TempDir()
dirB := t.TempDir()
const clusterSecret = "shared-cluster-secret-for-test"
logger, _ := logging.NewColoredLogger(logging.ComponentGeneral, false)
keyA, err := loadOrCreateEdSigningKey(dirA, clusterSecret, logger)
if err != nil {
t.Fatalf("node A: %v", err)
}
keyB, err := loadOrCreateEdSigningKey(dirB, clusterSecret, logger)
if err != nil {
t.Fatalf("node B: %v", err)
}
if !ed25519.PrivateKey(keyA).Equal(keyB) {
t.Fatal("two nodes with same cluster secret produced different Ed25519 keys")
}
// PEMs should also be persisted.
if _, err := os.Stat(filepath.Join(dirA, "secrets", eddsaKeyFileName)); err != nil {
t.Errorf("PEM not written on node A: %v", err)
}
if _, err := os.Stat(filepath.Join(dirB, "secrets", eddsaKeyFileName)); err != nil {
t.Errorf("PEM not written on node B: %v", err)
}
}
// TestLoadOrCreateEdSigningKey_emptySecretFallback verifies the legacy
// per-node behaviour is preserved when no cluster secret is available
// (single-node test rigs, dev). Two nodes get DIFFERENT random keys.
func TestLoadOrCreateEdSigningKey_emptySecretFallback(t *testing.T) {
dirA := t.TempDir()
dirB := t.TempDir()
logger, _ := logging.NewColoredLogger(logging.ComponentGeneral, false)
keyA, err := loadOrCreateEdSigningKey(dirA, "", logger)
if err != nil {
t.Fatalf("node A: %v", err)
}
keyB, err := loadOrCreateEdSigningKey(dirB, "", logger)
if err != nil {
t.Fatalf("node B: %v", err)
}
if ed25519.PrivateKey(keyA).Equal(keyB) {
t.Error("two nodes without cluster secret unexpectedly produced identical keys")
}
}
// TestLoadOrCreateEdSigningKey_overwritesStaleOnDiskKey covers the upgrade
// path: a gateway that previously generated a per-node random key (fix #215
// not yet deployed) now restarts with cluster-secret derivation enabled. The
// random key on disk MUST be replaced with the canonical cluster-derived one.
func TestLoadOrCreateEdSigningKey_overwritesStaleOnDiskKey(t *testing.T) {
dir := t.TempDir()
logger, _ := logging.NewColoredLogger(logging.ComponentGeneral, false)
// First boot: no cluster secret -> per-node random key.
keyV1, err := loadOrCreateEdSigningKey(dir, "", logger)
if err != nil {
t.Fatalf("v1: %v", err)
}
// Second boot: cluster secret now configured -> must rewrite to canonical.
const clusterSecret = "now-i-have-a-secret"
keyV2, err := loadOrCreateEdSigningKey(dir, clusterSecret, logger)
if err != nil {
t.Fatalf("v2: %v", err)
}
if ed25519.PrivateKey(keyV1).Equal(keyV2) {
t.Fatal("stale per-node key was not replaced when cluster secret became available")
}
// And the new key must match a fresh derivation from the same secret.
seed, err := deriveEd25519Seed(clusterSecret)
if err != nil {
t.Fatalf("derive seed: %v", err)
}
canonical := ed25519.NewKeyFromSeed(seed)
if !ed25519.PrivateKey(keyV2).Equal(canonical) {
t.Error("rewritten key does not match canonical derivation")
}
// Third boot with same secret: must be stable, no rewrite, same key.
keyV3, err := loadOrCreateEdSigningKey(dir, clusterSecret, logger)
if err != nil {
t.Fatalf("v3: %v", err)
}
if !ed25519.PrivateKey(keyV2).Equal(keyV3) {
t.Error("canonical key is not stable across restarts")
}
}

View File

@ -39,6 +39,12 @@ type ClusterManagerConfig struct {
// in RQLite. Derived from cluster secret via HKDF(clusterSecret, "turn-encryption"). // in RQLite. Derived from cluster secret via HKDF(clusterSecret, "turn-encryption").
// If nil, TURN secrets are stored in plaintext (backward compatibility). // If nil, TURN secrets are stored in plaintext (backward compatibility).
TurnEncryptionKey []byte TurnEncryptionKey []byte
// ClusterSecretPath is the host's cluster-secret file path. Forwarded
// to spawned namespace gateways via YAML so they can derive the
// cluster-wide JWT signing key (bug #215 fix). Empty string disables
// cross-node JWT verification within namespace clusters.
ClusterSecretPath string
} }
// ClusterManager orchestrates namespace cluster provisioning and lifecycle // ClusterManager orchestrates namespace cluster provisioning and lifecycle
@ -81,7 +87,7 @@ func NewClusterManager(
portAllocator := NewNamespacePortAllocator(db, logger) portAllocator := NewNamespacePortAllocator(db, logger)
webrtcPortAllocator := NewWebRTCPortAllocator(db, logger) webrtcPortAllocator := NewWebRTCPortAllocator(db, logger)
nodeSelector := NewClusterNodeSelector(db, portAllocator, logger) nodeSelector := NewClusterNodeSelector(db, portAllocator, logger)
systemdSpawner := NewSystemdSpawner(cfg.BaseDataDir, logger) systemdSpawner := NewSystemdSpawner(cfg.BaseDataDir, cfg.ClusterSecretPath, logger)
dnsManager := NewDNSRecordManager(db, cfg.BaseDomain, logger) dnsManager := NewDNSRecordManager(db, cfg.BaseDomain, logger)
// Set IPFS defaults // Set IPFS defaults

View File

@ -22,15 +22,28 @@ import (
type SystemdSpawner struct { type SystemdSpawner struct {
systemdMgr *systemd.Manager systemdMgr *systemd.Manager
namespaceBase string namespaceBase string
logger *zap.Logger // clusterSecretPath is the host's cluster-secret file path; written
// into spawned namespace gateways' YAML so they can derive the
// cluster-wide JWT signing key (bug #215). Empty string means the host
// has no cluster secret available — namespace gateways will fall back
// to per-node random keys and JWTs won't verify cross-node.
clusterSecretPath string
logger *zap.Logger
} }
// NewSystemdSpawner creates a new systemd-based spawner // NewSystemdSpawner creates a new systemd-based spawner.
func NewSystemdSpawner(namespaceBase string, logger *zap.Logger) *SystemdSpawner { //
// clusterSecretPath should point to the host node's cluster-secret file
// (typically `<oramaDir>/secrets/cluster-secret`). It is written into each
// spawned namespace gateway's YAML config so the gateway can read it on
// startup. Pass "" only if no cluster secret exists on this host (legacy
// single-node test deployments).
func NewSystemdSpawner(namespaceBase, clusterSecretPath string, logger *zap.Logger) *SystemdSpawner {
return &SystemdSpawner{ return &SystemdSpawner{
systemdMgr: systemd.NewManager(namespaceBase, logger), systemdMgr: systemd.NewManager(namespaceBase, logger),
namespaceBase: namespaceBase, namespaceBase: namespaceBase,
logger: logger.With(zap.String("component", "systemd-spawner")), clusterSecretPath: clusterSecretPath,
logger: logger.With(zap.String("component", "systemd-spawner")),
} }
} }
@ -209,6 +222,12 @@ func (s *SystemdSpawner) SpawnGateway(ctx context.Context, namespace, nodeID str
IPFSAPIURL: cfg.IPFSAPIURL, IPFSAPIURL: cfg.IPFSAPIURL,
IPFSTimeout: cfg.IPFSTimeout.String(), IPFSTimeout: cfg.IPFSTimeout.String(),
IPFSReplicationFactor: cfg.IPFSReplicationFactor, IPFSReplicationFactor: cfg.IPFSReplicationFactor,
// Bug #215 fix: forward the host's cluster secret path so the
// spawned namespace gateway can derive the cluster-wide JWT
// signing key. Without this, namespace gateways used per-node
// random Ed25519 keys and host functions saw empty
// caller_jwt_subject.
ClusterSecretPath: s.clusterSecretPath,
WebRTC: gateway.GatewayYAMLWebRTC{ WebRTC: gateway.GatewayYAMLWebRTC{
Enabled: cfg.WebRTCEnabled, Enabled: cfg.WebRTCEnabled,
SFUPort: cfg.SFUPort, SFUPort: cfg.SFUPort,

View File

@ -98,6 +98,17 @@ func (n *Node) startHTTPGateway(ctx context.Context) error {
} }
} }
// Bug #215 fix: tell the namespace cluster manager where the
// host's cluster-secret file lives. Spawned namespace gateways
// read it on startup to derive the cluster-wide Ed25519 JWT
// signing key. The path resolves to the same file the host
// gateway reads above (line ~45) — keeps the secret on disk
// once, just referenced from the namespace gateway YAML.
clusterSecretPath := ""
if clusterSecret != "" {
clusterSecretPath = filepath.Join(oramaDir, "secrets", "cluster-secret")
}
clusterCfg := namespace.ClusterManagerConfig{ clusterCfg := namespace.ClusterManagerConfig{
BaseDomain: n.config.HTTPGateway.BaseDomain, BaseDomain: n.config.HTTPGateway.BaseDomain,
BaseDataDir: baseDataDir, BaseDataDir: baseDataDir,
@ -107,6 +118,7 @@ func (n *Node) startHTTPGateway(ctx context.Context) error {
IPFSTimeout: gwCfg.IPFSTimeout, IPFSTimeout: gwCfg.IPFSTimeout,
IPFSReplicationFactor: n.config.Database.IPFS.ReplicationFactor, IPFSReplicationFactor: n.config.Database.IPFS.ReplicationFactor,
TurnEncryptionKey: turnEncKey, TurnEncryptionKey: turnEncKey,
ClusterSecretPath: clusterSecretPath,
} }
clusterManager := namespace.NewClusterManager(ormClient, clusterCfg, n.logger.Logger) clusterManager := namespace.NewClusterManager(ormClient, clusterCfg, n.logger.Logger)
clusterManager.SetLocalNodeID(gwCfg.NodePeerID) clusterManager.SetLocalNodeID(gwCfg.NodePeerID)
@ -114,8 +126,11 @@ func (n *Node) startHTTPGateway(ctx context.Context) error {
apiGateway.SetNodeRecoverer(clusterManager) apiGateway.SetNodeRecoverer(clusterManager)
apiGateway.SetWebRTCManager(clusterManager) apiGateway.SetWebRTCManager(clusterManager)
// Wire spawn handler for distributed namespace instance spawning // Wire spawn handler for distributed namespace instance spawning.
systemdSpawner := namespace.NewSystemdSpawner(baseDataDir, n.logger.Logger) // Forwards the host's cluster_secret_path through to spawned
// namespace gateways (bug #215 fix; same rationale as the
// ClusterManager spawner above).
systemdSpawner := namespace.NewSystemdSpawner(baseDataDir, clusterSecretPath, n.logger.Logger)
spawnHandler := namespacehandlers.NewSpawnHandler(systemdSpawner, n.logger.Logger) spawnHandler := namespacehandlers.NewSpawnHandler(systemdSpawner, n.logger.Logger)
apiGateway.SetSpawnHandler(spawnHandler) apiGateway.SetSpawnHandler(spawnHandler)

View File

@ -0,0 +1,232 @@
package push
// config_store.go — per-namespace push provider configuration with
// encrypted credential storage.
//
// Tenants set their own ntfy / expo credentials via the HTTP config
// endpoint; this store persists them in RQLite (`namespace_push_config`),
// encrypting sensitive fields at rest using a key derived from the
// cluster secret. The gateway YAML config remains as a global fallback —
// a row here OVERRIDES the YAML for that namespace, absent rows inherit
// YAML defaults.
//
// See bug #220 follow-up. Pattern intentionally generic (mirrors
// push_devices encryption + storage), so the next "tenant should
// self-serve this knob" need (rate limits, custom domains, etc.) can
// drop a similar table + handler in.
import (
"context"
"database/sql"
"errors"
"fmt"
"time"
"github.com/DeBrosOfficial/network/pkg/rqlite"
"github.com/DeBrosOfficial/network/pkg/secrets"
"go.uber.org/zap"
)
// purposeNamespacePushConfig is the HKDF "purpose" string for the
// per-namespace push-config encryption key. Distinct from other purposes
// so a key compromise in one domain doesn't compromise this one.
const purposeNamespacePushConfig = "namespace-push-config"
// Config is the in-memory representation of one namespace's push provider
// configuration. Sensitive fields are PLAINTEXT in this struct — encryption
// happens at the storage boundary.
type Config struct {
Namespace string
NtfyBaseURL string
NtfyAuthToken string
ExpoAccessToken string
UpdatedAt int64
UpdatedBy string
}
// IsEmpty returns true when this config has no provider credentials set —
// the namespace is effectively "no push" and the manager should fall back
// to the gateway YAML defaults (or refuse if those are also empty).
func (c *Config) IsEmpty() bool {
if c == nil {
return true
}
return c.NtfyBaseURL == "" && c.ExpoAccessToken == ""
}
// Redacted returns a copy of this Config safe for return over HTTP — the
// auth-token / access-token fields are zeroed out and replaced with a
// boolean (`HasNtfyAuthToken` / `HasExpoAccessToken`) at the response
// shape level. Use this to avoid ever sending the secrets back.
type RedactedConfig struct {
Namespace string `json:"namespace"`
NtfyBaseURL string `json:"ntfy_base_url,omitempty"`
HasNtfyAuthToken bool `json:"has_ntfy_auth_token"`
HasExpoAccessToken bool `json:"has_expo_access_token"`
UpdatedAt int64 `json:"updated_at,omitempty"`
UpdatedBy string `json:"updated_by,omitempty"`
}
// Redacted returns the redacted view safe for GET responses.
func (c *Config) Redacted() RedactedConfig {
if c == nil {
return RedactedConfig{}
}
return RedactedConfig{
Namespace: c.Namespace,
NtfyBaseURL: c.NtfyBaseURL,
HasNtfyAuthToken: c.NtfyAuthToken != "",
HasExpoAccessToken: c.ExpoAccessToken != "",
UpdatedAt: c.UpdatedAt,
UpdatedBy: c.UpdatedBy,
}
}
// ConfigStore is the persistence layer for per-namespace push config.
// Errors returned for "no row for namespace" → ErrConfigNotFound.
type ConfigStore interface {
Get(ctx context.Context, namespace string) (*Config, error)
Upsert(ctx context.Context, cfg Config) error
Delete(ctx context.Context, namespace string) error
}
// ErrConfigNotFound is returned by Get when no row exists for the
// namespace. Callers fall back to YAML defaults in that case.
var ErrConfigNotFound = errors.New("push config not found for namespace")
// rqliteConfigStore implements ConfigStore over RQLite + pkg/secrets.
type rqliteConfigStore struct {
db rqlite.Client
encKey []byte
logger *zap.Logger
}
// NewRqliteConfigStore wires the store to RQLite with a cluster-secret-
// derived encryption key for credential fields.
func NewRqliteConfigStore(db rqlite.Client, clusterSecret string, logger *zap.Logger) (ConfigStore, error) {
if clusterSecret == "" {
return nil, fmt.Errorf("push config store: cluster secret required for credential encryption")
}
key, err := secrets.DeriveKey(clusterSecret, purposeNamespacePushConfig)
if err != nil {
return nil, fmt.Errorf("push config store: derive key: %w", err)
}
if logger == nil {
logger = zap.NewNop()
}
return &rqliteConfigStore{db: db, encKey: key, logger: logger}, nil
}
// Get returns the namespace's config, decrypting sensitive fields. Returns
// ErrConfigNotFound if no row exists — caller decides whether to fall
// back to a default.
func (s *rqliteConfigStore) Get(ctx context.Context, namespace string) (*Config, error) {
if namespace == "" {
return nil, fmt.Errorf("push config: namespace required")
}
const q = `SELECT namespace, ntfy_base_url, ntfy_auth_token_encrypted,
expo_access_token_encrypted, updated_at, updated_by
FROM namespace_push_config WHERE namespace = ? LIMIT 1`
var rows []struct {
Namespace string `db:"namespace"`
NtfyBaseURL string `db:"ntfy_base_url"`
NtfyAuthTokenEncrypted string `db:"ntfy_auth_token_encrypted"`
ExpoAccessTokenEncrypted string `db:"expo_access_token_encrypted"`
UpdatedAt int64 `db:"updated_at"`
UpdatedBy string `db:"updated_by"`
}
if err := s.db.Query(ctx, &rows, q, namespace); err != nil {
// gorqlite stdlib path collapses "0 rows" to nil err; only real
// errors get here. ErrNoRows from sql is also treated below.
if errors.Is(err, sql.ErrNoRows) {
return nil, ErrConfigNotFound
}
return nil, fmt.Errorf("push config: query: %w", err)
}
if len(rows) == 0 {
return nil, ErrConfigNotFound
}
r := rows[0]
cfg := &Config{
Namespace: r.Namespace,
NtfyBaseURL: r.NtfyBaseURL,
UpdatedAt: r.UpdatedAt,
UpdatedBy: r.UpdatedBy,
}
if r.NtfyAuthTokenEncrypted != "" {
v, err := secrets.Decrypt(r.NtfyAuthTokenEncrypted, s.encKey)
if err != nil {
return nil, fmt.Errorf("push config: decrypt ntfy auth token: %w", err)
}
cfg.NtfyAuthToken = v
}
if r.ExpoAccessTokenEncrypted != "" {
v, err := secrets.Decrypt(r.ExpoAccessTokenEncrypted, s.encKey)
if err != nil {
return nil, fmt.Errorf("push config: decrypt expo access token: %w", err)
}
cfg.ExpoAccessToken = v
}
return cfg, nil
}
// Upsert writes or replaces the namespace's config. Sensitive fields are
// encrypted before storage. Empty strings for credential fields clear them.
func (s *rqliteConfigStore) Upsert(ctx context.Context, cfg Config) error {
if cfg.Namespace == "" {
return fmt.Errorf("push config: namespace required")
}
var ntfyEnc, expoEnc string
if cfg.NtfyAuthToken != "" {
v, err := secrets.Encrypt(cfg.NtfyAuthToken, s.encKey)
if err != nil {
return fmt.Errorf("push config: encrypt ntfy auth token: %w", err)
}
ntfyEnc = v
}
if cfg.ExpoAccessToken != "" {
v, err := secrets.Encrypt(cfg.ExpoAccessToken, s.encKey)
if err != nil {
return fmt.Errorf("push config: encrypt expo access token: %w", err)
}
expoEnc = v
}
updatedAt := cfg.UpdatedAt
if updatedAt == 0 {
updatedAt = time.Now().Unix()
}
const q = `INSERT INTO namespace_push_config (
namespace, ntfy_base_url, ntfy_auth_token_encrypted,
expo_access_token_encrypted, updated_at, updated_by
) VALUES (?, ?, ?, ?, ?, ?)
ON CONFLICT(namespace) DO UPDATE SET
ntfy_base_url = excluded.ntfy_base_url,
ntfy_auth_token_encrypted = excluded.ntfy_auth_token_encrypted,
expo_access_token_encrypted = excluded.expo_access_token_encrypted,
updated_at = excluded.updated_at,
updated_by = excluded.updated_by`
if _, err := s.db.Exec(ctx, q,
cfg.Namespace, cfg.NtfyBaseURL, ntfyEnc, expoEnc, updatedAt, cfg.UpdatedBy,
); err != nil {
return fmt.Errorf("push config: upsert: %w", err)
}
return nil
}
// Delete clears the namespace's config row. After this the manager falls
// back to YAML defaults (or refuses if no defaults set).
func (s *rqliteConfigStore) Delete(ctx context.Context, namespace string) error {
if namespace == "" {
return fmt.Errorf("push config: namespace required")
}
const q = `DELETE FROM namespace_push_config WHERE namespace = ?`
if _, err := s.db.Exec(ctx, q, namespace); err != nil {
return fmt.Errorf("push config: delete: %w", err)
}
return nil
}

265
core/pkg/push/manager.go Normal file
View File

@ -0,0 +1,265 @@
package push
// manager.go — top-level entry point for sending push notifications,
// with per-namespace provider configuration that tenants self-serve.
//
// The Manager wraps:
// - a ConfigStore (per-namespace overrides in RQLite)
// - a fallback Defaults (gateway YAML — the cluster-wide default if
// a namespace hasn't set its own)
// - an LRU cache of built dispatchers keyed by namespace
//
// Every send-path (HTTP, WASM hostfunc) goes through Manager.SendToUser
// or Manager.Send. The cache eliminates per-call config decryption +
// provider construction overhead — only the FIRST send for a namespace
// pays that cost; subsequent sends are zero-allocation lookups.
//
// Cache invalidation: HTTP config-change handlers call Invalidate after
// PUT/DELETE so the next send rebuilds with fresh config.
//
// See bug #220 follow-up. Generic by design — same pattern works for
// any future "tenant should self-serve this knob" feature.
import (
"container/list"
"context"
"errors"
"fmt"
"sync"
"go.uber.org/zap"
)
// ProviderFactory builds the set of providers for a resolved Config.
// Injected by the gateway so the push package itself doesn't import
// the provider sub-packages (avoids a circular dependency: provider
// sub-packages already depend on push for the PushProvider interface).
//
// The factory is called once per fresh dispatcher build (cache miss).
// Empty slice is allowed and means "this config produces no providers";
// Manager treats that as ErrPushNotConfigured.
type ProviderFactory func(cfg Config) []PushProvider
// ErrPushNotConfigured is returned by Send when the namespace has no
// per-namespace config AND the gateway has no fallback defaults — i.e.
// nothing to send through. Distinguish from ErrNoDevices (different
// failure mode).
var ErrPushNotConfigured = errors.New("push not configured for namespace; set ntfy_base_url or expo_access_token via PUT /v1/push/config")
// Defaults are the gateway-YAML fallback when a namespace hasn't set its
// own config. Any field set here applies to every namespace that doesn't
// override it. Empty Defaults means "no global default — namespace must
// set their own".
type Defaults struct {
NtfyBaseURL string
NtfyAuthToken string
ExpoAccessToken string
}
// IsEmpty returns true when no fallback provider is set.
func (d Defaults) IsEmpty() bool {
return d.NtfyBaseURL == "" && d.ExpoAccessToken == ""
}
// Manager is the top-level push entry point. Build with NewManager and
// hand out via the gateway's dependencies. Safe for concurrent use.
type Manager struct {
store ConfigStore
devices PushDeviceStore
defaults Defaults
factory ProviderFactory
logger *zap.Logger
// cache LRU of namespace → built dispatcher.
mu sync.Mutex
cache map[string]*list.Element
lru *list.List
cacheCap int
}
// cacheEntry is the doubly-linked-list node + dispatcher payload.
type cacheEntry struct {
namespace string
dispatcher *PushDispatcher
}
// defaultCacheCap caps how many namespaces' dispatchers we hold in memory.
// Each entry is small (~few hundred bytes); 256 is generous and bounds
// memory under abuse.
const defaultCacheCap = 256
// NewManager constructs a Manager with the given device store, config
// store, fallback Defaults, and ProviderFactory.
//
// The ProviderFactory is REQUIRED — it tells the manager how to turn a
// resolved Config into PushProvider instances. The gateway's
// dependencies wires this up (it imports the provider sub-packages and
// returns the right providers for each config field that's populated).
func NewManager(devices PushDeviceStore, store ConfigStore, defaults Defaults, factory ProviderFactory, logger *zap.Logger) *Manager {
if logger == nil {
logger = zap.NewNop()
}
return &Manager{
store: store,
devices: devices,
defaults: defaults,
factory: factory,
logger: logger,
cache: make(map[string]*list.Element, defaultCacheCap),
lru: list.New(),
cacheCap: defaultCacheCap,
}
}
// SendToUser dispatches a push to every device registered for the user
// in the given namespace. Looks up per-namespace config (or falls back
// to defaults), builds the appropriate dispatcher, and sends.
//
// Returns:
// - nil on success or "no devices" (per existing PushDispatcher
// contract — sending push to a user with zero devices is a no-op)
// - ErrPushNotConfigured when neither namespace config nor gateway
// defaults provide a working provider
func (m *Manager) SendToUser(ctx context.Context, namespace, userID string, msg PushMessage) error {
d, err := m.dispatcherFor(ctx, namespace)
if err != nil {
return err
}
return d.SendToUser(ctx, namespace, userID, msg)
}
// DeviceStore exposes the underlying device store so HTTP handlers
// (register/list/delete) can use it directly without going through the
// dispatcher path.
func (m *Manager) DeviceStore() PushDeviceStore {
return m.devices
}
// IsConfigured returns true when the namespace has per-namespace config
// OR usable fallback defaults — i.e. SendToUser would not fail with
// ErrPushNotConfigured. Cheap path-check used by HTTP 503 responses.
func (m *Manager) IsConfigured(ctx context.Context, namespace string) bool {
if !m.defaults.IsEmpty() {
return true
}
if m.store == nil {
return false
}
cfg, err := m.store.Get(ctx, namespace)
if err != nil || cfg == nil {
return false
}
return !cfg.IsEmpty()
}
// Invalidate evicts the cached dispatcher for a namespace. Call after a
// successful PUT/DELETE on /v1/push/config so the next SendToUser uses
// fresh config.
func (m *Manager) Invalidate(namespace string) {
m.mu.Lock()
defer m.mu.Unlock()
if elem, ok := m.cache[namespace]; ok {
m.lru.Remove(elem)
delete(m.cache, namespace)
}
}
// dispatcherFor returns a (cached or freshly built) dispatcher with the
// providers configured for the given namespace.
func (m *Manager) dispatcherFor(ctx context.Context, namespace string) (*PushDispatcher, error) {
// Fast path — already cached.
m.mu.Lock()
if elem, ok := m.cache[namespace]; ok {
m.lru.MoveToFront(elem)
entry := elem.Value.(*cacheEntry)
m.mu.Unlock()
return entry.dispatcher, nil
}
m.mu.Unlock()
// Slow path — load config + build dispatcher.
d, err := m.buildDispatcher(ctx, namespace)
if err != nil {
return nil, err
}
// Insert into cache (eviction if at capacity).
m.mu.Lock()
defer m.mu.Unlock()
// Recheck under lock — another goroutine may have built one.
if elem, ok := m.cache[namespace]; ok {
m.lru.MoveToFront(elem)
return elem.Value.(*cacheEntry).dispatcher, nil
}
if m.lru.Len() >= m.cacheCap {
oldest := m.lru.Back()
if oldest != nil {
old := oldest.Value.(*cacheEntry)
m.lru.Remove(oldest)
delete(m.cache, old.namespace)
}
}
entry := &cacheEntry{namespace: namespace, dispatcher: d}
m.cache[namespace] = m.lru.PushFront(entry)
return d, nil
}
// buildDispatcher resolves the effective config for the namespace and
// constructs a fresh PushDispatcher with the right providers registered.
//
// Effective-config resolution:
// 1. Start with gateway YAML defaults
// 2. If a per-namespace row exists, override field-by-field
// 3. If the result has zero providers, return ErrPushNotConfigured
func (m *Manager) buildDispatcher(ctx context.Context, namespace string) (*PushDispatcher, error) {
eff := Config{Namespace: namespace}
// Start from defaults.
eff.NtfyBaseURL = m.defaults.NtfyBaseURL
eff.NtfyAuthToken = m.defaults.NtfyAuthToken
eff.ExpoAccessToken = m.defaults.ExpoAccessToken
// Layer namespace overrides if any.
if m.store != nil {
nc, err := m.store.Get(ctx, namespace)
if err != nil && !errors.Is(err, ErrConfigNotFound) {
return nil, fmt.Errorf("manager: load namespace config: %w", err)
}
if nc != nil {
// Each field overrides the default if non-empty. An explicit
// empty value via PUT clears the namespace's row entirely
// (DELETE) — there's no "set this field to empty to clear"
// half-state, by design.
if nc.NtfyBaseURL != "" {
eff.NtfyBaseURL = nc.NtfyBaseURL
}
if nc.NtfyAuthToken != "" {
eff.NtfyAuthToken = nc.NtfyAuthToken
}
if nc.ExpoAccessToken != "" {
eff.ExpoAccessToken = nc.ExpoAccessToken
}
}
}
// Refuse to build a dispatcher with no providers — caller gets a
// clear error instead of a silent no-op.
if eff.NtfyBaseURL == "" && eff.ExpoAccessToken == "" {
return nil, ErrPushNotConfigured
}
if m.factory == nil {
// Defensive: a Manager built without a factory can't produce
// providers. Programmer error; surface explicitly.
return nil, fmt.Errorf("manager: no provider factory configured")
}
providers := m.factory(eff)
if len(providers) == 0 {
return nil, ErrPushNotConfigured
}
d := New(m.devices, m.logger)
for _, p := range providers {
d.Register(p)
}
return d, nil
}

View File

@ -0,0 +1,312 @@
package push
import (
"context"
"errors"
"sync"
"sync/atomic"
"testing"
"go.uber.org/zap"
)
// fakeConfigStore is a minimal in-memory ConfigStore for tests.
type fakeConfigStore struct {
mu sync.Mutex
configs map[string]*Config
getErr error // optional injected error
calls atomic.Int32
}
func newFakeConfigStore() *fakeConfigStore {
return &fakeConfigStore{configs: map[string]*Config{}}
}
func (f *fakeConfigStore) Get(_ context.Context, ns string) (*Config, error) {
f.calls.Add(1)
if f.getErr != nil {
return nil, f.getErr
}
f.mu.Lock()
defer f.mu.Unlock()
cfg, ok := f.configs[ns]
if !ok {
return nil, ErrConfigNotFound
}
// Return a copy so the caller can't mutate our state.
cp := *cfg
return &cp, nil
}
func (f *fakeConfigStore) Upsert(_ context.Context, c Config) error {
f.mu.Lock()
defer f.mu.Unlock()
cp := c
f.configs[c.Namespace] = &cp
return nil
}
func (f *fakeConfigStore) Delete(_ context.Context, ns string) error {
f.mu.Lock()
defer f.mu.Unlock()
delete(f.configs, ns)
return nil
}
// managerFakeProvider is a stub PushProvider for manager tests.
// (The dispatcher tests already declare a `fakeProvider`; rename ours.)
type managerFakeProvider struct {
name string
sends atomic.Int32
}
func (p *managerFakeProvider) Name() string { return p.name }
func (p *managerFakeProvider) Send(_ context.Context, _ PushMessage) error {
p.sends.Add(1)
return nil
}
func (p *managerFakeProvider) SendBulk(_ context.Context, msgs []PushMessage) []error {
for range msgs {
p.sends.Add(1)
}
return nil
}
func TestManager_namespace_with_no_config_uses_defaults(t *testing.T) {
store := newFakeConfigStore() // empty
defaults := Defaults{NtfyBaseURL: "http://default-ntfy"}
var providerCalls atomic.Int32
factory := func(c Config) []PushProvider {
providerCalls.Add(1)
// Verify the manager passed defaults through to the factory.
if c.NtfyBaseURL != "http://default-ntfy" {
t.Errorf("factory got URL=%q, want default", c.NtfyBaseURL)
}
return []PushProvider{&managerFakeProvider{name: "ntfy"}}
}
m := NewManager(&fakeDeviceStore{}, store, defaults, factory, zap.NewNop())
d, err := m.dispatcherFor(context.Background(), "ns-A")
if err != nil {
t.Fatalf("expected default to satisfy: %v", err)
}
if d == nil {
t.Fatal("expected a dispatcher")
}
if providerCalls.Load() != 1 {
t.Errorf("factory should have been called once, got %d", providerCalls.Load())
}
}
func TestManager_namespace_config_overrides_defaults(t *testing.T) {
store := newFakeConfigStore()
store.Upsert(context.Background(), Config{
Namespace: "ns-A",
NtfyBaseURL: "http://override-ntfy",
})
defaults := Defaults{NtfyBaseURL: "http://default-ntfy"}
var seenURL string
factory := func(c Config) []PushProvider {
seenURL = c.NtfyBaseURL
return []PushProvider{&managerFakeProvider{name: "ntfy"}}
}
m := NewManager(&fakeDeviceStore{}, store, defaults, factory, zap.NewNop())
if _, err := m.dispatcherFor(context.Background(), "ns-A"); err != nil {
t.Fatalf("dispatcherFor: %v", err)
}
if seenURL != "http://override-ntfy" {
t.Errorf("factory got URL=%q, want the namespace override", seenURL)
}
}
func TestManager_no_config_no_defaults_returns_ErrPushNotConfigured(t *testing.T) {
store := newFakeConfigStore()
factory := func(_ Config) []PushProvider { return nil }
m := NewManager(&fakeDeviceStore{}, store, Defaults{}, factory, zap.NewNop())
_, err := m.dispatcherFor(context.Background(), "ns-A")
if !errors.Is(err, ErrPushNotConfigured) {
t.Errorf("expected ErrPushNotConfigured, got %v", err)
}
}
func TestManager_caches_dispatchers_per_namespace(t *testing.T) {
store := newFakeConfigStore()
store.Upsert(context.Background(), Config{Namespace: "ns-A", NtfyBaseURL: "u"})
var factoryCalls atomic.Int32
factory := func(_ Config) []PushProvider {
factoryCalls.Add(1)
return []PushProvider{&managerFakeProvider{name: "ntfy"}}
}
m := NewManager(&fakeDeviceStore{}, store, Defaults{}, factory, zap.NewNop())
// Two calls for the same namespace = one factory invocation (cache hit).
for i := 0; i < 5; i++ {
if _, err := m.dispatcherFor(context.Background(), "ns-A"); err != nil {
t.Fatalf("iter %d: %v", i, err)
}
}
if factoryCalls.Load() != 1 {
t.Errorf("expected 1 factory call due to cache, got %d", factoryCalls.Load())
}
}
func TestManager_invalidate_forces_rebuild(t *testing.T) {
store := newFakeConfigStore()
store.Upsert(context.Background(), Config{Namespace: "ns-A", NtfyBaseURL: "v1"})
var seenURLs []string
factory := func(c Config) []PushProvider {
seenURLs = append(seenURLs, c.NtfyBaseURL)
return []PushProvider{&managerFakeProvider{name: "ntfy"}}
}
m := NewManager(&fakeDeviceStore{}, store, Defaults{}, factory, zap.NewNop())
if _, err := m.dispatcherFor(context.Background(), "ns-A"); err != nil {
t.Fatal(err)
}
// Update config + invalidate.
store.Upsert(context.Background(), Config{Namespace: "ns-A", NtfyBaseURL: "v2"})
m.Invalidate("ns-A")
if _, err := m.dispatcherFor(context.Background(), "ns-A"); err != nil {
t.Fatal(err)
}
if len(seenURLs) != 2 || seenURLs[0] != "v1" || seenURLs[1] != "v2" {
t.Errorf("expected factory to be called twice with v1 then v2, got %v", seenURLs)
}
}
func TestManager_per_namespace_isolation(t *testing.T) {
// ns-A has its own config; ns-B has none and uses defaults.
store := newFakeConfigStore()
store.Upsert(context.Background(), Config{Namespace: "ns-A", NtfyBaseURL: "ns-A-url"})
defaults := Defaults{NtfyBaseURL: "default-url"}
urlByNS := make(map[string]string)
var mu sync.Mutex
factory := func(c Config) []PushProvider {
mu.Lock()
urlByNS[c.Namespace] = c.NtfyBaseURL
mu.Unlock()
return []PushProvider{&managerFakeProvider{name: "ntfy"}}
}
m := NewManager(&fakeDeviceStore{}, store, defaults, factory, zap.NewNop())
if _, err := m.dispatcherFor(context.Background(), "ns-A"); err != nil {
t.Fatal(err)
}
if _, err := m.dispatcherFor(context.Background(), "ns-B"); err != nil {
t.Fatal(err)
}
mu.Lock()
defer mu.Unlock()
if urlByNS["ns-A"] != "ns-A-url" {
t.Errorf("ns-A got %q, want ns-A-url", urlByNS["ns-A"])
}
if urlByNS["ns-B"] != "default-url" {
t.Errorf("ns-B got %q, want default-url", urlByNS["ns-B"])
}
}
func TestManager_IsConfigured_distinguishes_states(t *testing.T) {
store := newFakeConfigStore()
store.Upsert(context.Background(), Config{Namespace: "with-cfg", NtfyBaseURL: "u"})
// Without defaults, only namespace with config returns true.
m1 := NewManager(&fakeDeviceStore{}, store, Defaults{}, nil, zap.NewNop())
if !m1.IsConfigured(context.Background(), "with-cfg") {
t.Error("expected with-cfg to be configured")
}
if m1.IsConfigured(context.Background(), "no-cfg") {
t.Error("expected no-cfg to NOT be configured (no defaults)")
}
// With defaults, every namespace is configured.
m2 := NewManager(&fakeDeviceStore{}, store, Defaults{NtfyBaseURL: "d"}, nil, zap.NewNop())
if !m2.IsConfigured(context.Background(), "no-cfg") {
t.Error("expected no-cfg to be configured via defaults")
}
}
func TestManager_concurrent_dispatcherFor_no_race(t *testing.T) {
// Run with -race.
store := newFakeConfigStore()
store.Upsert(context.Background(), Config{Namespace: "ns", NtfyBaseURL: "u"})
factory := func(_ Config) []PushProvider { return []PushProvider{&managerFakeProvider{name: "ntfy"}} }
m := NewManager(&fakeDeviceStore{}, store, Defaults{}, factory, zap.NewNop())
var wg sync.WaitGroup
for g := 0; g < 16; g++ {
wg.Add(1)
go func() {
defer wg.Done()
for i := 0; i < 50; i++ {
_, _ = m.dispatcherFor(context.Background(), "ns")
}
}()
}
wg.Wait()
}
// fakeDeviceStore is a stub PushDeviceStore for manager tests.
type fakeDeviceStore struct{}
func (s *fakeDeviceStore) Upsert(_ context.Context, _ PushDevice) error { return nil }
func (s *fakeDeviceStore) Delete(_ context.Context, _, _ string) error { return nil }
func (s *fakeDeviceStore) ListForUser(_ context.Context, _, _ string) ([]PushDevice, error) {
return nil, nil
}
func TestConfig_Redacted_omits_secrets(t *testing.T) {
cfg := &Config{
Namespace: "ns",
NtfyBaseURL: "https://ntfy.example",
NtfyAuthToken: "super-secret-token",
ExpoAccessToken: "another-super-secret",
UpdatedAt: 12345,
UpdatedBy: "admin",
}
r := cfg.Redacted()
if r.NtfyBaseURL != "https://ntfy.example" {
t.Errorf("ntfy URL must be present (it's not a secret), got %q", r.NtfyBaseURL)
}
if !r.HasNtfyAuthToken {
t.Error("HasNtfyAuthToken must reflect that the token is set")
}
if !r.HasExpoAccessToken {
t.Error("HasExpoAccessToken must reflect that the token is set")
}
// Most importantly: the tokens themselves must not appear anywhere
// in the redacted view. Walk the struct's exported fields.
if r.UpdatedAt != 12345 {
t.Errorf("UpdatedAt should round-trip, got %d", r.UpdatedAt)
}
}
func TestConfig_IsEmpty(t *testing.T) {
cases := []struct {
name string
c *Config
want bool
}{
{"nil pointer", nil, true},
{"zero value", &Config{}, true},
{"namespace only", &Config{Namespace: "ns"}, true},
{"with ntfy URL", &Config{NtfyBaseURL: "u"}, false},
{"with expo token", &Config{ExpoAccessToken: "t"}, false},
}
for _, c := range cases {
t.Run(c.name, func(t *testing.T) {
if got := c.c.IsEmpty(); got != c.want {
t.Errorf("IsEmpty() = %v, want %v", got, c.want)
}
})
}
}

View File

@ -0,0 +1,171 @@
package serverless
import (
"context"
"strings"
"sync"
"testing"
"go.uber.org/zap"
)
// minimalWASM is the same nop-only WASM from TestEngine_Execute —
// exports _start, returns immediately. Sufficient for proving that
// ExecuteModule's instantiation contract works under repeated /
// concurrent calls without name-collision (bug #221).
var minimalWASM = []byte{
0x00, 0x61, 0x73, 0x6d, 0x01, 0x00, 0x00, 0x00,
0x01, 0x04, 0x01, 0x60, 0x00, 0x00,
0x03, 0x02, 0x01, 0x00,
0x07, 0x0a, 0x01, 0x06, 0x5f, 0x73, 0x74, 0x61, 0x72, 0x74, 0x00, 0x00,
0x0a, 0x04, 0x01, 0x02, 0x00, 0x0b,
}
// Bug #221 regression: invocations of the SAME function must succeed
// repeatedly. Before the fix, the second invocation hit
// `module[<fn>] has already been instantiated` because the executor
// passed the function name to wazero's WithName which globally
// registered it.
func TestEngine_RepeatedInvocations_SameFunction_NoCollision(t *testing.T) {
logger := zap.NewNop()
registry := NewMockRegistry()
hostServices := NewMockHostServices()
engine, err := NewEngine(nil, registry, hostServices, logger)
if err != nil {
t.Fatalf("NewEngine: %v", err)
}
defer engine.Close(context.Background())
fnDef := &FunctionDefinition{
Name: "username-check",
Namespace: "anchat-test",
MemoryLimitMB: 64,
TimeoutSeconds: 5,
}
if _, err := registry.Register(context.Background(), fnDef, minimalWASM); err != nil {
t.Fatalf("Register: %v", err)
}
fn, err := registry.Get(context.Background(), "anchat-test", "username-check", 0)
if err != nil {
t.Fatalf("Get: %v", err)
}
// Five sequential invocations — each must succeed.
for i := 0; i < 5; i++ {
if _, err := engine.Execute(context.Background(), fn, []byte("x"), nil); err != nil {
t.Fatalf("invocation %d failed: %v", i, err)
}
}
}
// Bug #221 regression: two CONCURRENT invocations of the same function
// must both succeed. Before the fix this was THE most reliable repro —
// the second goroutine's InstantiateModule lost the race for the global
// name slot.
func TestEngine_ConcurrentInvocations_SameFunction_NoCollision(t *testing.T) {
logger := zap.NewNop()
registry := NewMockRegistry()
hostServices := NewMockHostServices()
engine, err := NewEngine(nil, registry, hostServices, logger)
if err != nil {
t.Fatalf("NewEngine: %v", err)
}
defer engine.Close(context.Background())
fnDef := &FunctionDefinition{
Name: "user-create",
Namespace: "anchat-test",
MemoryLimitMB: 64,
TimeoutSeconds: 5,
}
if _, err := registry.Register(context.Background(), fnDef, minimalWASM); err != nil {
t.Fatalf("Register: %v", err)
}
fn, err := registry.Get(context.Background(), "anchat-test", "user-create", 0)
if err != nil {
t.Fatalf("Get: %v", err)
}
const goroutines = 16
const iterations = 10
var (
wg sync.WaitGroup
errMu sync.Mutex
firstErr error
errPayloads []string
)
for g := 0; g < goroutines; g++ {
wg.Add(1)
go func() {
defer wg.Done()
for i := 0; i < iterations; i++ {
if _, err := engine.Execute(context.Background(), fn, []byte("x"), nil); err != nil {
errMu.Lock()
if firstErr == nil {
firstErr = err
}
errPayloads = append(errPayloads, err.Error())
errMu.Unlock()
return
}
}
}()
}
wg.Wait()
if firstErr != nil {
// Surface ALL distinct error messages — easier to debug than the
// first one alone if the runtime starts failing weirdly.
t.Fatalf("concurrent invocations hit %d errors. first: %v\nall: %v",
len(errPayloads), firstErr, errPayloads)
}
}
// Bug #221 regression: re-deploy of the same function (new version, new
// WASM CID) followed by an invocation must succeed. The old compiled
// module's cache slot must not block the new instance from running.
func TestEngine_ReDeploy_ThenInvoke_NoCollision(t *testing.T) {
logger := zap.NewNop()
registry := NewMockRegistry()
hostServices := NewMockHostServices()
engine, err := NewEngine(nil, registry, hostServices, logger)
if err != nil {
t.Fatalf("NewEngine: %v", err)
}
defer engine.Close(context.Background())
fnDef := &FunctionDefinition{
Name: "wallet-link",
Namespace: "anchat-test",
MemoryLimitMB: 64,
TimeoutSeconds: 5,
}
// First deploy + invoke.
if _, err := registry.Register(context.Background(), fnDef, minimalWASM); err != nil {
t.Fatalf("first Register: %v", err)
}
fn1, _ := registry.Get(context.Background(), "anchat-test", "wallet-link", 0)
if _, err := engine.Execute(context.Background(), fn1, []byte("x"), nil); err != nil {
t.Fatalf("first invoke: %v", err)
}
// Re-deploy: same name, new bytes (still valid). MockRegistry overwrites
// the function record; the engine sees a fresh wasmCID.
if _, err := registry.Register(context.Background(), fnDef, minimalWASM); err != nil {
t.Fatalf("re-deploy: %v", err)
}
fn2, _ := registry.Get(context.Background(), "anchat-test", "wallet-link", 0)
// Multiple invocations after re-deploy — exactly the AnChat repro.
for i := 0; i < 3; i++ {
_, err := engine.Execute(context.Background(), fn2, []byte("x"), nil)
if err != nil {
if strings.Contains(err.Error(), "already been instantiated") {
t.Fatalf("bug #221 regression: %v", err)
}
t.Fatalf("post-redeploy invoke %d failed: %v", i, err)
}
}
}

View File

@ -34,6 +34,20 @@ func NewExecutor(runtime wazero.Runtime, logger *zap.Logger, maxConcurrent int)
// ExecuteModule instantiates and runs a WASM module with the given input. // ExecuteModule instantiates and runs a WASM module with the given input.
// The contextSetter callback is used to set invocation context on host services. // The contextSetter callback is used to set invocation context on host services.
//
// Bug #221 fix: each invocation gets an ANONYMOUS module instance (no name
// in the wazero runtime registry). Previously we used the function name
// as the module name, which made wazero refuse the second instantiation
// with "module[<fn>] has already been instantiated" any time:
//
// - Two invocations of the same function ran concurrently
// - A re-deploy happened while a previous invocation was still mid-flight
// - An instance.Close() raced with the next instantiation
//
// Anonymous instances eliminate the collision class entirely. The
// function name is still surfaced via WithArgs(moduleName) so WASI
// `argv[0]` shows it inside the function (matches the prior behavior
// that user code may depend on).
func (e *Executor) ExecuteModule(ctx context.Context, compiled wazero.CompiledModule, moduleName string, input []byte, contextSetter func(), contextClearer func()) ([]byte, error) { func (e *Executor) ExecuteModule(ctx context.Context, compiled wazero.CompiledModule, moduleName string, input []byte, contextSetter func(), contextClearer func()) ([]byte, error) {
// Set invocation context for host functions // Set invocation context for host functions
if contextSetter != nil { if contextSetter != nil {
@ -48,9 +62,14 @@ func (e *Executor) ExecuteModule(ctx context.Context, compiled wazero.CompiledMo
stdout := new(bytes.Buffer) stdout := new(bytes.Buffer)
stderr := new(bytes.Buffer) stderr := new(bytes.Buffer)
// Create module configuration with WASI stdio // Create module configuration with WASI stdio.
//
// WithName("") = anonymous instance, NOT registered under the runtime's
// global module name table. argv[0] still carries the function name for
// WASI-consuming code that reads os.Args[0] (TinyGo's runtime + apps
// that log it). See doc comment above for the bug-#221 rationale.
moduleConfig := wazero.NewModuleConfig(). moduleConfig := wazero.NewModuleConfig().
WithName(moduleName). WithName("").
WithStdin(stdin). WithStdin(stdin).
WithStdout(stdout). WithStdout(stdout).
WithStderr(stderr). WithStderr(stderr).

View File

@ -16,10 +16,14 @@ import (
// NewHostFunctions creates a new HostFunctions instance. // NewHostFunctions creates a new HostFunctions instance.
// //
// pushDispatcher and wsBridge may be nil when those features aren't // pushDispatcher / pushManager / wsBridge may be nil when those features
// configured on this gateway — in that case PushSend silently no-ops // aren't configured on this gateway. PushSend prefers pushManager when
// (so functions stay portable) and WSPubSubBridge returns an explicit // present (per-namespace config takes effect), falls back to pushDispatcher,
// error (because absence of a requested bridge should be visible). // and silently no-ops when both are nil — so functions stay portable
// across deployments with/without push.
//
// WSPubSubBridge returns an explicit error when wsBridge is nil because
// absence of a requested bridge should be visible (callers asked for it).
func NewHostFunctions( func NewHostFunctions(
db rqlite.Client, db rqlite.Client,
cacheClient olriclib.Client, cacheClient olriclib.Client,
@ -28,6 +32,7 @@ func NewHostFunctions(
wsManager serverless.WebSocketManager, wsManager serverless.WebSocketManager,
secrets serverless.SecretsManager, secrets serverless.SecretsManager,
pushDispatcher *push.PushDispatcher, pushDispatcher *push.PushDispatcher,
pushManager *push.Manager,
wsBridge *wsbridge.Bridge, wsBridge *wsbridge.Bridge,
cfg HostFunctionsConfig, cfg HostFunctionsConfig,
logger *zap.Logger, logger *zap.Logger,
@ -46,6 +51,7 @@ func NewHostFunctions(
wsManager: wsManager, wsManager: wsManager,
secrets: secrets, secrets: secrets,
pushDispatcher: pushDispatcher, pushDispatcher: pushDispatcher,
pushManager: pushManager,
wsBridge: wsBridge, wsBridge: wsBridge,
httpClient: tlsutil.NewHTTPClient(httpTimeout), httpClient: tlsutil.NewHTTPClient(httpTimeout),
logger: logger, logger: logger,

View File

@ -33,10 +33,16 @@ const MaxPushSendArgsBytes = 16 * 1024
// namespace — the dispatcher reads the namespace from the invocation // namespace — the dispatcher reads the namespace from the invocation
// context (set by the engine before invoking). // context (set by the engine before invoking).
// //
// If push is not configured on this gateway (no dispatcher), this returns // If push is not configured on this gateway (no dispatcher AND no
// nil (silent no-op) so functions remain portable across environments. // manager), this returns nil (silent no-op) so functions remain portable
// across environments.
//
// When the manager is present (bug #220 follow-up), it routes through
// per-namespace config — tenants who have set their own ntfy / expo via
// PUT /v1/push/config get their providers; namespaces with no config
// fall back to the gateway YAML defaults via the manager's resolution.
func (h *HostFunctions) PushSend(ctx context.Context, userID string, msgJSON []byte) error { func (h *HostFunctions) PushSend(ctx context.Context, userID string, msgJSON []byte) error {
if h.pushDispatcher == nil { if h.pushManager == nil && h.pushDispatcher == nil {
// Silent no-op — push isn't configured on this gateway. // Silent no-op — push isn't configured on this gateway.
return nil return nil
} }
@ -96,8 +102,24 @@ func (h *HostFunctions) PushSend(ctx context.Context, userID string, msgJSON []b
Data: args.Data, Data: args.Data,
} }
if err := h.pushDispatcher.SendToUser(ctx, namespace, userID, msg); err != nil { // Route through Manager when present so per-namespace push config
return &serverless.HostFunctionError{Function: "push_send", Cause: err} // (set via PUT /v1/push/config) takes effect. The Manager itself
// falls back to YAML defaults if the namespace hasn't set anything.
var sendErr error
if h.pushManager != nil {
sendErr = h.pushManager.SendToUser(ctx, namespace, userID, msg)
// ErrPushNotConfigured here would mean: no per-namespace config
// AND no YAML defaults. Treat as silent no-op for portability —
// same contract as "no dispatcher at all". Functions can't depend
// on push being available.
if sendErr != nil && sendErr.Error() == push.ErrPushNotConfigured.Error() {
return nil
}
} else {
sendErr = h.pushDispatcher.SendToUser(ctx, namespace, userID, msg)
}
if sendErr != nil {
return &serverless.HostFunctionError{Function: "push_send", Cause: sendErr}
} }
return nil return nil
} }

View File

@ -34,9 +34,13 @@ type HostFunctions struct {
httpClient *http.Client httpClient *http.Client
logger *zap.Logger logger *zap.Logger
// pushDispatcher may be nil when push isn't configured for this gateway. // pushDispatcher (legacy) and pushManager (per-namespace, bug #220
// In that case PushSend returns nil silently — see hostfunctions/push.go. // follow-up) provide push send-paths. When pushManager is set, PushSend
// uses it so per-namespace config takes effect; pushDispatcher is the
// fallback. Both nil = push not configured anywhere on this gateway,
// PushSend returns nil silently for portability.
pushDispatcher *push.PushDispatcher pushDispatcher *push.PushDispatcher
pushManager *push.Manager
// wsBridge may be nil when the gateway doesn't run a bridge. In that // wsBridge may be nil when the gateway doesn't run a bridge. In that
// case WSPubSubBridge returns an error rather than silently no-oping // case WSPubSubBridge returns an error rather than silently no-oping

View File

@ -5,6 +5,7 @@ import (
"encoding/json" "encoding/json"
"errors" "errors"
"fmt" "fmt"
"strings"
"time" "time"
"github.com/google/uuid" "github.com/google/uuid"
@ -420,26 +421,55 @@ func (i *Invoker) BatchInvoke(ctx context.Context, req *BatchInvokeRequest) (*Ba
// ----------------------------------------------------------------------------- // -----------------------------------------------------------------------------
// CanInvoke checks if a caller is authorized to invoke a function. // CanInvoke checks if a caller is authorized to invoke a function.
//
// Authorization model:
// - Public functions (`is_public: true`): anyone can invoke, no auth needed.
// The auth middleware lets unauthenticated requests through to public
// paths.
// - Private functions: any caller that the auth middleware has already
// authenticated for THIS namespace can invoke. By the time we reach
// this function, the request has passed authMiddleware which validates
// EITHER a JWT-bearing token whose `namespace` claim matches the target
// namespace OR an API key resolved to the target namespace. So the
// non-empty `callerWallet` here is sufficient evidence of a verified
// in-namespace caller.
//
// History (bug #215 follow-up): the previous logic was a stub —
//
// return callerWallet == namespace || fn.CreatedBy == callerWallet, nil
//
// — which only allowed namespace-name-as-wallet (the API-key fallback) or
// the deploying wallet. Onboarding-style functions like `user-create`,
// where a brand-new wallet calls in to register, were rejected with 401
// because the new wallet was neither the namespace string nor the
// deployer. They worked only as a side-effect of JWT verification
// silently failing pre-#215, falling through to API-key auth, and
// callerWallet collapsing to the namespace string. Once JWT verify was
// fixed, the underlying flaw surfaced.
//
// Fine-grained per-function ACLs (group membership, roles) are deferred
// until there's a concrete tenant requirement. Today, "private" means
// "authenticated in-namespace caller required" and that's enforced
// here + at authMiddleware.
func (i *Invoker) CanInvoke(ctx context.Context, namespace, functionName string, callerWallet string) (bool, error) { func (i *Invoker) CanInvoke(ctx context.Context, namespace, functionName string, callerWallet string) (bool, error) {
fn, err := i.registry.Get(ctx, namespace, functionName, 0) fn, err := i.registry.Get(ctx, namespace, functionName, 0)
if err != nil { if err != nil {
return false, err return false, err
} }
// Public functions can be invoked by anyone // Public functions can be invoked by anyone (auth middleware allows
// the request through without credentials).
if fn.IsPublic { if fn.IsPublic {
return true, nil return true, nil
} }
// Non-public functions require the caller to be in the same namespace // Private function: require an authenticated caller. The auth
// (simplified authorization - can be extended) // middleware has already verified the caller belongs to this
if callerWallet == "" { // namespace (either via JWT `namespace` claim or via API-key
return false, nil // namespace lookup) before this function ever runs, so the only
} // thing we need to confirm here is that the caller has SOME
// identity at all (i.e. the request wasn't anonymous).
// For now, just check if caller wallet matches namespace return strings.TrimSpace(callerWallet) != "", nil
// In production, you'd check group membership, roles, etc.
return callerWallet == namespace || fn.CreatedBy == callerWallet, nil
} }
// GetFunctionInfo returns basic info about a function for invocation. // GetFunctionInfo returns basic info about a function for invocation.

View File

@ -0,0 +1,138 @@
package serverless
import (
"context"
"testing"
"go.uber.org/zap"
)
// canInvokeMockRegistry is a minimal FunctionRegistry surface for the
// CanInvoke tests. Only Get is exercised; everything else panics so
// accidental drift is loud.
type canInvokeMockRegistry struct {
FunctionRegistry // embedded interface — calling unimplemented methods panics
fn *Function
err error
}
func (m *canInvokeMockRegistry) Get(_ context.Context, _, _ string, _ int) (*Function, error) {
if m.err != nil {
return nil, m.err
}
return m.fn, nil
}
func newInvokerForCanInvokeTest(fn *Function) *Invoker {
return &Invoker{
registry: &canInvokeMockRegistry{fn: fn},
logger: zap.NewNop(),
}
}
// TestCanInvoke_publicAllowsAnyone — public functions bypass the
// authorization check entirely (auth middleware lets unauthenticated
// requests through to the handler too).
func TestCanInvoke_publicAllowsAnyone(t *testing.T) {
inv := newInvokerForCanInvokeTest(&Function{
Namespace: "anchat-test",
Name: "username-check",
IsPublic: true,
})
for _, wallet := range []string{"", "8oxu7UzzaSXc...", "anchat-test"} {
ok, err := inv.CanInvoke(context.Background(), "anchat-test", "username-check", wallet)
if err != nil {
t.Fatalf("wallet=%q: %v", wallet, err)
}
if !ok {
t.Errorf("wallet=%q: public function denied", wallet)
}
}
}
// TestCanInvoke_privateRejectsAnonymous — empty callerWallet means the
// auth middleware didn't establish any identity; private functions reject.
func TestCanInvoke_privateRejectsAnonymous(t *testing.T) {
inv := newInvokerForCanInvokeTest(&Function{
Namespace: "anchat-test",
Name: "user-create",
IsPublic: false,
})
ok, _ := inv.CanInvoke(context.Background(), "anchat-test", "user-create", "")
if ok {
t.Error("anonymous caller should be denied for private function")
}
// Whitespace-only is still anonymous.
ok, _ = inv.CanInvoke(context.Background(), "anchat-test", "user-create", " ")
if ok {
t.Error("whitespace-only callerWallet should be denied")
}
}
// TestCanInvoke_privateAllowsJWTAuthenticatedNewWallet is the regression
// guard for bug #215 follow-up. A brand-new wallet (typical signup flow,
// `user-create` style) calls a private function in a namespace it doesn't
// "own" yet. Must succeed: auth middleware already validated the JWT
// belongs to this namespace, the only role of CanInvoke is to confirm
// SOMEONE is authenticated.
//
// Pre-fix this returned false (the wallet wasn't equal to the namespace
// string and wasn't the deployer), which was the entire reason AnChat saw
// 401 unauthorized after the cluster_secret_path fix unblocked JWT
// verification.
func TestCanInvoke_privateAllowsJWTAuthenticatedNewWallet(t *testing.T) {
inv := newInvokerForCanInvokeTest(&Function{
Namespace: "anchat-test",
Name: "user-create",
IsPublic: false,
CreatedBy: "0xdeployer-wallet-not-this-caller",
})
const newUserWallet = "8oxu7UzzaSXcxZ9B3YuEqr3Qpmx7tgT9HzaA4NUGiand"
ok, err := inv.CanInvoke(context.Background(), "anchat-test", "user-create", newUserWallet)
if err != nil {
t.Fatalf("CanInvoke: %v", err)
}
if !ok {
t.Fatal("new user wallet should be allowed to invoke private function (auth middleware vouches for them)")
}
}
// TestCanInvoke_privateAllowsAPIKeyCaller — API-key callers get
// callerWallet=namespace from the wallet resolver. They should still
// succeed; this preserves the pre-#215 working flow for tenants who
// only use API keys.
func TestCanInvoke_privateAllowsAPIKeyCaller(t *testing.T) {
inv := newInvokerForCanInvokeTest(&Function{
Namespace: "anchat-test",
Name: "user-create",
IsPublic: false,
CreatedBy: "0xdeployer-wallet",
})
ok, err := inv.CanInvoke(context.Background(), "anchat-test", "user-create", "anchat-test")
if err != nil {
t.Fatalf("CanInvoke: %v", err)
}
if !ok {
t.Error("API-key callers (callerWallet=namespace) must keep working")
}
}
// TestCanInvoke_privateAllowsDeployer — the function deployer can invoke
// their own private function. Was true pre-fix and must remain true.
func TestCanInvoke_privateAllowsDeployer(t *testing.T) {
const deployer = "0xdeployer-wallet"
inv := newInvokerForCanInvokeTest(&Function{
Namespace: "anchat-test",
Name: "user-create",
IsPublic: false,
CreatedBy: deployer,
})
ok, err := inv.CanInvoke(context.Background(), "anchat-test", "user-create", deployer)
if err != nil {
t.Fatalf("CanInvoke: %v", err)
}
if !ok {
t.Error("deployer must always be allowed to invoke their own function")
}
}

View File

@ -0,0 +1,217 @@
package triggers
import (
"strings"
"testing"
"time"
)
// ----------------------------------------------------------------------------
// ParseCron — accept paths
// ----------------------------------------------------------------------------
func TestParseCron_fiveField(t *testing.T) {
c, err := ParseCron("0 3 * * *") // every day at 03:00 UTC
if err != nil {
t.Fatalf("ParseCron: %v", err)
}
if c.hasSeconds {
t.Error("hasSeconds = true, want false for 5-field expression")
}
if !c.minutes.match(0) {
t.Error("minutes mask missing 0")
}
if c.minutes.match(1) {
t.Error("minutes mask should not match 1")
}
if !c.hours.match(3) {
t.Error("hours mask missing 3")
}
}
func TestParseCron_sixField(t *testing.T) {
c, err := ParseCron("*/30 * * * * *") // every 30 seconds
if err != nil {
t.Fatalf("ParseCron: %v", err)
}
if !c.hasSeconds {
t.Error("hasSeconds = false, want true for 6-field expression")
}
if !c.seconds.match(0) || !c.seconds.match(30) {
t.Error("seconds mask should match 0 and 30")
}
if c.seconds.match(15) {
t.Error("seconds mask should NOT match 15 for */30")
}
}
func TestParseCron_listsAndRanges(t *testing.T) {
c, err := ParseCron("0,15,30,45 9-17 * * 1-5") // every 15min during business hours, weekdays
if err != nil {
t.Fatalf("ParseCron: %v", err)
}
for _, m := range []int{0, 15, 30, 45} {
if !c.minutes.match(m) {
t.Errorf("minutes mask missing %d", m)
}
}
for _, h := range []int{9, 12, 17} {
if !c.hours.match(h) {
t.Errorf("hours mask missing %d", h)
}
}
if c.hours.match(8) || c.hours.match(18) {
t.Error("hours mask should not match outside 9-17")
}
for _, d := range []int{1, 2, 3, 4, 5} {
if !c.dow.match(d) {
t.Errorf("dow mask missing %d", d)
}
}
if c.dow.match(0) || c.dow.match(6) {
t.Error("dow mask should not match weekend (0 or 6)")
}
}
func TestParseCron_sundayNormalisation(t *testing.T) {
// Cron permits 0 OR 7 for Sunday. Both should be normalised to 0.
c7, err := ParseCron("0 0 * * 7")
if err != nil {
t.Fatalf("ParseCron 7: %v", err)
}
if !c7.dow.match(0) {
t.Error("dow 7 should normalise to match 0 (Sunday)")
}
if c7.dow.match(7) {
t.Error("after normalisation, bit 7 should be cleared (clamped to 0..6)")
}
}
// ----------------------------------------------------------------------------
// ParseCron — reject paths
// ----------------------------------------------------------------------------
func TestParseCron_rejectMalformed(t *testing.T) {
cases := []struct {
expr string
reason string
}{
{"", "empty"},
{" ", "whitespace-only"},
{"* * *", "too few fields"},
{"* * * * * * *", "too many fields"},
{"60 * * * *", "minute out of range"},
{"* 24 * * *", "hour out of range"},
{"* * 0 * *", "day-of-month under range"},
{"* * 32 * *", "day-of-month over range"},
{"* * * 13 *", "month over range"},
{"* * * 0 *", "month under range"},
{"* * * * 8", "day-of-week over range"},
{"abc * * * *", "non-numeric"},
{"5-2 * * * *", "inverted range"},
{"*/0 * * * *", "zero step"},
{"*/-1 * * * *", "negative step"},
}
for _, tc := range cases {
t.Run(tc.reason, func(t *testing.T) {
if _, err := ParseCron(tc.expr); err == nil {
t.Errorf("ParseCron(%q) succeeded; expected error (%s)", tc.expr, tc.reason)
}
})
}
}
// ----------------------------------------------------------------------------
// Next() — correctness
// ----------------------------------------------------------------------------
func TestNext_dailyAt03UTC(t *testing.T) {
c, _ := ParseCron("0 3 * * *")
now := time.Date(2025, 5, 7, 12, 30, 0, 0, time.UTC)
got, err := c.Next(now)
if err != nil {
t.Fatalf("Next: %v", err)
}
want := time.Date(2025, 5, 8, 3, 0, 0, 0, time.UTC)
if !got.Equal(want) {
t.Errorf("Next = %s, want %s", got, want)
}
}
func TestNext_strictlyAfter(t *testing.T) {
// When `after` is exactly on a matching minute, Next must return the
// FOLLOWING match, not `after` itself.
c, _ := ParseCron("0 3 * * *")
now := time.Date(2025, 5, 7, 3, 0, 0, 0, time.UTC) // exactly 03:00
got, _ := c.Next(now)
want := time.Date(2025, 5, 8, 3, 0, 0, 0, time.UTC)
if !got.Equal(want) {
t.Errorf("Next from exact-match should advance one day: got %s, want %s", got, want)
}
}
func TestNext_secondsResolution(t *testing.T) {
c, _ := ParseCron("*/30 * * * * *") // every 30 sec
now := time.Date(2025, 5, 7, 12, 0, 5, 0, time.UTC)
got, _ := c.Next(now)
want := time.Date(2025, 5, 7, 12, 0, 30, 0, time.UTC)
if !got.Equal(want) {
t.Errorf("Next = %s, want %s", got, want)
}
}
func TestNext_monthSkip(t *testing.T) {
// Day-of-month=29, only February — must skip to the next leap year.
c, _ := ParseCron("0 0 29 2 *")
now := time.Date(2025, 1, 1, 0, 0, 0, 0, time.UTC)
got, err := c.Next(now)
if err != nil {
t.Fatalf("Next: %v", err)
}
want := time.Date(2028, 2, 29, 0, 0, 0, 0, time.UTC)
if !got.Equal(want) {
t.Errorf("Next leap = %s, want %s", got, want)
}
}
func TestNext_impossibleScheduleFails(t *testing.T) {
// February 30th never exists.
c, _ := ParseCron("0 0 30 2 *")
now := time.Date(2025, 1, 1, 0, 0, 0, 0, time.UTC)
_, err := c.Next(now)
if err == nil {
t.Fatal("Next should fail for impossible date combo (Feb 30)")
}
if !strings.Contains(err.Error(), "no match") {
t.Errorf("error = %v, want 'no match' substring", err)
}
}
func TestNext_weekdayOnly(t *testing.T) {
c, _ := ParseCron("0 9 * * 1") // every Monday at 09:00
// Wednesday May 7, 2025
now := time.Date(2025, 5, 7, 12, 0, 0, 0, time.UTC)
got, _ := c.Next(now)
// Next Monday is May 12, 2025
want := time.Date(2025, 5, 12, 9, 0, 0, 0, time.UTC)
if !got.Equal(want) {
t.Errorf("Next weekday = %s, want %s", got, want)
}
if got.Weekday() != time.Monday {
t.Errorf("expected Monday, got %s", got.Weekday())
}
}
// ----------------------------------------------------------------------------
// fieldMatcher
// ----------------------------------------------------------------------------
func TestFieldMatcher_outOfRange(t *testing.T) {
var m fieldMatcher = 0xFFFF
if m.match(-1) {
t.Error("match(-1) should be false")
}
if m.match(64) {
t.Error("match(64) should be false (mask is uint64, bits 0..63 only)")
}
}

View File

@ -128,17 +128,38 @@ func (s *CronScheduler) dispatch(ctx context.Context, row CronDueRow, now time.T
zap.String("expr", row.CronExpression), zap.String("expr", row.CronExpression),
zap.Error(err)) zap.Error(err))
// Push next_run_at far out so we don't keep looking at this row. // Push next_run_at far out so we don't keep looking at this row.
_ = s.store.MarkRun(ctx, row.TriggerID, now, now.Add(365*24*time.Hour), // Best-effort; if the lease is lost (ErrAlreadyRan) another gateway
// already saw the same bad expression and is handling it.
_ = s.store.MarkRun(ctx, row.TriggerID, row.NextRunAt, now, now.Add(365*24*time.Hour),
"error", "bad cron expression: "+err.Error()) "error", "bad cron expression: "+err.Error())
return return
} }
next, err := parsed.Next(now) next, err := parsed.Next(now)
if err != nil { if err != nil {
_ = s.store.MarkRun(ctx, row.TriggerID, now, now.Add(365*24*time.Hour), _ = s.store.MarkRun(ctx, row.TriggerID, row.NextRunAt, now, now.Add(365*24*time.Hour),
"error", "no next match: "+err.Error()) "error", "no next match: "+err.Error())
return return
} }
// Claim the lease BEFORE invoking. The compare-and-swap on next_run_at
// ensures only one gateway wins the race when multiple instances see
// the same trigger as due. Pre-emptive claim (advance to the new next
// + provisional "running" status) means we don't issue duplicate
// invokes; the loser bails here. We patch in the real status after the
// invoke completes via a follow-up unconditional UPDATE keyed on the
// new next_run_at — the loser can't accidentally clobber that because
// its expected-old next_run_at no longer matches anything.
if err := s.store.MarkRun(ctx, row.TriggerID, row.NextRunAt, now, next, "running", ""); err != nil {
if err == ErrAlreadyRan {
// Another gateway claimed this tick. Skip silently.
return
}
s.logger.Warn("cron scheduler: failed to claim lease",
zap.String("trigger_id", row.TriggerID),
zap.Error(err))
return
}
req := &serverless.InvokeRequest{ req := &serverless.InvokeRequest{
Namespace: row.Namespace, Namespace: row.Namespace,
FunctionName: row.FunctionName, FunctionName: row.FunctionName,
@ -160,8 +181,12 @@ func (s *CronScheduler) dispatch(ctx context.Context, row CronDueRow, now time.T
status = "error" status = "error"
errMsg = resp.Error errMsg = resp.Error
} }
if err := s.store.MarkRun(ctx, row.TriggerID, now, next, status, errMsg); err != nil { // Patch the result over the provisional "running" row. We're CAS'd on
s.logger.Warn("cron scheduler: MarkRun failed", // the (now) post-claim next_run_at so a stale loser from another node
// can't overwrite — though by construction, only this gateway holds
// the lease at this point.
if err := s.store.MarkRun(ctx, row.TriggerID, next, now, next, status, errMsg); err != nil && err != ErrAlreadyRan {
s.logger.Warn("cron scheduler: failed to record run result",
zap.String("trigger_id", row.TriggerID), zap.String("trigger_id", row.TriggerID),
zap.Error(err)) zap.Error(err))
} }

View File

@ -43,6 +43,12 @@ type cronRow struct {
// CronDueRow is the JOINed row returned by ListDue: trigger + the function // CronDueRow is the JOINed row returned by ListDue: trigger + the function
// metadata the scheduler needs to invoke it. // metadata the scheduler needs to invoke it.
//
// NextRunAt is captured at ListDue time and passed back into MarkRun as the
// expected-old value for the compare-and-swap UPDATE. Two gateways racing
// the same trigger will both see the same NextRunAt; only the first MarkRun
// wins (1 row affected), the loser gets 0 rows affected and the scheduler
// skips the duplicate invoke.
type CronDueRow struct { type CronDueRow struct {
TriggerID string TriggerID string
FunctionID string FunctionID string
@ -156,6 +162,11 @@ func (s *CronTriggerStore) ListDue(ctx context.Context, now time.Time, limit int
if limit <= 0 { if limit <= 0 {
limit = 100 limit = 100
} }
// `next_run_at IS NOT NULL` is defensive: the schema permits NULL and
// `Add` always populates it, but a manual DB poke or future migration
// could leave a NULL row that would otherwise sort first and behave
// unpredictably under `<=`. Guarding here keeps the scheduler's
// invariants explicit.
query := ` query := `
SELECT t.id AS trigger_id, t.function_id AS function_id, SELECT t.id AS trigger_id, t.function_id AS function_id,
f.name AS function_name, f.namespace AS namespace, f.name AS function_name, f.namespace AS namespace,
@ -164,6 +175,7 @@ func (s *CronTriggerStore) ListDue(ctx context.Context, now time.Time, limit int
JOIN functions f ON t.function_id = f.id JOIN functions f ON t.function_id = f.id
WHERE t.enabled = TRUE WHERE t.enabled = TRUE
AND f.status = 'active' AND f.status = 'active'
AND t.next_run_at IS NOT NULL
AND t.next_run_at <= ? AND t.next_run_at <= ?
ORDER BY t.next_run_at ASC ORDER BY t.next_run_at ASC
LIMIT ? LIMIT ?
@ -178,9 +190,20 @@ func (s *CronTriggerStore) ListDue(ctx context.Context, now time.Time, limit int
// MarkRun updates next_run_at + last_run_at + last_status / last_error // MarkRun updates next_run_at + last_run_at + last_status / last_error
// after the scheduler invokes a trigger. Idempotent: if the row was // after the scheduler invokes a trigger. Idempotent: if the row was
// removed concurrently, the UPDATE is a no-op. // removed concurrently, the UPDATE is a no-op.
//
// expectedNextRunAt enables the soft-lease semantics promised by the
// scheduler comment: the UPDATE is gated on the row's current next_run_at
// matching what ListDue read. Two gateways racing the same tick will both
// see the same expectedNextRunAt; only one's UPDATE matches a row, the
// other gets RowsAffected==0 and ErrAlreadyRan back so the scheduler can
// skip the duplicate invoke.
//
// Returns ErrAlreadyRan when the compare-and-swap missed (another node won
// the race or the row was deleted). Other errors are wrapped DB errors.
func (s *CronTriggerStore) MarkRun( func (s *CronTriggerStore) MarkRun(
ctx context.Context, ctx context.Context,
triggerID string, triggerID string,
expectedNextRunAt time.Time,
ranAt time.Time, ranAt time.Time,
nextRunAt time.Time, nextRunAt time.Time,
status string, status string,
@ -189,8 +212,20 @@ func (s *CronTriggerStore) MarkRun(
query := ` query := `
UPDATE function_cron_triggers UPDATE function_cron_triggers
SET last_run_at = ?, next_run_at = ?, last_status = ?, last_error = ? SET last_run_at = ?, next_run_at = ?, last_status = ?, last_error = ?
WHERE id = ? WHERE id = ? AND next_run_at = ?
` `
_, err := s.db.Exec(ctx, query, ranAt, nextRunAt, status, errMsg, triggerID) res, err := s.db.Exec(ctx, query, ranAt, nextRunAt, status, errMsg, triggerID, expectedNextRunAt)
return err if err != nil {
return fmt.Errorf("failed to MarkRun cron trigger: %w", err)
}
if affected, _ := res.RowsAffected(); affected == 0 {
return ErrAlreadyRan
}
return nil
} }
// ErrAlreadyRan is returned by MarkRun when the compare-and-swap on
// next_run_at missed — either another gateway claimed this tick first, or
// the trigger was deleted concurrently. The scheduler treats it as "skip
// the duplicate invoke result" rather than a hard error.
var ErrAlreadyRan = fmt.Errorf("cron trigger already advanced by another node")

View File

@ -0,0 +1,120 @@
package triggers
import (
"context"
"database/sql"
"errors"
"testing"
"time"
"github.com/DeBrosOfficial/network/pkg/rqlite"
"go.uber.org/zap"
)
// markRunMockDB is a minimal rqlite.Client surface for MarkRun tests.
// Only Exec is exercised — anything else panics so accidental drift is loud.
type markRunMockDB struct {
rqlite.Client // embedded interface — calling any unimplemented method panics
lastQuery string
lastArgs []any
rows int64
execErr error
}
func (m *markRunMockDB) Exec(_ context.Context, query string, args ...any) (sql.Result, error) {
m.lastQuery = query
m.lastArgs = args
if m.execErr != nil {
return nil, m.execErr
}
return &fakeResult{rows: m.rows}, nil
}
type fakeResult struct {
rows int64
}
func (f *fakeResult) LastInsertId() (int64, error) { return 0, nil }
func (f *fakeResult) RowsAffected() (int64, error) { return f.rows, nil }
// TestMarkRun_compareAndSwapMisses guards bug #65 audit B6: when two gateways
// race the same tick, only the first MarkRun wins and the second must return
// ErrAlreadyRan so the caller can skip the duplicate invoke.
func TestMarkRun_compareAndSwapMisses(t *testing.T) {
db := &markRunMockDB{rows: 0} // simulate "no row matched the WHERE"
store := NewCronTriggerStore(db, zap.NewNop())
expected := time.Date(2025, 5, 7, 12, 0, 0, 0, time.UTC)
err := store.MarkRun(context.Background(),
"trigger-id", expected, expected.Add(time.Second),
expected.Add(time.Minute), "ok", "")
if !errors.Is(err, ErrAlreadyRan) {
t.Fatalf("MarkRun = %v, want ErrAlreadyRan", err)
}
}
// TestMarkRun_compareAndSwapWins covers the happy path: 1 row affected, no
// error returned. This pairs with the misses-test to fully cover the lease
// branch.
func TestMarkRun_compareAndSwapWins(t *testing.T) {
db := &markRunMockDB{rows: 1}
store := NewCronTriggerStore(db, zap.NewNop())
expected := time.Date(2025, 5, 7, 12, 0, 0, 0, time.UTC)
err := store.MarkRun(context.Background(),
"trigger-id", expected, expected.Add(time.Second),
expected.Add(time.Minute), "ok", "")
if err != nil {
t.Fatalf("MarkRun (winning CAS) = %v, want nil", err)
}
}
// TestMarkRun_dbErrorPropagates ensures a real database failure is wrapped
// and returned, NOT collapsed into ErrAlreadyRan.
func TestMarkRun_dbErrorPropagates(t *testing.T) {
dbErr := errors.New("rqlite unavailable")
db := &markRunMockDB{execErr: dbErr}
store := NewCronTriggerStore(db, zap.NewNop())
err := store.MarkRun(context.Background(),
"trigger-id", time.Now(), time.Now(), time.Now(), "ok", "")
if err == nil {
t.Fatal("MarkRun with DB error returned nil")
}
if errors.Is(err, ErrAlreadyRan) {
t.Fatal("DB error should not surface as ErrAlreadyRan")
}
if !errors.Is(err, dbErr) {
t.Errorf("error chain missing wrapped DB error: %v", err)
}
}
// TestMarkRun_compareAndSwapClauseInQuery verifies the SQL actually contains
// the `next_run_at = ?` predicate — without it the lease semantics are
// silently broken even though tests with a 1-row mock would still "pass".
func TestMarkRun_compareAndSwapClauseInQuery(t *testing.T) {
db := &markRunMockDB{rows: 1}
store := NewCronTriggerStore(db, zap.NewNop())
_ = store.MarkRun(context.Background(), "id",
time.Now(), time.Now(), time.Now(), "ok", "")
if db.lastQuery == "" {
t.Fatal("Exec was not called")
}
// The exact SQL is whitespace-formatted; just check the lease clause is
// present.
if !contains(db.lastQuery, "next_run_at = ?") {
t.Errorf("MarkRun SQL missing CAS clause:\n%s", db.lastQuery)
}
}
func contains(haystack, needle string) bool {
for i := 0; i+len(needle) <= len(haystack); i++ {
if haystack[i:i+len(needle)] == needle {
return true
}
}
return false
}