mirror of
https://github.com/DeBrosOfficial/orama.git
synced 2026-06-16 23:14:13 +00:00
feat(gateway): implement self-service tenant push notifications
- Add `namespace_push_config` table for per-namespace provider settings - Introduce `cluster_secret_path` to enable deterministic JWT signing and AES-256-GCM encryption for push credentials - Update gateway config to support per-namespace overrides of push notification providers (ntfy/Expo) - Bump version to 0.122.3
This commit is contained in:
parent
b5f6fb4497
commit
f55c7269cd
@ -63,7 +63,7 @@ test-e2e-quick:
|
|||||||
|
|
||||||
.PHONY: build clean test deps tidy fmt vet lint install-hooks push-devnet push-testnet rollout-devnet rollout-testnet release
|
.PHONY: build clean test deps tidy fmt vet lint install-hooks push-devnet push-testnet rollout-devnet rollout-testnet release
|
||||||
|
|
||||||
VERSION := 0.122.2
|
VERSION := 0.122.3
|
||||||
COMMIT ?= $(shell git rev-parse --short HEAD 2>/dev/null || echo unknown)
|
COMMIT ?= $(shell git rev-parse --short HEAD 2>/dev/null || echo unknown)
|
||||||
DATE ?= $(shell date -u +%Y-%m-%dT%H:%M:%SZ)
|
DATE ?= $(shell date -u +%Y-%m-%dT%H:%M:%SZ)
|
||||||
LDFLAGS := -X 'main.version=$(VERSION)' -X 'main.commit=$(COMMIT)' -X 'main.date=$(DATE)'
|
LDFLAGS := -X 'main.version=$(VERSION)' -X 'main.commit=$(COMMIT)' -X 'main.date=$(DATE)'
|
||||||
|
|||||||
@ -77,21 +77,26 @@ func parseGatewayConfig(logger *logging.ColoredLogger) *gateway.Config {
|
|||||||
}
|
}
|
||||||
|
|
||||||
type yamlCfg struct {
|
type yamlCfg struct {
|
||||||
ListenAddr string `yaml:"listen_addr"`
|
ListenAddr string `yaml:"listen_addr"`
|
||||||
ClientNamespace string `yaml:"client_namespace"`
|
ClientNamespace string `yaml:"client_namespace"`
|
||||||
RQLiteDSN string `yaml:"rqlite_dsn"`
|
RQLiteDSN string `yaml:"rqlite_dsn"`
|
||||||
GlobalRQLiteDSN string `yaml:"global_rqlite_dsn"`
|
GlobalRQLiteDSN string `yaml:"global_rqlite_dsn"`
|
||||||
Peers []string `yaml:"bootstrap_peers"`
|
Peers []string `yaml:"bootstrap_peers"`
|
||||||
EnableHTTPS bool `yaml:"enable_https"`
|
EnableHTTPS bool `yaml:"enable_https"`
|
||||||
DomainName string `yaml:"domain_name"`
|
DomainName string `yaml:"domain_name"`
|
||||||
TLSCacheDir string `yaml:"tls_cache_dir"`
|
TLSCacheDir string `yaml:"tls_cache_dir"`
|
||||||
OlricServers []string `yaml:"olric_servers"`
|
OlricServers []string `yaml:"olric_servers"`
|
||||||
OlricTimeout string `yaml:"olric_timeout"`
|
OlricTimeout string `yaml:"olric_timeout"`
|
||||||
IPFSClusterAPIURL string `yaml:"ipfs_cluster_api_url"`
|
IPFSClusterAPIURL string `yaml:"ipfs_cluster_api_url"`
|
||||||
IPFSAPIURL string `yaml:"ipfs_api_url"`
|
IPFSAPIURL string `yaml:"ipfs_api_url"`
|
||||||
IPFSTimeout string `yaml:"ipfs_timeout"`
|
IPFSTimeout string `yaml:"ipfs_timeout"`
|
||||||
IPFSReplicationFactor int `yaml:"ipfs_replication_factor"`
|
IPFSReplicationFactor int `yaml:"ipfs_replication_factor"`
|
||||||
WebRTC yamlWebRTCCfg `yaml:"webrtc"`
|
WebRTC yamlWebRTCCfg `yaml:"webrtc"`
|
||||||
|
// ClusterSecretPath: see GatewayYAMLConfig docstring. Optional;
|
||||||
|
// when set, the standalone gateway reads the file at this path
|
||||||
|
// and populates cfg.ClusterSecret so JWT signing keys can be
|
||||||
|
// derived deterministically (bug #215 fix).
|
||||||
|
ClusterSecretPath string `yaml:"cluster_secret_path"`
|
||||||
}
|
}
|
||||||
|
|
||||||
data, err := os.ReadFile(configPath)
|
data, err := os.ReadFile(configPath)
|
||||||
@ -200,6 +205,30 @@ func parseGatewayConfig(logger *logging.ColoredLogger) *gateway.Config {
|
|||||||
cfg.IPFSReplicationFactor = y.IPFSReplicationFactor
|
cfg.IPFSReplicationFactor = y.IPFSReplicationFactor
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Cluster secret — bug #215 fix. The host-managed gateway in
|
||||||
|
// pkg/node/gateway.go reads this from a known on-disk path; the
|
||||||
|
// standalone binary (used by namespace gateways via systemd) needs the
|
||||||
|
// same access so it can derive the cluster-wide Ed25519 JWT signing
|
||||||
|
// key. Without this, namespace gateways had per-node random keys and
|
||||||
|
// JWTs minted on one node were unverifiable on another, leaving
|
||||||
|
// `caller_jwt_subject` empty in serverless host functions.
|
||||||
|
if path := strings.TrimSpace(y.ClusterSecretPath); path != "" {
|
||||||
|
secretBytes, err := os.ReadFile(path)
|
||||||
|
if err != nil {
|
||||||
|
logger.ComponentError(logging.ComponentGeneral,
|
||||||
|
"cluster_secret_path is set but the file is unreadable; "+
|
||||||
|
"JWTs will use a per-node random signing key and will not "+
|
||||||
|
"verify cross-node — bug #215 will reproduce",
|
||||||
|
zap.String("path", path),
|
||||||
|
zap.Error(err))
|
||||||
|
} else {
|
||||||
|
cfg.ClusterSecret = strings.TrimSpace(string(secretBytes))
|
||||||
|
logger.ComponentInfo(logging.ComponentGeneral,
|
||||||
|
"Loaded cluster secret for cluster-wide JWT signing key derivation",
|
||||||
|
zap.String("path", path))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// WebRTC configuration
|
// WebRTC configuration
|
||||||
cfg.WebRTCEnabled = y.WebRTC.Enabled
|
cfg.WebRTCEnabled = y.WebRTC.Enabled
|
||||||
if y.WebRTC.SFUPort > 0 {
|
if y.WebRTC.SFUPort > 0 {
|
||||||
|
|||||||
@ -489,25 +489,59 @@ sudo cp caddy-root-ca.crt /usr/local/share/ca-certificates/caddy-root-ca.crt
|
|||||||
sudo update-ca-certificates
|
sudo update-ca-certificates
|
||||||
```
|
```
|
||||||
|
|
||||||
## Push notifications (optional, per gateway)
|
## Push notifications
|
||||||
|
|
||||||
Push routes (`/v1/push/devices`, `/v1/push/send`) are always registered but
|
Push provider configuration is **tenant-self-service** as of bug #220
|
||||||
return `503 SERVICE_UNAVAILABLE` until at least one provider is configured
|
follow-up. Tenants set their own ntfy / Expo credentials via authenticated
|
||||||
in the gateway config:
|
HTTP — operators no longer need to edit YAML and restart for every namespace
|
||||||
|
that wants push.
|
||||||
|
|
||||||
```yaml
|
### Tenant flow (no operator involvement)
|
||||||
# In your namespace gateway config:
|
|
||||||
push:
|
```bash
|
||||||
ntfy_base_url: "http://localhost:8080" # self-hosted ntfy (preferred for privacy)
|
# Set per-namespace config
|
||||||
expo_access_token: "..." # Expo push (alternative; client SDK lock-in)
|
curl -X PUT https://ns-anchat-test.orama-devnet.network/v1/push/config \
|
||||||
|
-H 'Authorization: Bearer <user-jwt>' \
|
||||||
|
-H 'Content-Type: application/json' \
|
||||||
|
-d '{"ntfy_base_url": "https://ntfy.sh"}'
|
||||||
|
|
||||||
|
# Read current config (secrets redacted to booleans)
|
||||||
|
curl https://ns-anchat-test.orama-devnet.network/v1/push/config \
|
||||||
|
-H 'Authorization: Bearer <user-jwt>'
|
||||||
|
|
||||||
|
# Clear (push reverts to gateway YAML defaults, or 503 if no defaults)
|
||||||
|
curl -X DELETE https://ns-anchat-test.orama-devnet.network/v1/push/config \
|
||||||
|
-H 'Authorization: Bearer <user-jwt>'
|
||||||
```
|
```
|
||||||
|
|
||||||
Restart the gateway after editing. A `GET /v1/push/devices` call (with valid
|
Per-namespace config takes effect on the NEXT push send (the cached
|
||||||
auth) will then return `200` instead of `503`.
|
dispatcher is invalidated on PUT/DELETE). No restart needed.
|
||||||
|
|
||||||
The 503 body explicitly names the config knobs operators need to set, so
|
### Operator flow (cluster-wide defaults — optional)
|
||||||
tenants getting "Push not configured" can self-diagnose without filing a
|
|
||||||
ticket. (Bug #220 audit fix.)
|
Operators can still seed defaults in the gateway YAML. Per-namespace config
|
||||||
|
OVERRIDES the defaults; namespaces with no row inherit them.
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
# Cluster-wide push defaults (optional; tenants override per-namespace)
|
||||||
|
push:
|
||||||
|
ntfy_base_url: "https://ntfy.sh" # default for namespaces with no override
|
||||||
|
expo_access_token: "..." # default Expo token
|
||||||
|
```
|
||||||
|
|
||||||
|
### Encryption
|
||||||
|
|
||||||
|
Sensitive credentials (`ntfy_auth_token`, `expo_access_token`) are
|
||||||
|
AES-256-GCM-encrypted at rest in the `namespace_push_config` table using
|
||||||
|
a key derived from the cluster secret. The GET endpoint returns boolean
|
||||||
|
`has_X` flags only — credentials are NEVER echoed back over HTTP.
|
||||||
|
|
||||||
|
### Disabling push entirely
|
||||||
|
|
||||||
|
If `cluster_secret` isn't configured on the gateway, the push subsystem
|
||||||
|
is disabled and `/v1/push/*` returns 503. To enable: set the cluster secret
|
||||||
|
and restart. (This is the only operator-side restart still required, and
|
||||||
|
it's a one-time action at gateway provisioning.)
|
||||||
|
|
||||||
## Project Structure
|
## Project Structure
|
||||||
|
|
||||||
|
|||||||
@ -194,6 +194,49 @@ The legacy `db_execute` is kept indefinitely so existing functions don't break.
|
|||||||
| `log_info(message)` | Log info-level message (captured in invocation logs). |
|
| `log_info(message)` | Log info-level message (captured in invocation logs). |
|
||||||
| `log_error(message)` | Log error-level message. |
|
| `log_error(message)` | Log error-level message. |
|
||||||
|
|
||||||
|
## Configuring Push Notifications (per-namespace)
|
||||||
|
|
||||||
|
Push providers (ntfy / Expo) are configured **per namespace** by the tenant —
|
||||||
|
no operator involvement, no SSH access required. Set, read, or clear via:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Set / update (sensitive credentials are encrypted at rest)
|
||||||
|
curl -X PUT https://ns-myapp.example.com/v1/push/config \
|
||||||
|
-H 'Authorization: Bearer <user-jwt>' \
|
||||||
|
-H 'Content-Type: application/json' \
|
||||||
|
-d '{
|
||||||
|
"ntfy_base_url": "https://ntfy.sh",
|
||||||
|
"ntfy_auth_token": "tk_…"
|
||||||
|
}'
|
||||||
|
|
||||||
|
# Read (sensitive fields redacted to booleans)
|
||||||
|
curl https://ns-myapp.example.com/v1/push/config \
|
||||||
|
-H 'Authorization: Bearer <user-jwt>'
|
||||||
|
|
||||||
|
# Clear (push reverts to gateway-wide defaults if any, else 503)
|
||||||
|
curl -X DELETE https://ns-myapp.example.com/v1/push/config \
|
||||||
|
-H 'Authorization: Bearer <user-jwt>'
|
||||||
|
```
|
||||||
|
|
||||||
|
### Field semantics
|
||||||
|
|
||||||
|
| Field | Sensitive? | Notes |
|
||||||
|
|---|---|---|
|
||||||
|
| `ntfy_base_url` | No | URL of the ntfy server. `https://ntfy.sh` works for testing. |
|
||||||
|
| `ntfy_auth_token` | Yes | Optional bearer token sent to ntfy. Encrypted at rest. |
|
||||||
|
| `expo_access_token` | Yes | Expo Push API access token. Encrypted at rest. |
|
||||||
|
|
||||||
|
PUT semantics are **field-level** — a `null` (or omitted) field leaves the
|
||||||
|
existing value alone; an explicit empty string clears just that field. To
|
||||||
|
clear EVERYTHING use DELETE.
|
||||||
|
|
||||||
|
After a PUT the next `push_send` (host call) or `POST /v1/push/send` uses
|
||||||
|
the new providers — the cached dispatcher is invalidated automatically.
|
||||||
|
|
||||||
|
If no per-namespace config is set AND the gateway has no YAML defaults, the
|
||||||
|
push endpoints return **503 SERVICE_UNAVAILABLE** with a message naming the
|
||||||
|
exact config to set.
|
||||||
|
|
||||||
## Managing Secrets
|
## Managing Secrets
|
||||||
|
|
||||||
Secrets are encrypted at rest (AES-256-GCM) and scoped to your namespace. Functions read them via `get_secret("name")` at runtime.
|
Secrets are encrypted at rest (AES-256-GCM) and scoped to your namespace. Functions read them via `get_secret("name")` at runtime.
|
||||||
|
|||||||
26
core/migrations/026_namespace_push_config.sql
Normal file
26
core/migrations/026_namespace_push_config.sql
Normal file
@ -0,0 +1,26 @@
|
|||||||
|
-- =============================================================================
|
||||||
|
-- 026_namespace_push_config.sql
|
||||||
|
--
|
||||||
|
-- Per-namespace push notification provider configuration. Tenants set their
|
||||||
|
-- own ntfy / expo credentials via PUT /v1/push/config without operator
|
||||||
|
-- involvement (bug #220 follow-up — self-service tenant config).
|
||||||
|
--
|
||||||
|
-- Sensitive credentials (auth tokens) are AES-256-GCM ciphertext via
|
||||||
|
-- pkg/secrets, prefix 'enc:'. Non-secret URLs (ntfy_base_url) stored
|
||||||
|
-- plaintext — they leak no security material.
|
||||||
|
--
|
||||||
|
-- The gateway YAML config remains as a global fallback / default. A row
|
||||||
|
-- in this table OVERRIDES the YAML for that namespace; absence falls back.
|
||||||
|
-- =============================================================================
|
||||||
|
|
||||||
|
CREATE TABLE IF NOT EXISTS namespace_push_config (
|
||||||
|
namespace TEXT PRIMARY KEY,
|
||||||
|
-- ntfy provider config (URL is non-secret; auth token is)
|
||||||
|
ntfy_base_url TEXT,
|
||||||
|
ntfy_auth_token_encrypted TEXT,
|
||||||
|
-- expo provider config (the access token IS sensitive)
|
||||||
|
expo_access_token_encrypted TEXT,
|
||||||
|
-- Audit metadata: who set this, and when (last update wins).
|
||||||
|
updated_at INTEGER NOT NULL,
|
||||||
|
updated_by TEXT
|
||||||
|
);
|
||||||
@ -98,7 +98,7 @@ Examples:
|
|||||||
return pushDirect(nodes, archivePath)
|
return pushDirect(nodes, archivePath)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Multi-node with --fanout: use agent forwarding
|
// Multi-node with --fanout: upload to hub, fan out server-to-server
|
||||||
return pushFanout(nodes, archivePath)
|
return pushFanout(nodes, archivePath)
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
@ -135,27 +135,35 @@ func pushDirect(nodes []inspector.Node, archivePath string) error {
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// pushFanout uploads the archive to the first node, then fans out server-to-server
|
// pushFanout uploads the archive to the first node (hub), then fans out
|
||||||
// using SSH agent forwarding.
|
// server-to-server: the hub SCPs the archive to all other nodes in parallel
|
||||||
|
// and SSHes in to extract it.
|
||||||
|
//
|
||||||
|
// Key design — no SSH agent forwarding:
|
||||||
|
//
|
||||||
|
// The previous implementation loaded all N node keys into the system ssh-agent
|
||||||
|
// and used agent forwarding (-A) so the hub could reach targets. That caused
|
||||||
|
// "Too many authentication failures": when the hub connected to a target, the
|
||||||
|
// forwarded agent offered all N keys sequentially; if N exceeds the server's
|
||||||
|
// MaxAuthTries (default 6 on most distros), the server disconnects before the
|
||||||
|
// correct key is tried.
|
||||||
|
//
|
||||||
|
// Fix: PrepareNodeKeys (called by the parent command) already fetched and wrote
|
||||||
|
// each node's private key to a temp file (node.SSHKey). We base64-encode each
|
||||||
|
// key and embed it directly in the fanout bash script. On the hub, the script
|
||||||
|
// writes each key to its own mktemp file, uses -o IdentitiesOnly=yes -i $K
|
||||||
|
// (only ONE key offered per connection), and deletes the temp file immediately.
|
||||||
|
// No ssh-agent involved on either end; MaxAuthTries is irrelevant.
|
||||||
func pushFanout(nodes []inspector.Node, archivePath string) error {
|
func pushFanout(nodes []inspector.Node, archivePath string) error {
|
||||||
fmt.Printf("Pushing to %d node(s) (fanout)...\n\n", len(nodes))
|
fmt.Printf("Pushing to %d node(s) (fanout)...\n\n", len(nodes))
|
||||||
|
|
||||||
hub := nodes[0]
|
hub := nodes[0]
|
||||||
targets := nodes[1:]
|
targets := nodes[1:]
|
||||||
remotePath := "/tmp/" + filepath.Base(archivePath)
|
remotePath := "/tmp/" + filepath.Base(archivePath)
|
||||||
// Hub extraction keeps the archive so it can be fanned out to targets.
|
|
||||||
// The cleanup at the end removes it.
|
|
||||||
hubExtractCmd := fmt.Sprintf("mkdir -p /opt/orama && tar xzf %s -C /opt/orama", remotePath)
|
|
||||||
// Target extraction deletes the archive after extracting.
|
|
||||||
targetExtractCmd := fmt.Sprintf("mkdir -p /opt/orama && tar xzf %s -C /opt/orama && rm -f %s", remotePath, remotePath)
|
|
||||||
|
|
||||||
// Load SSH keys into the system ssh-agent for agent forwarding
|
// Hub keeps the archive on disk after extracting — targets SCP it from here.
|
||||||
fmt.Println(" Loading SSH keys into agent...")
|
// The archive is removed from the hub at the end of this function.
|
||||||
if err := remotessh.LoadAgentKeys(nodes); err != nil {
|
hubExtractCmd := fmt.Sprintf("mkdir -p /opt/orama && tar xzf %s -C /opt/orama", remotePath)
|
||||||
fmt.Printf(" Warning: failed to load agent keys: %v\n", err)
|
|
||||||
fmt.Println(" Falling back to direct push...")
|
|
||||||
return pushDirect(nodes, archivePath)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Upload archive to hub
|
// Upload archive to hub
|
||||||
fmt.Printf(" %s (hub): uploading...", hub.Host)
|
fmt.Printf(" %s (hub): uploading...", hub.Host)
|
||||||
@ -168,27 +176,53 @@ func pushFanout(nodes []inspector.Node, archivePath string) error {
|
|||||||
}
|
}
|
||||||
fmt.Println(" OK")
|
fmt.Println(" OK")
|
||||||
|
|
||||||
// Build the fanout command — hub SCPs to all targets in parallel
|
// Build the fanout script. Each target gets its own shell subshell that:
|
||||||
|
// 1. Writes the target's SSH private key to a mktemp file ($K).
|
||||||
|
// 2. SCPs the archive from the hub's local path to the target.
|
||||||
|
// 3. SSHes into the target to extract it.
|
||||||
|
// 4. Deletes $K — key material never lingers on the hub.
|
||||||
|
//
|
||||||
|
// All subshells run in parallel (&); a final `wait` collects them.
|
||||||
|
// The entire script is base64-encoded before transmission to avoid shell
|
||||||
|
// quoting conflicts (the script contains both single and double quotes).
|
||||||
var fanoutParts []string
|
var fanoutParts []string
|
||||||
for _, t := range targets {
|
for _, t := range targets {
|
||||||
scpCmd := fmt.Sprintf(
|
keyBytes, err := os.ReadFile(t.SSHKey)
|
||||||
"scp -o StrictHostKeyChecking=accept-new -o IdentitiesOnly=no %s %s@%s:%s && ssh -o StrictHostKeyChecking=accept-new %s@%s 'sudo bash -c \"%s\"' && echo '%s: done'",
|
if err != nil {
|
||||||
|
// SSHKey was populated by PrepareNodeKeys; this should never
|
||||||
|
// happen unless the temp file was somehow deleted mid-run.
|
||||||
|
fmt.Printf(" Warning: could not read key for %s: %v (skipping)\n", t.Host, err)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
// base64 alphabet is [A-Za-z0-9+/=] — no shell metacharacters —
|
||||||
|
// safe to embed in single-quoted strings inside the script.
|
||||||
|
keyB64 := base64.StdEncoding.EncodeToString(keyBytes)
|
||||||
|
|
||||||
|
part := fmt.Sprintf(
|
||||||
|
"(K=$(mktemp) && echo '%s' | base64 -d >\"$K\" && chmod 600 \"$K\" && "+
|
||||||
|
"scp -o StrictHostKeyChecking=accept-new -o ConnectTimeout=30 -o IdentitiesOnly=yes -i \"$K\" %s %s@%s:%s && "+
|
||||||
|
"ssh -o StrictHostKeyChecking=accept-new -o ConnectTimeout=30 -o IdentitiesOnly=yes -i \"$K\" %s@%s "+
|
||||||
|
"'sudo bash -c \"mkdir -p /opt/orama && tar xzf %s -C /opt/orama && rm -f %s\"' && "+
|
||||||
|
"rm -f \"$K\" && echo '%s: done' || (rm -f \"$K\" 2>/dev/null; echo '%s: FAILED')) &",
|
||||||
|
keyB64,
|
||||||
remotePath, t.User, t.Host, remotePath,
|
remotePath, t.User, t.Host, remotePath,
|
||||||
t.User, t.Host, targetExtractCmd,
|
t.User, t.Host,
|
||||||
t.Host,
|
remotePath, remotePath,
|
||||||
|
t.Host, t.Host,
|
||||||
)
|
)
|
||||||
fanoutParts = append(fanoutParts, "("+scpCmd+") &")
|
fanoutParts = append(fanoutParts, part)
|
||||||
}
|
}
|
||||||
fanoutParts = append(fanoutParts, "wait", "echo 'Fanout complete'")
|
fanoutParts = append(fanoutParts, "wait", "echo 'Fanout complete'")
|
||||||
fanoutScript := strings.Join(fanoutParts, "\n")
|
fanoutScript := strings.Join(fanoutParts, "\n")
|
||||||
|
|
||||||
// Base64-encode the script to avoid shell quoting conflicts — the script
|
|
||||||
// contains single quotes (ssh '...') that would break a bash -c '...' wrapper.
|
|
||||||
encoded := base64.StdEncoding.EncodeToString([]byte(fanoutScript))
|
encoded := base64.StdEncoding.EncodeToString([]byte(fanoutScript))
|
||||||
runCmd := fmt.Sprintf("echo %s | base64 -d | bash", encoded)
|
runCmd := fmt.Sprintf("echo %s | base64 -d | bash", encoded)
|
||||||
|
|
||||||
fmt.Printf(" Fanning out to %d nodes from %s...\n", len(targets), hub.Host)
|
fmt.Printf(" Fanning out to %d nodes from %s...\n", len(targets), hub.Host)
|
||||||
if err := remotessh.RunSSHStreaming(hub, runCmd, remotessh.WithAgentForward()); err != nil {
|
// No agent forwarding — hub authenticates to each target using only
|
||||||
|
// that target's key (embedded above). IdentitiesOnly=yes ensures the
|
||||||
|
// hub's own host key is never accidentally offered to other nodes.
|
||||||
|
if err := remotessh.RunSSHStreaming(hub, runCmd); err != nil {
|
||||||
fmt.Printf(" Fanout failed: %v\n", err)
|
fmt.Printf(" Fanout failed: %v\n", err)
|
||||||
fmt.Println(" Some nodes may not have been updated")
|
fmt.Println(" Some nodes may not have been updated")
|
||||||
}
|
}
|
||||||
|
|||||||
@ -6,6 +6,7 @@ import (
|
|||||||
"fmt"
|
"fmt"
|
||||||
"io"
|
"io"
|
||||||
"text/tabwriter"
|
"text/tabwriter"
|
||||||
|
"time"
|
||||||
|
|
||||||
"github.com/spf13/cobra"
|
"github.com/spf13/cobra"
|
||||||
)
|
)
|
||||||
@ -126,32 +127,83 @@ func runTriggersList(cmd *cobra.Command, args []string) error {
|
|||||||
}
|
}
|
||||||
|
|
||||||
w := tabwriter.NewWriter(cmd.OutOrStdout(), 0, 0, 2, ' ', 0)
|
w := tabwriter.NewWriter(cmd.OutOrStdout(), 0, 0, 2, ' ', 0)
|
||||||
fmt.Fprintln(w, "ID\tTOPIC\tENABLED")
|
// Bug #65 audit: the previous CLI rendered only ID/TOPIC/ENABLED, so cron
|
||||||
|
// triggers appeared as mystery blank-topic rows. The handler returns a
|
||||||
|
// `kind` discriminator plus pubsub-only `topic` or cron-only
|
||||||
|
// `cron_expression` / `next_run_at` / `last_run_at`; the CLI now renders
|
||||||
|
// both kinds in a single unified table.
|
||||||
|
fmt.Fprintln(w, "ID\tKIND\tSCHEDULE/TOPIC\tNEXT RUN\tLAST RUN\tENABLED")
|
||||||
for _, t := range triggers {
|
for _, t := range triggers {
|
||||||
tr, ok := t.(map[string]interface{})
|
tr, ok := t.(map[string]interface{})
|
||||||
if !ok {
|
if !ok {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
id, _ := tr["ID"].(string)
|
id := stringField(tr, "id", "ID")
|
||||||
if id == "" {
|
kind := stringField(tr, "kind", "Kind")
|
||||||
id, _ = tr["id"].(string)
|
// Backward compat: pre-#65 servers returned only `topic` with no
|
||||||
|
// `kind` field. Treat those as pubsub.
|
||||||
|
if kind == "" {
|
||||||
|
kind = "pubsub"
|
||||||
}
|
}
|
||||||
topic, _ := tr["Topic"].(string)
|
|
||||||
if topic == "" {
|
var what, nextRun, lastRun string
|
||||||
topic, _ = tr["topic"].(string)
|
switch kind {
|
||||||
|
case "cron":
|
||||||
|
what = stringField(tr, "cron_expression", "CronExpression")
|
||||||
|
nextRun = formatCronTimestamp(tr["next_run_at"])
|
||||||
|
lastRun = formatCronTimestamp(tr["last_run_at"])
|
||||||
|
default: // pubsub or unknown
|
||||||
|
what = stringField(tr, "topic", "Topic")
|
||||||
|
nextRun = "-"
|
||||||
|
lastRun = "-"
|
||||||
}
|
}
|
||||||
|
|
||||||
enabled := true
|
enabled := true
|
||||||
if e, ok := tr["Enabled"].(bool); ok {
|
if e, ok := tr["Enabled"].(bool); ok {
|
||||||
enabled = e
|
enabled = e
|
||||||
} else if e, ok := tr["enabled"].(bool); ok {
|
} else if e, ok := tr["enabled"].(bool); ok {
|
||||||
enabled = e
|
enabled = e
|
||||||
}
|
}
|
||||||
fmt.Fprintf(w, "%s\t%s\t%v\n", id, topic, enabled)
|
fmt.Fprintf(w, "%s\t%s\t%s\t%s\t%s\t%v\n", id, kind, what, nextRun, lastRun, enabled)
|
||||||
}
|
}
|
||||||
w.Flush()
|
w.Flush()
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// stringField pulls a string from a JSON-decoded map under any of the
|
||||||
|
// supplied keys, in order. The handler emits snake_case (`cron_expression`)
|
||||||
|
// while older Go-tagged structs may surface PascalCase — try both.
|
||||||
|
func stringField(m map[string]interface{}, keys ...string) string {
|
||||||
|
for _, k := range keys {
|
||||||
|
if v, ok := m[k].(string); ok && v != "" {
|
||||||
|
return v
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
|
||||||
|
// formatCronTimestamp renders a JSON timestamp from the handler in a compact
|
||||||
|
// human-readable form. Returns "-" for nil / unparseable values so the CLI
|
||||||
|
// table stays aligned for never-run / pubsub rows.
|
||||||
|
func formatCronTimestamp(v interface{}) string {
|
||||||
|
if v == nil {
|
||||||
|
return "-"
|
||||||
|
}
|
||||||
|
s, ok := v.(string)
|
||||||
|
if !ok || s == "" {
|
||||||
|
return "-"
|
||||||
|
}
|
||||||
|
// Try RFC3339 first (Go's default time.Time JSON encoding); fall back to
|
||||||
|
// the raw string so unexpected formats don't disappear silently.
|
||||||
|
if ts, err := time.Parse(time.RFC3339, s); err == nil {
|
||||||
|
return ts.UTC().Format("2006-01-02 15:04:05 UTC")
|
||||||
|
}
|
||||||
|
if ts, err := time.Parse(time.RFC3339Nano, s); err == nil {
|
||||||
|
return ts.UTC().Format("2006-01-02 15:04:05 UTC")
|
||||||
|
}
|
||||||
|
return s
|
||||||
|
}
|
||||||
|
|
||||||
func runTriggersDelete(cmd *cobra.Command, args []string) error {
|
func runTriggersDelete(cmd *cobra.Command, args []string) error {
|
||||||
funcName := args[0]
|
funcName := args[0]
|
||||||
triggerID := args[1]
|
triggerID := args[1]
|
||||||
|
|||||||
114
core/pkg/cli/functions/triggers_test.go
Normal file
114
core/pkg/cli/functions/triggers_test.go
Normal file
@ -0,0 +1,114 @@
|
|||||||
|
package functions
|
||||||
|
|
||||||
|
import (
|
||||||
|
"testing"
|
||||||
|
)
|
||||||
|
|
||||||
|
// ----------------------------------------------------------------------------
|
||||||
|
// stringField — pulls value from JSON-decoded map under any of the given keys
|
||||||
|
// ----------------------------------------------------------------------------
|
||||||
|
|
||||||
|
func TestStringField_prefersFirstKey(t *testing.T) {
|
||||||
|
m := map[string]interface{}{
|
||||||
|
"id": "first",
|
||||||
|
"ID": "second",
|
||||||
|
}
|
||||||
|
if got := stringField(m, "id", "ID"); got != "first" {
|
||||||
|
t.Errorf("stringField = %q, want %q", got, "first")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestStringField_fallsThroughWhenFirstMissing(t *testing.T) {
|
||||||
|
m := map[string]interface{}{
|
||||||
|
"ID": "second",
|
||||||
|
}
|
||||||
|
if got := stringField(m, "id", "ID"); got != "second" {
|
||||||
|
t.Errorf("stringField = %q, want %q", got, "second")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestStringField_emptyValueSkipped(t *testing.T) {
|
||||||
|
// An empty string under the first key MUST fall through to subsequent
|
||||||
|
// keys, otherwise empty pubsub `topic` fields would shadow valid
|
||||||
|
// PascalCase `Topic`.
|
||||||
|
m := map[string]interface{}{
|
||||||
|
"id": "",
|
||||||
|
"ID": "fallback",
|
||||||
|
}
|
||||||
|
if got := stringField(m, "id", "ID"); got != "fallback" {
|
||||||
|
t.Errorf("stringField = %q, want %q", got, "fallback")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestStringField_nonStringValueSkipped(t *testing.T) {
|
||||||
|
m := map[string]interface{}{
|
||||||
|
"id": 42,
|
||||||
|
"ID": "ok",
|
||||||
|
}
|
||||||
|
if got := stringField(m, "id", "ID"); got != "ok" {
|
||||||
|
t.Errorf("stringField = %q, want %q", got, "ok")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestStringField_allMissingReturnsEmpty(t *testing.T) {
|
||||||
|
m := map[string]interface{}{"other": "value"}
|
||||||
|
if got := stringField(m, "id", "ID"); got != "" {
|
||||||
|
t.Errorf("stringField = %q, want empty", got)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// ----------------------------------------------------------------------------
|
||||||
|
// formatCronTimestamp — RFC3339 -> UTC display, "-" for missing/unparseable
|
||||||
|
// ----------------------------------------------------------------------------
|
||||||
|
|
||||||
|
func TestFormatCronTimestamp_nilReturnsDash(t *testing.T) {
|
||||||
|
if got := formatCronTimestamp(nil); got != "-" {
|
||||||
|
t.Errorf("formatCronTimestamp(nil) = %q, want %q", got, "-")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestFormatCronTimestamp_emptyStringReturnsDash(t *testing.T) {
|
||||||
|
if got := formatCronTimestamp(""); got != "-" {
|
||||||
|
t.Errorf("formatCronTimestamp(\"\") = %q, want %q", got, "-")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestFormatCronTimestamp_nonStringReturnsDash(t *testing.T) {
|
||||||
|
if got := formatCronTimestamp(42); got != "-" {
|
||||||
|
t.Errorf("formatCronTimestamp(42) = %q, want %q", got, "-")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestFormatCronTimestamp_rfc3339(t *testing.T) {
|
||||||
|
got := formatCronTimestamp("2025-05-08T03:00:00Z")
|
||||||
|
want := "2025-05-08 03:00:00 UTC"
|
||||||
|
if got != want {
|
||||||
|
t.Errorf("formatCronTimestamp = %q, want %q", got, want)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestFormatCronTimestamp_rfc3339Nano(t *testing.T) {
|
||||||
|
got := formatCronTimestamp("2025-05-08T03:00:00.123456789Z")
|
||||||
|
want := "2025-05-08 03:00:00 UTC"
|
||||||
|
if got != want {
|
||||||
|
t.Errorf("formatCronTimestamp nano = %q, want %q", got, want)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestFormatCronTimestamp_rfc3339WithOffset(t *testing.T) {
|
||||||
|
// Non-UTC offsets must be normalised to UTC for the display.
|
||||||
|
got := formatCronTimestamp("2025-05-08T05:00:00+02:00")
|
||||||
|
want := "2025-05-08 03:00:00 UTC"
|
||||||
|
if got != want {
|
||||||
|
t.Errorf("formatCronTimestamp offset = %q, want %q", got, want)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestFormatCronTimestamp_unparseableFallsBackToRaw(t *testing.T) {
|
||||||
|
// If the server returns an unexpected timestamp shape, surface it
|
||||||
|
// rather than silently dropping to "-" — operator visibility wins.
|
||||||
|
got := formatCronTimestamp("not-a-timestamp")
|
||||||
|
if got != "not-a-timestamp" {
|
||||||
|
t.Errorf("formatCronTimestamp unparseable = %q, want raw passthrough", got)
|
||||||
|
}
|
||||||
|
}
|
||||||
@ -416,3 +416,73 @@ func TestJWKSHandler_RSAOnly(t *testing.T) {
|
|||||||
t.Errorf("expected RS256, got %s", result.Keys[0]["alg"])
|
t.Errorf("expected RS256, got %s", result.Keys[0]["alg"])
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TestEdDSACrossServiceVerify is the regression test for bug #215. Two Service
|
||||||
|
// instances configured with the SAME Ed25519 key (the cluster-shared scenario
|
||||||
|
// produced by deterministic HKDF derivation in pkg/gateway/signing_key.go)
|
||||||
|
// must be able to verify each other's tokens. Without this guarantee a JWT
|
||||||
|
// minted on the main gateway is unverifiable on a namespace gateway and host
|
||||||
|
// functions see an empty caller_jwt_subject.
|
||||||
|
func TestEdDSACrossServiceVerify(t *testing.T) {
|
||||||
|
// Shared key — what HKDF would produce from the cluster secret.
|
||||||
|
_, shared, err := ed25519.GenerateKey(rand.Reader)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("generate shared key: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
makeService := func() *Service {
|
||||||
|
s := createTestService(t) // RSA + mock client
|
||||||
|
s.SetEdDSAKey(shared)
|
||||||
|
return s
|
||||||
|
}
|
||||||
|
|
||||||
|
signer := makeService() // simulates main gateway
|
||||||
|
verifier := makeService() // simulates namespace gateway (different process, same shared key)
|
||||||
|
|
||||||
|
// Sanity: both services must agree on edKeyID since it is derived from the
|
||||||
|
// public key. If they don't, kid-based verification will silently fail.
|
||||||
|
if signer.edKeyID != verifier.edKeyID {
|
||||||
|
t.Fatalf("edKeyID mismatch: signer=%q verifier=%q", signer.edKeyID, verifier.edKeyID)
|
||||||
|
}
|
||||||
|
|
||||||
|
const wantSub = "BNbN2RNQTsYrrywZCLnhV9j3hd38jwcRqfxBecZX7hDE"
|
||||||
|
const wantNS = "anchat-test"
|
||||||
|
token, _, err := signer.GenerateJWT(wantNS, wantSub, 15*time.Minute)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("signer.GenerateJWT: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
claims, err := verifier.ParseAndVerifyJWT(token)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("cross-service verify failed: %v", err)
|
||||||
|
}
|
||||||
|
if claims.Sub != wantSub {
|
||||||
|
t.Errorf("Sub = %q, want %q", claims.Sub, wantSub)
|
||||||
|
}
|
||||||
|
if claims.Namespace != wantNS {
|
||||||
|
t.Errorf("Namespace = %q, want %q", claims.Namespace, wantNS)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestEdDSACrossServiceVerify_differentKeysFail proves the verify gate is
|
||||||
|
// real: when two services have different Ed25519 keys (the broken state
|
||||||
|
// before bug #215 fix), tokens minted on one MUST NOT validate on the other.
|
||||||
|
// If this test ever passes, the deterministic-derivation guarantee is
|
||||||
|
// silently bypassed somewhere.
|
||||||
|
func TestEdDSACrossServiceVerify_differentKeysFail(t *testing.T) {
|
||||||
|
signer := createTestService(t)
|
||||||
|
_, signKey, _ := ed25519.GenerateKey(rand.Reader)
|
||||||
|
signer.SetEdDSAKey(signKey)
|
||||||
|
|
||||||
|
verifier := createTestService(t)
|
||||||
|
_, verKey, _ := ed25519.GenerateKey(rand.Reader)
|
||||||
|
verifier.SetEdDSAKey(verKey)
|
||||||
|
|
||||||
|
token, _, err := signer.GenerateJWT("ns", "sub", 15*time.Minute)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("GenerateJWT: %v", err)
|
||||||
|
}
|
||||||
|
if _, err := verifier.ParseAndVerifyJWT(token); err == nil {
|
||||||
|
t.Fatal("expected verification to fail with different signing keys, got nil error")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|||||||
@ -82,11 +82,20 @@ type Dependencies struct {
|
|||||||
// Used by the ws_pubsub_bridge host function. Nil = disabled.
|
// Used by the ws_pubsub_bridge host function. Nil = disabled.
|
||||||
WSBridge *wsbridge.Bridge
|
WSBridge *wsbridge.Bridge
|
||||||
|
|
||||||
// Push notification dispatcher (nil when push isn't configured —
|
// Push notification dispatcher (legacy single-tier; nil when push
|
||||||
// hostfunc + HTTP handlers degrade to no-op / 503).
|
// isn't configured at all). When PushManager is also set, send paths
|
||||||
|
// route through the manager instead so per-namespace config wins.
|
||||||
PushDispatcher *push.PushDispatcher
|
PushDispatcher *push.PushDispatcher
|
||||||
PushDeviceStore push.PushDeviceStore
|
PushDeviceStore push.PushDeviceStore
|
||||||
|
|
||||||
|
// PushManager wraps the device store + per-namespace config store so
|
||||||
|
// tenants self-serve their push provider config via PUT /v1/push/config.
|
||||||
|
// Nil when push subsystem isn't initialized (cluster secret missing).
|
||||||
|
// When set, this is the canonical send path; PushDispatcher is the
|
||||||
|
// fallback used only if Manager is somehow missing.
|
||||||
|
PushManager *push.Manager
|
||||||
|
PushConfigStore push.ConfigStore
|
||||||
|
|
||||||
// Authentication service
|
// Authentication service
|
||||||
AuthService *auth.Service
|
AuthService *auth.Service
|
||||||
}
|
}
|
||||||
@ -464,10 +473,18 @@ func initializeServerless(logger *logging.ColoredLogger, cfg *Config, deps *Depe
|
|||||||
secretsMgr = smImpl
|
secretsMgr = smImpl
|
||||||
}
|
}
|
||||||
|
|
||||||
// Initialize push notification dispatcher if any provider is configured.
|
// Initialize push notification subsystem.
|
||||||
// Devices are stored encrypted in RQLite (see migration 023). Providers
|
//
|
||||||
// are registered based on gateway config; missing config = provider absent.
|
// Bug #220 follow-up: the subsystem now ALWAYS initializes when the
|
||||||
pushDispatcher, pushStore, err := buildPushDispatcher(cfg, deps.ORMClient, logger)
|
// cluster secret is available (so tenants can register devices and
|
||||||
|
// PUT their per-namespace push config), regardless of whether the
|
||||||
|
// gateway YAML has a default provider configured. The Manager wraps
|
||||||
|
// the device store + per-namespace ConfigStore; Send paths route
|
||||||
|
// through Manager so per-namespace config takes effect.
|
||||||
|
//
|
||||||
|
// PushDispatcher (legacy) is set only when YAML defaults exist —
|
||||||
|
// kept for back-compat with code that hasn't migrated to Manager.
|
||||||
|
pushDispatcher, pushStore, pushManager, pushCfgStore, err := buildPushDispatcher(cfg, deps.ORMClient, logger)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
// Non-fatal: log and continue. Functions calling push_send will get nil
|
// Non-fatal: log and continue. Functions calling push_send will get nil
|
||||||
// (silent no-op) and HTTP /v1/push/* endpoints return 503.
|
// (silent no-op) and HTTP /v1/push/* endpoints return 503.
|
||||||
@ -476,6 +493,8 @@ func initializeServerless(logger *logging.ColoredLogger, cfg *Config, deps *Depe
|
|||||||
}
|
}
|
||||||
deps.PushDispatcher = pushDispatcher
|
deps.PushDispatcher = pushDispatcher
|
||||||
deps.PushDeviceStore = pushStore
|
deps.PushDeviceStore = pushStore
|
||||||
|
deps.PushManager = pushManager
|
||||||
|
deps.PushConfigStore = pushCfgStore
|
||||||
|
|
||||||
// Create host functions provider (allows functions to call Orama services)
|
// Create host functions provider (allows functions to call Orama services)
|
||||||
hostFuncsCfg := hostfunctions.HostFunctionsConfig{
|
hostFuncsCfg := hostfunctions.HostFunctionsConfig{
|
||||||
@ -494,7 +513,8 @@ func initializeServerless(logger *logging.ColoredLogger, cfg *Config, deps *Depe
|
|||||||
pubsubAdapter, // pubsub adapter for serverless functions
|
pubsubAdapter, // pubsub adapter for serverless functions
|
||||||
deps.ServerlessWSMgr,
|
deps.ServerlessWSMgr,
|
||||||
secretsMgr,
|
secretsMgr,
|
||||||
pushDispatcher, // may be nil — PushSend hostfunc handles that
|
pushDispatcher, // legacy — fallback when manager isn't wired
|
||||||
|
pushManager, // bug #220 follow-up — per-namespace config
|
||||||
deps.WSBridge, // may be nil; WSPubSubBridge returns explicit error
|
deps.WSBridge, // may be nil; WSPubSubBridge returns explicit error
|
||||||
hostFuncsCfg,
|
hostFuncsCfg,
|
||||||
logger.Logger,
|
logger.Logger,
|
||||||
@ -581,8 +601,21 @@ func initializeServerless(logger *logging.ColoredLogger, cfg *Config, deps *Depe
|
|||||||
return fmt.Errorf("failed to initialize auth service: %w", err)
|
return fmt.Errorf("failed to initialize auth service: %w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Load or create EdDSA key for new JWT tokens
|
// Load or create EdDSA key for new JWT tokens. Bug #215 fix: when
|
||||||
edKey, err := loadOrCreateEdSigningKey(cfg.DataDir, logger)
|
// cfg.ClusterSecret is set, the key is derived deterministically from
|
||||||
|
// it via HKDF, so every gateway in the cluster shares the same Ed25519
|
||||||
|
// keypair and JWTs verify cross-node. With an empty ClusterSecret the
|
||||||
|
// per-node legacy behaviour is retained (single-node test deployments).
|
||||||
|
if cfg.ClusterSecret == "" {
|
||||||
|
// Loud warning: a multi-node cluster booted without a cluster
|
||||||
|
// secret reproduces bug #215 (per-gateway random keys, JWTs
|
||||||
|
// unverifiable cross-node). Single-node test rigs are the only
|
||||||
|
// legitimate case.
|
||||||
|
logger.ComponentWarn(logging.ComponentGeneral,
|
||||||
|
"ClusterSecret is empty; JWT signing keys will be random per-node. "+
|
||||||
|
"Multi-node clusters MUST set ClusterSecret or JWTs will not verify across gateways (bug #215).")
|
||||||
|
}
|
||||||
|
edKey, err := loadOrCreateEdSigningKey(cfg.DataDir, cfg.ClusterSecret, logger)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
logger.ComponentWarn(logging.ComponentGeneral, "Failed to load EdDSA signing key; new JWTs will use RS256",
|
logger.ComponentWarn(logging.ComponentGeneral, "Failed to load EdDSA signing key; new JWTs will use RS256",
|
||||||
zap.Error(err))
|
zap.Error(err))
|
||||||
@ -791,39 +824,96 @@ func injectRQLiteAuth(dsn, username, password string) string {
|
|||||||
return dsn
|
return dsn
|
||||||
}
|
}
|
||||||
|
|
||||||
// buildPushDispatcher constructs a push.PushDispatcher + device store with
|
// buildPushDispatcher constructs the push subsystem.
|
||||||
// all enabled providers. Returns (nil, nil, nil) when no provider is
|
//
|
||||||
// configured — that's a supported state, not an error. Returns (nil, nil,
|
// As of bug #220 follow-up, push always initializes when ClusterSecret is
|
||||||
// err) on hard init failures (e.g. cluster secret missing for the
|
// available, regardless of whether any YAML provider config is set:
|
||||||
// encrypted device store).
|
//
|
||||||
func buildPushDispatcher(cfg *Config, db rqlite.Client, logger *logging.ColoredLogger) (*push.PushDispatcher, push.PushDeviceStore, error) {
|
// - Device store + ConfigStore always build (tenants need to register
|
||||||
if cfg.NtfyBaseURL == "" && cfg.ExpoAccessToken == "" {
|
// devices and set per-namespace push config even on gateways with no
|
||||||
// No providers configured — push is disabled.
|
// YAML defaults).
|
||||||
return nil, nil, nil
|
// - Manager wraps the stores + a YAML-derived Defaults fallback. Each
|
||||||
}
|
// namespace can override any default via PUT /v1/push/config.
|
||||||
|
// - The legacy single-tier dispatcher is built only when YAML defaults
|
||||||
|
// are non-empty — kept for back-compat with code paths that haven't
|
||||||
|
// migrated to Manager.
|
||||||
|
//
|
||||||
|
// Returns (nil, nil, nil, nil, nil) when ClusterSecret is missing
|
||||||
|
// (push subsystem disabled — credentials can't be encrypted safely).
|
||||||
|
// Returns hard error only on store-init failure.
|
||||||
|
func buildPushDispatcher(
|
||||||
|
cfg *Config,
|
||||||
|
db rqlite.Client,
|
||||||
|
logger *logging.ColoredLogger,
|
||||||
|
) (*push.PushDispatcher, push.PushDeviceStore, *push.Manager, push.ConfigStore, error) {
|
||||||
if cfg.ClusterSecret == "" {
|
if cfg.ClusterSecret == "" {
|
||||||
// Devices are encrypted at rest using a cluster-secret-derived key.
|
// Without the cluster secret we can't encrypt credentials at rest.
|
||||||
// Without it we can't store anything safely.
|
// Disable the whole push subsystem; HTTP routes return 503.
|
||||||
return nil, nil, fmt.Errorf("push enabled but ClusterSecret is empty")
|
return nil, nil, nil, nil, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
store, err := push.NewRqliteDeviceStore(db, cfg.ClusterSecret, logger.Logger)
|
store, err := push.NewRqliteDeviceStore(db, cfg.ClusterSecret, logger.Logger)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, nil, fmt.Errorf("init push device store: %w", err)
|
return nil, nil, nil, nil, fmt.Errorf("init push device store: %w", err)
|
||||||
}
|
}
|
||||||
d := push.New(store, logger.Logger)
|
|
||||||
if cfg.NtfyBaseURL != "" {
|
cfgStore, err := push.NewRqliteConfigStore(db, cfg.ClusterSecret, logger.Logger)
|
||||||
d.Register(pushntfy.New(pushntfy.Config{
|
if err != nil {
|
||||||
BaseURL: cfg.NtfyBaseURL,
|
return nil, nil, nil, nil, fmt.Errorf("init push config store: %w", err)
|
||||||
AuthToken: cfg.NtfyAuthToken,
|
|
||||||
}, logger.Logger))
|
|
||||||
logger.ComponentInfo(logging.ComponentGeneral, "push provider registered: ntfy",
|
|
||||||
zap.String("base_url", cfg.NtfyBaseURL))
|
|
||||||
}
|
}
|
||||||
if cfg.ExpoAccessToken != "" {
|
|
||||||
d.Register(pushexpo.New(pushexpo.Config{
|
// ProviderFactory turns a resolved Config into the right set of
|
||||||
AccessToken: cfg.ExpoAccessToken,
|
// provider instances. Lives here in dependencies.go because this is
|
||||||
}, logger.Logger))
|
// the only place that imports both the manager package and the
|
||||||
logger.ComponentInfo(logging.ComponentGeneral, "push provider registered: expo")
|
// concrete provider sub-packages — keeps push core dep-cycle-free.
|
||||||
|
factory := func(c push.Config) []push.PushProvider {
|
||||||
|
var ps []push.PushProvider
|
||||||
|
if c.NtfyBaseURL != "" {
|
||||||
|
ps = append(ps, pushntfy.New(pushntfy.Config{
|
||||||
|
BaseURL: c.NtfyBaseURL,
|
||||||
|
AuthToken: c.NtfyAuthToken,
|
||||||
|
}, logger.Logger))
|
||||||
|
}
|
||||||
|
if c.ExpoAccessToken != "" {
|
||||||
|
ps = append(ps, pushexpo.New(pushexpo.Config{
|
||||||
|
AccessToken: c.ExpoAccessToken,
|
||||||
|
}, logger.Logger))
|
||||||
|
}
|
||||||
|
return ps
|
||||||
}
|
}
|
||||||
return d, store, nil
|
|
||||||
|
defaults := push.Defaults{
|
||||||
|
NtfyBaseURL: cfg.NtfyBaseURL,
|
||||||
|
NtfyAuthToken: cfg.NtfyAuthToken,
|
||||||
|
ExpoAccessToken: cfg.ExpoAccessToken,
|
||||||
|
}
|
||||||
|
manager := push.NewManager(store, cfgStore, defaults, factory, logger.Logger)
|
||||||
|
|
||||||
|
// Legacy single-tier dispatcher kept ONLY when YAML defaults exist —
|
||||||
|
// some non-Manager code paths (notably the WASM push_send hostfunc
|
||||||
|
// before its migration to Manager) still expect a populated
|
||||||
|
// PushDispatcher. New code routes via Manager.
|
||||||
|
var legacy *push.PushDispatcher
|
||||||
|
if !defaults.IsEmpty() {
|
||||||
|
legacy = push.New(store, logger.Logger)
|
||||||
|
for _, p := range factory(push.Config{
|
||||||
|
NtfyBaseURL: defaults.NtfyBaseURL,
|
||||||
|
NtfyAuthToken: defaults.NtfyAuthToken,
|
||||||
|
ExpoAccessToken: defaults.ExpoAccessToken,
|
||||||
|
}) {
|
||||||
|
legacy.Register(p)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if defaults.NtfyBaseURL != "" {
|
||||||
|
logger.ComponentInfo(logging.ComponentGeneral, "push default provider: ntfy",
|
||||||
|
zap.String("base_url", defaults.NtfyBaseURL))
|
||||||
|
}
|
||||||
|
if defaults.ExpoAccessToken != "" {
|
||||||
|
logger.ComponentInfo(logging.ComponentGeneral, "push default provider: expo configured")
|
||||||
|
}
|
||||||
|
logger.ComponentInfo(logging.ComponentGeneral,
|
||||||
|
"push subsystem initialized; tenants can self-serve via PUT /v1/push/config")
|
||||||
|
|
||||||
|
return legacy, store, manager, cfgStore, nil
|
||||||
}
|
}
|
||||||
|
|||||||
@ -367,7 +367,19 @@ func New(logger *logging.ColoredLogger, cfg *Config) (*Gateway, error) {
|
|||||||
// The handlers themselves return 503 if dispatcher/store is nil; we
|
// The handlers themselves return 503 if dispatcher/store is nil; we
|
||||||
// register them unconditionally so the routes always exist with a
|
// register them unconditionally so the routes always exist with a
|
||||||
// predictable shape.
|
// predictable shape.
|
||||||
if deps.PushDispatcher != nil {
|
//
|
||||||
|
// Prefer the Manager-backed constructor (bug #220 follow-up) so
|
||||||
|
// tenants can self-serve their push config via PUT /v1/push/config.
|
||||||
|
// Fall back to the legacy constructor when only the YAML-derived
|
||||||
|
// dispatcher is available (older deployments without ClusterSecret).
|
||||||
|
if deps.PushManager != nil {
|
||||||
|
gw.pushHandlers = pushhandlers.NewHandlersWithManager(
|
||||||
|
deps.PushManager,
|
||||||
|
deps.PushConfigStore,
|
||||||
|
deps.PushDeviceStore,
|
||||||
|
logger,
|
||||||
|
)
|
||||||
|
} else if deps.PushDispatcher != nil {
|
||||||
gw.pushHandlers = pushhandlers.NewHandlers(deps.PushDispatcher, deps.PushDeviceStore, logger)
|
gw.pushHandlers = pushhandlers.NewHandlers(deps.PushDispatcher, deps.PushDeviceStore, logger)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
233
core/pkg/gateway/handlers/push/config_handler.go
Normal file
233
core/pkg/gateway/handlers/push/config_handler.go
Normal file
@ -0,0 +1,233 @@
|
|||||||
|
package push
|
||||||
|
|
||||||
|
// config_handler.go — tenant-self-service push provider configuration.
|
||||||
|
//
|
||||||
|
// Endpoints (mounted under /v1/push/config; namespace-ownership middleware applies):
|
||||||
|
//
|
||||||
|
// GET /v1/push/config → current config (secrets redacted: only "has_X" booleans)
|
||||||
|
// PUT /v1/push/config → set/update fields; sensitive credentials encrypted at rest
|
||||||
|
// DELETE /v1/push/config → clear the namespace's row (push reverts to gateway YAML defaults)
|
||||||
|
//
|
||||||
|
// Bug #220 follow-up. Eliminates the "tenant must file an ops ticket"
|
||||||
|
// workflow: once this lands, AnChat (and every future tenant) self-serves
|
||||||
|
// their push provider config via authenticated HTTP, no operator
|
||||||
|
// involvement.
|
||||||
|
|
||||||
|
import (
|
||||||
|
"encoding/json"
|
||||||
|
"errors"
|
||||||
|
"net/http"
|
||||||
|
"strings"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/DeBrosOfficial/network/pkg/push"
|
||||||
|
"go.uber.org/zap"
|
||||||
|
)
|
||||||
|
|
||||||
|
// configManager is the subset of *push.Manager the config handlers need —
|
||||||
|
// kept narrow for testability.
|
||||||
|
type configManager interface {
|
||||||
|
IsConfigured(ctx contextLike, namespace string) bool
|
||||||
|
Invalidate(namespace string)
|
||||||
|
}
|
||||||
|
|
||||||
|
// contextLike avoids importing context everywhere — the handler is
|
||||||
|
// already in package serverless which has request contexts.
|
||||||
|
type contextLike = interface {
|
||||||
|
Done() <-chan struct{}
|
||||||
|
}
|
||||||
|
|
||||||
|
// PutConfigRequest is the body of PUT /v1/push/config.
|
||||||
|
//
|
||||||
|
// Field semantics:
|
||||||
|
// - Unset fields (zero value) leave the existing value alone.
|
||||||
|
// - Empty-string fields explicitly clear the value.
|
||||||
|
// - To clear the entire row use DELETE — that's clearer than empty PUT.
|
||||||
|
type PutConfigRequest struct {
|
||||||
|
NtfyBaseURL *string `json:"ntfy_base_url,omitempty"`
|
||||||
|
NtfyAuthToken *string `json:"ntfy_auth_token,omitempty"`
|
||||||
|
ExpoAccessToken *string `json:"expo_access_token,omitempty"`
|
||||||
|
}
|
||||||
|
|
||||||
|
// MaxConfigBodyBytes caps the PUT body size. Push tokens are typically
|
||||||
|
// well under 1 KB but we leave headroom.
|
||||||
|
const MaxConfigBodyBytes = 16 * 1024
|
||||||
|
|
||||||
|
// pushConfigManager is the concrete dependency the Handlers struct holds
|
||||||
|
// — a *push.Manager. We extract it via a small interface for tests.
|
||||||
|
type pushConfigManager interface {
|
||||||
|
IsConfigured(ctx interface{ Done() <-chan struct{} }, namespace string) bool
|
||||||
|
Invalidate(namespace string)
|
||||||
|
}
|
||||||
|
|
||||||
|
// GetConfigHandler — GET /v1/push/config. Returns the namespace's current
|
||||||
|
// push provider config with sensitive fields REDACTED to boolean flags.
|
||||||
|
//
|
||||||
|
// Always 200 + canonical envelope-free body when the request is well-formed
|
||||||
|
// (clients can rely on the shape: an absent provider just shows
|
||||||
|
// `has_ntfy_auth_token: false`). 503 only when the config store itself
|
||||||
|
// isn't available on this gateway (e.g. push subsystem disabled).
|
||||||
|
func (h *Handlers) GetConfigHandler(w http.ResponseWriter, r *http.Request) {
|
||||||
|
if h.configStore == nil {
|
||||||
|
writeError(w, http.StatusServiceUnavailable,
|
||||||
|
"push config store not available on this gateway")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if r.Method != http.MethodGet {
|
||||||
|
writeError(w, http.StatusMethodNotAllowed, "method not allowed")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
ns := resolveNamespace(r)
|
||||||
|
if ns == "" {
|
||||||
|
writeError(w, http.StatusForbidden, "namespace not resolved")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
cfg, err := h.configStore.Get(boundCtx(r), ns)
|
||||||
|
if err != nil && !errors.Is(err, push.ErrConfigNotFound) {
|
||||||
|
h.logger.ComponentWarn("push", "config GET failed",
|
||||||
|
zap.String("namespace", ns), zap.Error(err))
|
||||||
|
writeError(w, http.StatusInternalServerError, "failed to load config")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
// Not found → return empty redacted config. Clients distinguish
|
||||||
|
// "configured" via the boolean fields.
|
||||||
|
if cfg == nil {
|
||||||
|
writeJSON(w, http.StatusOK, push.RedactedConfig{Namespace: ns})
|
||||||
|
return
|
||||||
|
}
|
||||||
|
writeJSON(w, http.StatusOK, cfg.Redacted())
|
||||||
|
}
|
||||||
|
|
||||||
|
// PutConfigHandler — PUT /v1/push/config. Updates the namespace's push
|
||||||
|
// provider config. Field-level semantics: nil JSON values leave the
|
||||||
|
// existing field untouched; explicit empty-strings clear it.
|
||||||
|
//
|
||||||
|
// On success returns the redacted config (same shape as GET) so clients
|
||||||
|
// can confirm what's now in place without echoing back the credentials.
|
||||||
|
//
|
||||||
|
// Invalidates the manager's cached dispatcher for this namespace so the
|
||||||
|
// next push send rebuilds with the fresh config.
|
||||||
|
func (h *Handlers) PutConfigHandler(w http.ResponseWriter, r *http.Request) {
|
||||||
|
if h.configStore == nil {
|
||||||
|
writeError(w, http.StatusServiceUnavailable,
|
||||||
|
"push config store not available on this gateway")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if r.Method != http.MethodPut && r.Method != http.MethodPost {
|
||||||
|
writeError(w, http.StatusMethodNotAllowed, "method not allowed (use PUT)")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
ns := resolveNamespace(r)
|
||||||
|
if ns == "" {
|
||||||
|
writeError(w, http.StatusForbidden, "namespace not resolved")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
caller := resolveCallerUserID(r)
|
||||||
|
if caller == "" {
|
||||||
|
writeError(w, http.StatusUnauthorized, "user authentication required")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
r.Body = http.MaxBytesReader(w, r.Body, MaxConfigBodyBytes)
|
||||||
|
var body PutConfigRequest
|
||||||
|
if err := json.NewDecoder(r.Body).Decode(&body); err != nil {
|
||||||
|
writeError(w, http.StatusBadRequest, "invalid body: expected JSON")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// Validate URL fields look reasonable. We don't do hostname resolution
|
||||||
|
// here (slow, flaky); just reject obviously-wrong schemes.
|
||||||
|
if body.NtfyBaseURL != nil && *body.NtfyBaseURL != "" {
|
||||||
|
if !strings.HasPrefix(*body.NtfyBaseURL, "http://") &&
|
||||||
|
!strings.HasPrefix(*body.NtfyBaseURL, "https://") {
|
||||||
|
writeError(w, http.StatusBadRequest,
|
||||||
|
"ntfy_base_url must start with http:// or https://")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Read existing for merge — PUT semantics are field-level, not
|
||||||
|
// whole-document replace.
|
||||||
|
existing, err := h.configStore.Get(boundCtx(r), ns)
|
||||||
|
if err != nil && !errors.Is(err, push.ErrConfigNotFound) {
|
||||||
|
h.logger.ComponentWarn("push", "config GET-before-PUT failed",
|
||||||
|
zap.String("namespace", ns), zap.Error(err))
|
||||||
|
writeError(w, http.StatusInternalServerError, "failed to load config")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
cfg := push.Config{Namespace: ns, UpdatedAt: time.Now().Unix(), UpdatedBy: caller}
|
||||||
|
if existing != nil {
|
||||||
|
cfg.NtfyBaseURL = existing.NtfyBaseURL
|
||||||
|
cfg.NtfyAuthToken = existing.NtfyAuthToken
|
||||||
|
cfg.ExpoAccessToken = existing.ExpoAccessToken
|
||||||
|
}
|
||||||
|
if body.NtfyBaseURL != nil {
|
||||||
|
cfg.NtfyBaseURL = *body.NtfyBaseURL
|
||||||
|
}
|
||||||
|
if body.NtfyAuthToken != nil {
|
||||||
|
cfg.NtfyAuthToken = *body.NtfyAuthToken
|
||||||
|
}
|
||||||
|
if body.ExpoAccessToken != nil {
|
||||||
|
cfg.ExpoAccessToken = *body.ExpoAccessToken
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := h.configStore.Upsert(boundCtx(r), cfg); err != nil {
|
||||||
|
h.logger.ComponentWarn("push", "config PUT failed",
|
||||||
|
zap.String("namespace", ns), zap.Error(err))
|
||||||
|
writeError(w, http.StatusInternalServerError, "failed to save config")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if h.manager != nil {
|
||||||
|
h.manager.Invalidate(ns)
|
||||||
|
}
|
||||||
|
|
||||||
|
h.logger.ComponentInfo("push", "config updated",
|
||||||
|
zap.String("namespace", ns),
|
||||||
|
zap.String("updated_by", caller),
|
||||||
|
zap.Bool("has_ntfy_url", cfg.NtfyBaseURL != ""),
|
||||||
|
zap.Bool("has_ntfy_auth_token", cfg.NtfyAuthToken != ""),
|
||||||
|
zap.Bool("has_expo_access_token", cfg.ExpoAccessToken != ""),
|
||||||
|
)
|
||||||
|
writeJSON(w, http.StatusOK, cfg.Redacted())
|
||||||
|
}
|
||||||
|
|
||||||
|
// DeleteConfigHandler — DELETE /v1/push/config. Clears the namespace's
|
||||||
|
// row entirely; push reverts to gateway YAML defaults (or 503 "not
|
||||||
|
// configured" if no defaults).
|
||||||
|
func (h *Handlers) DeleteConfigHandler(w http.ResponseWriter, r *http.Request) {
|
||||||
|
if h.configStore == nil {
|
||||||
|
writeError(w, http.StatusServiceUnavailable,
|
||||||
|
"push config store not available on this gateway")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if r.Method != http.MethodDelete {
|
||||||
|
writeError(w, http.StatusMethodNotAllowed, "method not allowed")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
ns := resolveNamespace(r)
|
||||||
|
if ns == "" {
|
||||||
|
writeError(w, http.StatusForbidden, "namespace not resolved")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
caller := resolveCallerUserID(r)
|
||||||
|
if caller == "" {
|
||||||
|
writeError(w, http.StatusUnauthorized, "user authentication required")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := h.configStore.Delete(boundCtx(r), ns); err != nil {
|
||||||
|
h.logger.ComponentWarn("push", "config DELETE failed",
|
||||||
|
zap.String("namespace", ns), zap.Error(err))
|
||||||
|
writeError(w, http.StatusInternalServerError, "failed to delete config")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if h.manager != nil {
|
||||||
|
h.manager.Invalidate(ns)
|
||||||
|
}
|
||||||
|
h.logger.ComponentInfo("push", "config cleared",
|
||||||
|
zap.String("namespace", ns),
|
||||||
|
zap.String("cleared_by", caller),
|
||||||
|
)
|
||||||
|
writeJSON(w, http.StatusOK, map[string]string{"status": "cleared"})
|
||||||
|
}
|
||||||
@ -2,6 +2,7 @@ package push
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
|
"errors"
|
||||||
"net/http"
|
"net/http"
|
||||||
"strings"
|
"strings"
|
||||||
"time"
|
"time"
|
||||||
@ -221,7 +222,9 @@ func (h *Handlers) DeleteDeviceHandler(w http.ResponseWriter, r *http.Request) {
|
|||||||
// exposing this in production.** The WASM hostfunc bypasses this issue
|
// exposing this in production.** The WASM hostfunc bypasses this issue
|
||||||
// because trigger registration already gates which functions exist.
|
// because trigger registration already gates which functions exist.
|
||||||
func (h *Handlers) SendHandler(w http.ResponseWriter, r *http.Request) {
|
func (h *Handlers) SendHandler(w http.ResponseWriter, r *http.Request) {
|
||||||
if h.dispatcher == nil {
|
// Either the per-namespace manager (preferred) or the legacy single
|
||||||
|
// dispatcher must be present. Both nil = push not configured at all.
|
||||||
|
if h.manager == nil && h.dispatcher == nil {
|
||||||
writeError(w, http.StatusServiceUnavailable, "push: dispatcher not configured")
|
writeError(w, http.StatusServiceUnavailable, "push: dispatcher not configured")
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
@ -261,14 +264,27 @@ func (h *Handlers) SendHandler(w http.ResponseWriter, r *http.Request) {
|
|||||||
Sound: body.Sound,
|
Sound: body.Sound,
|
||||||
Data: body.Data,
|
Data: body.Data,
|
||||||
}
|
}
|
||||||
if err := h.dispatcher.SendToUser(boundCtx(r), ns, body.UserID, msg); err != nil {
|
// Prefer the per-namespace Manager when present so per-namespace
|
||||||
|
// config (set via PUT /v1/push/config) takes effect. Fall back to the
|
||||||
|
// legacy single dispatcher only when no Manager is wired.
|
||||||
|
var sendErr error
|
||||||
|
if h.manager != nil {
|
||||||
|
sendErr = h.manager.SendToUser(boundCtx(r), ns, body.UserID, msg)
|
||||||
|
if errors.Is(sendErr, push.ErrPushNotConfigured) {
|
||||||
|
writeError(w, http.StatusServiceUnavailable, sendErr.Error())
|
||||||
|
return
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
sendErr = h.dispatcher.SendToUser(boundCtx(r), ns, body.UserID, msg)
|
||||||
|
}
|
||||||
|
if sendErr != nil {
|
||||||
// Treat as non-fatal: some devices may have failed but others may
|
// Treat as non-fatal: some devices may have failed but others may
|
||||||
// have succeeded. Surface as 502 to signal partial trouble; logs
|
// have succeeded. Surface as 502 to signal partial trouble; logs
|
||||||
// have the per-device detail.
|
// have the per-device detail.
|
||||||
h.logger.ComponentWarn("push", "send to user partially failed",
|
h.logger.ComponentWarn("push", "send to user partially failed",
|
||||||
zap.String("namespace", ns),
|
zap.String("namespace", ns),
|
||||||
zap.String("user_id", body.UserID),
|
zap.String("user_id", body.UserID),
|
||||||
zap.Error(err))
|
zap.Error(sendErr))
|
||||||
writeError(w, http.StatusBadGateway, "one or more devices failed")
|
writeError(w, http.StatusBadGateway, "one or more devices failed")
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|||||||
@ -26,14 +26,26 @@ import (
|
|||||||
|
|
||||||
// Handlers serves the /v1/push/* HTTP endpoints. Construct via NewHandlers;
|
// Handlers serves the /v1/push/* HTTP endpoints. Construct via NewHandlers;
|
||||||
// it's safe for concurrent use.
|
// it's safe for concurrent use.
|
||||||
|
//
|
||||||
|
// dispatcher is the legacy single-tier dispatcher (kept for the device
|
||||||
|
// register/list/delete + send paths). manager is the per-namespace
|
||||||
|
// dispatcher built on top of ConfigStore (new in bug #220 follow-up);
|
||||||
|
// when both are present, send paths route through manager so per-namespace
|
||||||
|
// config wins.
|
||||||
|
//
|
||||||
|
// configStore + manager may be nil on gateways with push fully disabled —
|
||||||
|
// the corresponding endpoints return 503.
|
||||||
type Handlers struct {
|
type Handlers struct {
|
||||||
dispatcher *push.PushDispatcher
|
dispatcher *push.PushDispatcher
|
||||||
store push.PushDeviceStore
|
manager *push.Manager
|
||||||
logger *logging.ColoredLogger
|
store push.PushDeviceStore
|
||||||
|
configStore push.ConfigStore
|
||||||
|
logger *logging.ColoredLogger
|
||||||
}
|
}
|
||||||
|
|
||||||
// NewHandlers constructs a Handlers. Either argument may be nil — in which
|
// NewHandlers constructs a Handlers with the legacy single-namespace
|
||||||
// case the corresponding endpoints return 503 Service Unavailable.
|
// dispatcher only. Use NewHandlersWithManager for per-namespace config
|
||||||
|
// support (bug #220 follow-up).
|
||||||
func NewHandlers(dispatcher *push.PushDispatcher, store push.PushDeviceStore, logger *logging.ColoredLogger) *Handlers {
|
func NewHandlers(dispatcher *push.PushDispatcher, store push.PushDeviceStore, logger *logging.ColoredLogger) *Handlers {
|
||||||
return &Handlers{
|
return &Handlers{
|
||||||
dispatcher: dispatcher,
|
dispatcher: dispatcher,
|
||||||
@ -42,6 +54,23 @@ func NewHandlers(dispatcher *push.PushDispatcher, store push.PushDeviceStore, lo
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// NewHandlersWithManager constructs Handlers wired to a Manager + ConfigStore
|
||||||
|
// for tenant-self-service per-namespace configuration. Send paths use the
|
||||||
|
// manager when present so per-namespace ntfy/expo settings take effect.
|
||||||
|
func NewHandlersWithManager(
|
||||||
|
manager *push.Manager,
|
||||||
|
configStore push.ConfigStore,
|
||||||
|
deviceStore push.PushDeviceStore,
|
||||||
|
logger *logging.ColoredLogger,
|
||||||
|
) *Handlers {
|
||||||
|
return &Handlers{
|
||||||
|
manager: manager,
|
||||||
|
configStore: configStore,
|
||||||
|
store: deviceStore,
|
||||||
|
logger: logger,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// RegisterDeviceRequest is the body of POST /v1/push/devices.
|
// RegisterDeviceRequest is the body of POST /v1/push/devices.
|
||||||
//
|
//
|
||||||
// `device_id` is an app-supplied stable identifier (e.g. the OS-assigned
|
// `device_id` is an app-supplied stable identifier (e.g. the OS-assigned
|
||||||
|
|||||||
@ -173,6 +173,43 @@ func (h *ServerlessHandlers) DeployFunction(w http.ResponseWriter, r *http.Reque
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Register Cron triggers from definition. Mirrors the PubSub branch above:
|
||||||
|
// stale rows (from a previous deploy whose manifest had different cron
|
||||||
|
// schedules) are cleared first, then the manifest's expressions are
|
||||||
|
// re-added. Without this, manifest-driven cron schedules silently never
|
||||||
|
// fired (feature #65 audit).
|
||||||
|
//
|
||||||
|
// We always run the RemoveByFunction even when CronExpressions is empty,
|
||||||
|
// otherwise editing a manifest from `cron_expressions: ["0 3 * * *"]` to
|
||||||
|
// `cron_expressions: []` would leave the old schedule in place.
|
||||||
|
if h.cronStore != nil && fn != nil {
|
||||||
|
if err := h.cronStore.RemoveByFunction(ctx, fn.ID); err != nil {
|
||||||
|
h.logger.Warn("Failed to clear stale cron triggers",
|
||||||
|
zap.String("function", def.Name),
|
||||||
|
zap.Error(err))
|
||||||
|
}
|
||||||
|
// Dedupe identical expressions so a manifest accident
|
||||||
|
// (`cron_expressions: ["0 3 * * *", "0 3 * * *"]`) doesn't fire the
|
||||||
|
// function twice every tick.
|
||||||
|
seen := make(map[string]struct{}, len(def.CronExpressions))
|
||||||
|
for _, expr := range def.CronExpressions {
|
||||||
|
if _, dup := seen[expr]; dup {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
seen[expr] = struct{}{}
|
||||||
|
if _, err := h.cronStore.Add(ctx, fn.ID, expr); err != nil {
|
||||||
|
// Bad expression in a manifest is a user error worth surfacing
|
||||||
|
// but not blocking the deploy — the function itself is fine,
|
||||||
|
// only the schedule is dropped. Logged at WARN so operators
|
||||||
|
// see it; the deploy response still reports success.
|
||||||
|
h.logger.Warn("Failed to register cron trigger from manifest",
|
||||||
|
zap.String("function", def.Name),
|
||||||
|
zap.String("cron_expression", expr),
|
||||||
|
zap.Error(err))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
writeJSON(w, http.StatusCreated, map[string]interface{}{
|
writeJSON(w, http.StatusCreated, map[string]interface{}{
|
||||||
"message": "Function deployed successfully",
|
"message": "Function deployed successfully",
|
||||||
"function": fn,
|
"function": fn,
|
||||||
|
|||||||
@ -110,21 +110,29 @@ type GatewayYAMLWebRTC struct {
|
|||||||
// This must match the yamlCfg struct in cmd/gateway/config.go exactly
|
// This must match the yamlCfg struct in cmd/gateway/config.go exactly
|
||||||
// because the gateway uses strict YAML decoding that rejects unknown fields
|
// because the gateway uses strict YAML decoding that rejects unknown fields
|
||||||
type GatewayYAMLConfig struct {
|
type GatewayYAMLConfig struct {
|
||||||
ListenAddr string `yaml:"listen_addr"`
|
ListenAddr string `yaml:"listen_addr"`
|
||||||
ClientNamespace string `yaml:"client_namespace"`
|
ClientNamespace string `yaml:"client_namespace"`
|
||||||
RQLiteDSN string `yaml:"rqlite_dsn"`
|
RQLiteDSN string `yaml:"rqlite_dsn"`
|
||||||
GlobalRQLiteDSN string `yaml:"global_rqlite_dsn,omitempty"`
|
GlobalRQLiteDSN string `yaml:"global_rqlite_dsn,omitempty"`
|
||||||
BootstrapPeers []string `yaml:"bootstrap_peers,omitempty"`
|
BootstrapPeers []string `yaml:"bootstrap_peers,omitempty"`
|
||||||
EnableHTTPS bool `yaml:"enable_https,omitempty"`
|
EnableHTTPS bool `yaml:"enable_https,omitempty"`
|
||||||
DomainName string `yaml:"domain_name,omitempty"`
|
DomainName string `yaml:"domain_name,omitempty"`
|
||||||
TLSCacheDir string `yaml:"tls_cache_dir,omitempty"`
|
TLSCacheDir string `yaml:"tls_cache_dir,omitempty"`
|
||||||
OlricServers []string `yaml:"olric_servers"`
|
OlricServers []string `yaml:"olric_servers"`
|
||||||
OlricTimeout string `yaml:"olric_timeout,omitempty"`
|
OlricTimeout string `yaml:"olric_timeout,omitempty"`
|
||||||
IPFSClusterAPIURL string `yaml:"ipfs_cluster_api_url,omitempty"`
|
IPFSClusterAPIURL string `yaml:"ipfs_cluster_api_url,omitempty"`
|
||||||
IPFSAPIURL string `yaml:"ipfs_api_url,omitempty"`
|
IPFSAPIURL string `yaml:"ipfs_api_url,omitempty"`
|
||||||
IPFSTimeout string `yaml:"ipfs_timeout,omitempty"`
|
IPFSTimeout string `yaml:"ipfs_timeout,omitempty"`
|
||||||
IPFSReplicationFactor int `yaml:"ipfs_replication_factor,omitempty"`
|
IPFSReplicationFactor int `yaml:"ipfs_replication_factor,omitempty"`
|
||||||
WebRTC GatewayYAMLWebRTC `yaml:"webrtc,omitempty"`
|
WebRTC GatewayYAMLWebRTC `yaml:"webrtc,omitempty"`
|
||||||
|
// ClusterSecretPath points to the host's cluster-secret file. Bug #215
|
||||||
|
// follow-up: namespace gateways spawned by systemd previously had no
|
||||||
|
// way to access the cluster secret, so they fell back to per-node
|
||||||
|
// random JWT signing keys and JWTs were unverifiable cross-node within
|
||||||
|
// the namespace cluster. Setting this path lets the standalone gateway
|
||||||
|
// binary read the secret on startup and derive the canonical Ed25519
|
||||||
|
// key shared with every other gateway in the cluster.
|
||||||
|
ClusterSecretPath string `yaml:"cluster_secret_path,omitempty"`
|
||||||
}
|
}
|
||||||
|
|
||||||
// NewInstanceSpawner creates a new Gateway instance spawner
|
// NewInstanceSpawner creates a new Gateway instance spawner
|
||||||
|
|||||||
87
core/pkg/gateway/instance_spawner_test.go
Normal file
87
core/pkg/gateway/instance_spawner_test.go
Normal file
@ -0,0 +1,87 @@
|
|||||||
|
package gateway
|
||||||
|
|
||||||
|
import (
|
||||||
|
"strings"
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"gopkg.in/yaml.v3"
|
||||||
|
)
|
||||||
|
|
||||||
|
// TestGatewayYAMLConfig_clusterSecretPathRoundTrip is the regression test for
|
||||||
|
// bug #215 reopen. The first fix derived JWT keys from cfg.ClusterSecret, but
|
||||||
|
// namespace gateways spawned via systemd had cfg.ClusterSecret == "" because
|
||||||
|
// the YAML schema lacked any field to carry it. This test guards the YAML tag
|
||||||
|
// so every namespace gateway YAML written by SystemdSpawner.SpawnGateway
|
||||||
|
// carries the path the standalone binary needs.
|
||||||
|
func TestGatewayYAMLConfig_clusterSecretPathRoundTrip(t *testing.T) {
|
||||||
|
cfg := GatewayYAMLConfig{
|
||||||
|
ListenAddr: ":6001",
|
||||||
|
ClientNamespace: "anchat-test",
|
||||||
|
RQLiteDSN: "http://localhost:10000",
|
||||||
|
OlricServers: []string{"localhost:3320"},
|
||||||
|
ClusterSecretPath: "/opt/orama/.orama/secrets/cluster-secret",
|
||||||
|
}
|
||||||
|
out, err := yaml.Marshal(cfg)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("marshal: %v", err)
|
||||||
|
}
|
||||||
|
if !strings.Contains(string(out), "cluster_secret_path: /opt/orama/.orama/secrets/cluster-secret") {
|
||||||
|
t.Fatalf("YAML output missing expected cluster_secret_path line:\n%s", out)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Round-trip into a struct shaped like cmd/gateway/config.go's yamlCfg
|
||||||
|
// (internal type, duplicated here intentionally so this test catches
|
||||||
|
// drift between the two declarations).
|
||||||
|
type webrtc struct {
|
||||||
|
Enabled bool `yaml:"enabled"`
|
||||||
|
SFUPort int `yaml:"sfu_port"`
|
||||||
|
TURNDomain string `yaml:"turn_domain"`
|
||||||
|
TURNSecret string `yaml:"turn_secret"`
|
||||||
|
}
|
||||||
|
type yamlCfgMirror struct {
|
||||||
|
ListenAddr string `yaml:"listen_addr"`
|
||||||
|
ClientNamespace string `yaml:"client_namespace"`
|
||||||
|
RQLiteDSN string `yaml:"rqlite_dsn"`
|
||||||
|
GlobalRQLiteDSN string `yaml:"global_rqlite_dsn"`
|
||||||
|
Peers []string `yaml:"bootstrap_peers"`
|
||||||
|
EnableHTTPS bool `yaml:"enable_https"`
|
||||||
|
DomainName string `yaml:"domain_name"`
|
||||||
|
TLSCacheDir string `yaml:"tls_cache_dir"`
|
||||||
|
OlricServers []string `yaml:"olric_servers"`
|
||||||
|
OlricTimeout string `yaml:"olric_timeout"`
|
||||||
|
IPFSClusterAPIURL string `yaml:"ipfs_cluster_api_url"`
|
||||||
|
IPFSAPIURL string `yaml:"ipfs_api_url"`
|
||||||
|
IPFSTimeout string `yaml:"ipfs_timeout"`
|
||||||
|
IPFSReplicationFactor int `yaml:"ipfs_replication_factor"`
|
||||||
|
WebRTC webrtc `yaml:"webrtc"`
|
||||||
|
ClusterSecretPath string `yaml:"cluster_secret_path"`
|
||||||
|
}
|
||||||
|
var parsed yamlCfgMirror
|
||||||
|
if err := yaml.Unmarshal(out, &parsed); err != nil {
|
||||||
|
t.Fatalf("unmarshal: %v", err)
|
||||||
|
}
|
||||||
|
if parsed.ClusterSecretPath != cfg.ClusterSecretPath {
|
||||||
|
t.Errorf("round-trip mismatch: got %q, want %q", parsed.ClusterSecretPath, cfg.ClusterSecretPath)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestGatewayYAMLConfig_omitWhenEmpty: when the host has no cluster secret,
|
||||||
|
// the field is omitted from the YAML so legacy single-node test rigs don't
|
||||||
|
// see a stray "cluster_secret_path: " line that operators might mistake for
|
||||||
|
// a configuration directive.
|
||||||
|
func TestGatewayYAMLConfig_omitWhenEmpty(t *testing.T) {
|
||||||
|
cfg := GatewayYAMLConfig{
|
||||||
|
ListenAddr: ":6001",
|
||||||
|
ClientNamespace: "ns",
|
||||||
|
RQLiteDSN: "http://localhost:10000",
|
||||||
|
OlricServers: []string{"localhost:3320"},
|
||||||
|
// ClusterSecretPath intentionally empty.
|
||||||
|
}
|
||||||
|
out, err := yaml.Marshal(cfg)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("marshal: %v", err)
|
||||||
|
}
|
||||||
|
if strings.Contains(string(out), "cluster_secret_path") {
|
||||||
|
t.Errorf("empty ClusterSecretPath should be omitted from YAML; got:\n%s", out)
|
||||||
|
}
|
||||||
|
}
|
||||||
194
core/pkg/gateway/internal_auth_test.go
Normal file
194
core/pkg/gateway/internal_auth_test.go
Normal file
@ -0,0 +1,194 @@
|
|||||||
|
package gateway
|
||||||
|
|
||||||
|
import (
|
||||||
|
"net/http"
|
||||||
|
"reflect"
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/DeBrosOfficial/network/pkg/gateway/auth"
|
||||||
|
)
|
||||||
|
|
||||||
|
// TestSetInternalAuthJWTHeaders_setsSubAndCustom verifies that the proxy
|
||||||
|
// helper writes the validated JWT subject and base64(JSON) custom claims to
|
||||||
|
// outbound headers (bug #215 wiring).
|
||||||
|
func TestSetInternalAuthJWTHeaders_setsSubAndCustom(t *testing.T) {
|
||||||
|
h := http.Header{}
|
||||||
|
claims := &auth.JWTClaims{
|
||||||
|
Sub: "BNbN2RNQTsYrrywZCLnhV9j3hd38jwcRqfxBecZX7hDE",
|
||||||
|
Custom: map[string]string{
|
||||||
|
"tier": "pro",
|
||||||
|
"subscription": "active",
|
||||||
|
},
|
||||||
|
}
|
||||||
|
setInternalAuthJWTHeaders(h, claims)
|
||||||
|
|
||||||
|
if got := h.Get(HeaderInternalAuthJWTSub); got != claims.Sub {
|
||||||
|
t.Errorf("Sub header = %q, want %q", got, claims.Sub)
|
||||||
|
}
|
||||||
|
if h.Get(HeaderInternalAuthJWTCustom) == "" {
|
||||||
|
t.Error("Custom header is empty, expected base64 JSON blob")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestSetInternalAuthJWTHeaders_nilClaimsNoOp guards the API-key path: when
|
||||||
|
// no JWT was used to authenticate, no JWT headers should be set.
|
||||||
|
func TestSetInternalAuthJWTHeaders_nilClaimsNoOp(t *testing.T) {
|
||||||
|
h := http.Header{}
|
||||||
|
setInternalAuthJWTHeaders(h, nil)
|
||||||
|
if got := h.Get(HeaderInternalAuthJWTSub); got != "" {
|
||||||
|
t.Errorf("Sub header = %q, want empty (nil claims)", got)
|
||||||
|
}
|
||||||
|
if got := h.Get(HeaderInternalAuthJWTCustom); got != "" {
|
||||||
|
t.Errorf("Custom header = %q, want empty (nil claims)", got)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestSetInternalAuthJWTHeaders_emptySubNotForwarded keeps malformed claims
|
||||||
|
// from polluting the outbound request.
|
||||||
|
func TestSetInternalAuthJWTHeaders_emptySubNotForwarded(t *testing.T) {
|
||||||
|
h := http.Header{}
|
||||||
|
setInternalAuthJWTHeaders(h, &auth.JWTClaims{Sub: " "})
|
||||||
|
if got := h.Get(HeaderInternalAuthJWTSub); got != "" {
|
||||||
|
t.Errorf("Sub header = %q, want empty (whitespace-only sub)", got)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestSetInternalAuthJWTHeaders_emptyCustomMapNotForwarded — empty maps are
|
||||||
|
// skipped to keep headers small and avoid sending `e30=` (base64 of "{}").
|
||||||
|
func TestSetInternalAuthJWTHeaders_emptyCustomMapNotForwarded(t *testing.T) {
|
||||||
|
h := http.Header{}
|
||||||
|
setInternalAuthJWTHeaders(h, &auth.JWTClaims{Sub: "0xabc", Custom: map[string]string{}})
|
||||||
|
if got := h.Get(HeaderInternalAuthJWTCustom); got != "" {
|
||||||
|
t.Errorf("Custom header = %q, want empty (empty custom map)", got)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestClaimsFromInternalAuthHeaders_roundTrip is the end-to-end guarantee:
|
||||||
|
// what the main gateway sets via setInternalAuthJWTHeaders MUST be exactly
|
||||||
|
// what the namespace gateway recovers via claimsFromInternalAuthHeaders.
|
||||||
|
func TestClaimsFromInternalAuthHeaders_roundTrip(t *testing.T) {
|
||||||
|
original := &auth.JWTClaims{
|
||||||
|
Sub: "BNbN2RNQTsYrrywZCLnhV9j3hd38jwcRqfxBecZX7hDE",
|
||||||
|
Custom: map[string]string{
|
||||||
|
"tier": "pro",
|
||||||
|
"subscription": "active",
|
||||||
|
},
|
||||||
|
}
|
||||||
|
h := http.Header{}
|
||||||
|
setInternalAuthJWTHeaders(h, original)
|
||||||
|
|
||||||
|
got := claimsFromInternalAuthHeaders(h, "anchat-test")
|
||||||
|
if got == nil {
|
||||||
|
t.Fatal("recovered claims is nil")
|
||||||
|
}
|
||||||
|
if got.Sub != original.Sub {
|
||||||
|
t.Errorf("Sub = %q, want %q", got.Sub, original.Sub)
|
||||||
|
}
|
||||||
|
if got.Namespace != "anchat-test" {
|
||||||
|
t.Errorf("Namespace = %q, want %q", got.Namespace, "anchat-test")
|
||||||
|
}
|
||||||
|
if !reflect.DeepEqual(got.Custom, original.Custom) {
|
||||||
|
t.Errorf("Custom = %#v, want %#v", got.Custom, original.Custom)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestClaimsFromInternalAuthHeaders_noSubReturnsNil — when no JWT was
|
||||||
|
// forwarded (caller used API key), recovery returns nil so the namespace
|
||||||
|
// gateway leaves ctxKeyJWT unset.
|
||||||
|
func TestClaimsFromInternalAuthHeaders_noSubReturnsNil(t *testing.T) {
|
||||||
|
h := http.Header{}
|
||||||
|
if got := claimsFromInternalAuthHeaders(h, "ns"); got != nil {
|
||||||
|
t.Errorf("expected nil claims when no Sub header present, got %#v", got)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestClaimsFromInternalAuthHeaders_invalidCustomIgnored — corrupt custom
|
||||||
|
// blob must not block recovery of the subject.
|
||||||
|
func TestClaimsFromInternalAuthHeaders_invalidCustomIgnored(t *testing.T) {
|
||||||
|
h := http.Header{}
|
||||||
|
h.Set(HeaderInternalAuthJWTSub, "0xabc")
|
||||||
|
h.Set(HeaderInternalAuthJWTCustom, "not-valid-base64!!!")
|
||||||
|
|
||||||
|
got := claimsFromInternalAuthHeaders(h, "ns")
|
||||||
|
if got == nil {
|
||||||
|
t.Fatal("recovered claims is nil despite valid Sub header")
|
||||||
|
}
|
||||||
|
if got.Sub != "0xabc" {
|
||||||
|
t.Errorf("Sub = %q, want %q", got.Sub, "0xabc")
|
||||||
|
}
|
||||||
|
if got.Custom != nil {
|
||||||
|
t.Errorf("Custom = %#v, want nil (invalid blob should be silently dropped)", got.Custom)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestStripInboundInternalAuthHeaders_removesAllForgedHeaders is the regression
|
||||||
|
// test for the security audit's CRITICAL finding (bug #215 follow-up). Without
|
||||||
|
// this strip, an external attacker could forge X-Internal-Auth-JWT-Sub on a
|
||||||
|
// public path and impersonate any wallet on the namespace gateway, which gates
|
||||||
|
// on source IP only.
|
||||||
|
func TestStripInboundInternalAuthHeaders_removesAllForgedHeaders(t *testing.T) {
|
||||||
|
h := http.Header{}
|
||||||
|
h.Set(HeaderInternalAuthValidated, "true")
|
||||||
|
h.Set(HeaderInternalAuthNamespace, "victim-namespace")
|
||||||
|
h.Set(HeaderInternalAuthJWTSub, "0xVICTIM_WALLET")
|
||||||
|
h.Set(HeaderInternalAuthJWTCustom, "ZXZpbA==") // base64("evil")
|
||||||
|
// Unrelated headers MUST survive the strip.
|
||||||
|
h.Set("X-Forwarded-For", "1.2.3.4")
|
||||||
|
h.Set("Authorization", "Bearer some-token")
|
||||||
|
|
||||||
|
stripInboundInternalAuthHeaders(h)
|
||||||
|
|
||||||
|
for _, name := range []string{
|
||||||
|
HeaderInternalAuthValidated,
|
||||||
|
HeaderInternalAuthNamespace,
|
||||||
|
HeaderInternalAuthJWTSub,
|
||||||
|
HeaderInternalAuthJWTCustom,
|
||||||
|
} {
|
||||||
|
if got := h.Get(name); got != "" {
|
||||||
|
t.Errorf("after strip, %s = %q, want empty", name, got)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if got := h.Get("X-Forwarded-For"); got != "1.2.3.4" {
|
||||||
|
t.Errorf("X-Forwarded-For lost: %q", got)
|
||||||
|
}
|
||||||
|
if got := h.Get("Authorization"); got != "Bearer some-token" {
|
||||||
|
t.Errorf("Authorization lost: %q", got)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestStripThenSet_attackerSubReplaced exercises the proxy-hop sequence:
|
||||||
|
// strip-then-set. An attacker-controlled JWT-Sub header MUST be replaced by
|
||||||
|
// the gateway-validated value (or removed entirely when the caller used an
|
||||||
|
// API key and validatedClaims is nil).
|
||||||
|
func TestStripThenSet_attackerSubReplaced(t *testing.T) {
|
||||||
|
t.Run("validated_jwt_overwrites_attacker_sub", func(t *testing.T) {
|
||||||
|
h := http.Header{}
|
||||||
|
h.Set(HeaderInternalAuthJWTSub, "0xATTACKER")
|
||||||
|
|
||||||
|
stripInboundInternalAuthHeaders(h)
|
||||||
|
// Mimic the proxy hop: gateway authenticated user as 0xLEGIT.
|
||||||
|
h.Set(HeaderInternalAuthValidated, "true")
|
||||||
|
h.Set(HeaderInternalAuthNamespace, "ns")
|
||||||
|
setInternalAuthJWTHeaders(h, &auth.JWTClaims{Sub: "0xLEGIT"})
|
||||||
|
|
||||||
|
if got := h.Get(HeaderInternalAuthJWTSub); got != "0xLEGIT" {
|
||||||
|
t.Errorf("Sub = %q, want %q (attacker value should be overwritten)", got, "0xLEGIT")
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("api_key_path_drops_attacker_sub", func(t *testing.T) {
|
||||||
|
h := http.Header{}
|
||||||
|
h.Set(HeaderInternalAuthJWTSub, "0xATTACKER")
|
||||||
|
|
||||||
|
stripInboundInternalAuthHeaders(h)
|
||||||
|
// Mimic the proxy hop: gateway authenticated user via API key
|
||||||
|
// (validatedClaims is nil).
|
||||||
|
h.Set(HeaderInternalAuthValidated, "true")
|
||||||
|
h.Set(HeaderInternalAuthNamespace, "ns")
|
||||||
|
setInternalAuthJWTHeaders(h, nil)
|
||||||
|
|
||||||
|
if got := h.Get(HeaderInternalAuthJWTSub); got != "" {
|
||||||
|
t.Errorf("Sub = %q, want empty (attacker value must not survive API-key auth)", got)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
@ -2,6 +2,8 @@ package gateway
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
|
"encoding/base64"
|
||||||
|
"encoding/json"
|
||||||
"errors"
|
"errors"
|
||||||
"fmt"
|
"fmt"
|
||||||
"hash/fnv"
|
"hash/fnv"
|
||||||
@ -70,27 +72,114 @@ const (
|
|||||||
HeaderInternalAuthNamespace = "X-Internal-Auth-Namespace"
|
HeaderInternalAuthNamespace = "X-Internal-Auth-Namespace"
|
||||||
// HeaderInternalAuthValidated indicates the request was pre-authenticated by main gateway
|
// HeaderInternalAuthValidated indicates the request was pre-authenticated by main gateway
|
||||||
HeaderInternalAuthValidated = "X-Internal-Auth-Validated"
|
HeaderInternalAuthValidated = "X-Internal-Auth-Validated"
|
||||||
|
// HeaderInternalAuthJWTSub carries the validated JWT `sub` claim across the
|
||||||
|
// internal proxy hop. Bug #215: without this, namespace gateways received
|
||||||
|
// only the namespace and host functions saw empty `caller_jwt_subject`.
|
||||||
|
// Trusted only when X-Internal-Auth-Validated=true AND the source IP is
|
||||||
|
// internal (WireGuard mesh / loopback) — same trust model as the
|
||||||
|
// namespace header.
|
||||||
|
HeaderInternalAuthJWTSub = "X-Internal-Auth-JWT-Sub"
|
||||||
|
// HeaderInternalAuthJWTCustom carries the validated JWT custom-claims map
|
||||||
|
// as a base64(JSON) blob. Optional; absent means the original token had
|
||||||
|
// no custom claims. Same trust gate as HeaderInternalAuthJWTSub.
|
||||||
|
HeaderInternalAuthJWTCustom = "X-Internal-Auth-JWT-Custom"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
// setInternalAuthJWTHeaders writes the validated JWT subject and custom claims
|
||||||
|
// onto an outbound proxy request so the destination gateway can hydrate
|
||||||
|
// ctxKeyJWT. Called only after auth has been validated AND we're proxying over
|
||||||
|
// the WireGuard mesh (the destination still gates trust by source IP).
|
||||||
|
//
|
||||||
|
// No-op when claims is nil (caller authenticated with API key, not JWT).
|
||||||
|
func setInternalAuthJWTHeaders(h http.Header, claims *auth.JWTClaims) {
|
||||||
|
if claims == nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if sub := strings.TrimSpace(claims.Sub); sub != "" {
|
||||||
|
h.Set(HeaderInternalAuthJWTSub, sub)
|
||||||
|
}
|
||||||
|
if len(claims.Custom) > 0 {
|
||||||
|
buf, err := json.Marshal(claims.Custom)
|
||||||
|
if err == nil {
|
||||||
|
h.Set(HeaderInternalAuthJWTCustom, base64.StdEncoding.EncodeToString(buf))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// stripInboundInternalAuthHeaders deletes the X-Internal-Auth-* headers from
|
||||||
|
// the supplied header set. MUST be called on every outbound request the main
|
||||||
|
// gateway proxies to a namespace gateway BEFORE we (conditionally) re-set
|
||||||
|
// them with the validated values.
|
||||||
|
//
|
||||||
|
// SECURITY (bug #215 follow-up — critical): without this, an external client
|
||||||
|
// could send `X-Internal-Auth-Validated: true` + `X-Internal-Auth-JWT-Sub:
|
||||||
|
// <victim-wallet>` directly. The header-copy loop at the proxy hop would
|
||||||
|
// forward those headers verbatim. The namespace gateway, seeing the request
|
||||||
|
// arrive from the main gateway's WireGuard IP, would trust them and hydrate
|
||||||
|
// ctxKeyJWT with attacker-controlled subject — full cross-namespace identity
|
||||||
|
// spoofing on every public path.
|
||||||
|
//
|
||||||
|
// Stripping FIRST and conditionally setting AFTER closes that hole regardless
|
||||||
|
// of which auth path won (JWT, API key, or no creds on a public path).
|
||||||
|
func stripInboundInternalAuthHeaders(h http.Header) {
|
||||||
|
h.Del(HeaderInternalAuthValidated)
|
||||||
|
h.Del(HeaderInternalAuthNamespace)
|
||||||
|
h.Del(HeaderInternalAuthJWTSub)
|
||||||
|
h.Del(HeaderInternalAuthJWTCustom)
|
||||||
|
}
|
||||||
|
|
||||||
|
// claimsFromInternalAuthHeaders rebuilds a *auth.JWTClaims from the trusted
|
||||||
|
// internal-auth headers. Returns nil if no JWT subject was forwarded (the
|
||||||
|
// caller used an API key, or the request didn't carry validated JWT data).
|
||||||
|
//
|
||||||
|
// SECURITY: this MUST only be called after the caller has confirmed
|
||||||
|
// HeaderInternalAuthValidated == "true" AND the source IP is internal AND
|
||||||
|
// the proxy hop has stripped any inbound copies of these headers (see
|
||||||
|
// stripInboundInternalAuthHeaders). Skipping any of those would let any
|
||||||
|
// client forge JWT identities.
|
||||||
|
func claimsFromInternalAuthHeaders(h http.Header, namespace string) *auth.JWTClaims {
|
||||||
|
sub := strings.TrimSpace(h.Get(HeaderInternalAuthJWTSub))
|
||||||
|
if sub == "" {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
claims := &auth.JWTClaims{
|
||||||
|
Sub: sub,
|
||||||
|
Namespace: namespace,
|
||||||
|
}
|
||||||
|
if raw := strings.TrimSpace(h.Get(HeaderInternalAuthJWTCustom)); raw != "" {
|
||||||
|
if decoded, err := base64.StdEncoding.DecodeString(raw); err == nil {
|
||||||
|
var custom map[string]string
|
||||||
|
if json.Unmarshal(decoded, &custom) == nil && len(custom) > 0 {
|
||||||
|
claims.Custom = custom
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return claims
|
||||||
|
}
|
||||||
|
|
||||||
// validateAuthForNamespaceProxy validates the request's auth credentials against the MAIN
|
// validateAuthForNamespaceProxy validates the request's auth credentials against the MAIN
|
||||||
// cluster RQLite and returns the namespace the credentials belong to.
|
// cluster RQLite and returns the namespace the credentials belong to plus the
|
||||||
|
// JWT claims when the caller authenticated with a Bearer JWT.
|
||||||
// This is used by handleNamespaceGatewayRequest to pre-authenticate before proxying to
|
// This is used by handleNamespaceGatewayRequest to pre-authenticate before proxying to
|
||||||
// namespace gateways (which have isolated RQLites without API keys).
|
// namespace gateways (which have isolated RQLites without API keys).
|
||||||
//
|
//
|
||||||
// Returns:
|
// Returns:
|
||||||
// - (namespace, "") if auth is valid
|
// - (namespace, claims, "") if auth is valid via JWT — claims may be used to
|
||||||
// - ("", errorMessage) if auth is invalid
|
// populate internal-auth headers so the namespace gateway can hydrate
|
||||||
// - ("", "") if no auth credentials provided (for public paths)
|
// `caller_jwt_subject` for serverless host functions (bug #215).
|
||||||
func (g *Gateway) validateAuthForNamespaceProxy(r *http.Request) (namespace string, errMsg string) {
|
// - (namespace, nil, "") if auth is valid via API key.
|
||||||
|
// - ("", nil, errorMessage) if auth is invalid.
|
||||||
|
// - ("", nil, "") if no auth credentials provided (for public paths).
|
||||||
|
func (g *Gateway) validateAuthForNamespaceProxy(r *http.Request) (namespace string, claims *auth.JWTClaims, errMsg string) {
|
||||||
// 1) Try JWT Bearer first
|
// 1) Try JWT Bearer first
|
||||||
if auth := r.Header.Get("Authorization"); auth != "" {
|
if authHeader := r.Header.Get("Authorization"); authHeader != "" {
|
||||||
lower := strings.ToLower(auth)
|
lower := strings.ToLower(authHeader)
|
||||||
if strings.HasPrefix(lower, "bearer ") {
|
if strings.HasPrefix(lower, "bearer ") {
|
||||||
tok := strings.TrimSpace(auth[len("Bearer "):])
|
tok := strings.TrimSpace(authHeader[len("Bearer "):])
|
||||||
if strings.Count(tok, ".") == 2 {
|
if strings.Count(tok, ".") == 2 {
|
||||||
if claims, err := g.authService.ParseAndVerifyJWT(tok); err == nil {
|
if c, err := g.authService.ParseAndVerifyJWT(tok); err == nil {
|
||||||
if ns := strings.TrimSpace(claims.Namespace); ns != "" {
|
if ns := strings.TrimSpace(c.Namespace); ns != "" {
|
||||||
return ns, ""
|
return ns, c, ""
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// JWT verification failed - fall through to API key check
|
// JWT verification failed - fall through to API key check
|
||||||
@ -101,14 +190,14 @@ func (g *Gateway) validateAuthForNamespaceProxy(r *http.Request) (namespace stri
|
|||||||
// 2) Try API key
|
// 2) Try API key
|
||||||
key := extractAPIKey(r)
|
key := extractAPIKey(r)
|
||||||
if key == "" {
|
if key == "" {
|
||||||
return "", "" // No credentials provided
|
return "", nil, "" // No credentials provided
|
||||||
}
|
}
|
||||||
|
|
||||||
ns, err := g.lookupAPIKeyNamespace(r.Context(), key, g.client)
|
ns, err := g.lookupAPIKeyNamespace(r.Context(), key, g.client)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return "", "invalid API key"
|
return "", nil, "invalid API key"
|
||||||
}
|
}
|
||||||
return ns, ""
|
return ns, nil, ""
|
||||||
}
|
}
|
||||||
|
|
||||||
// lookupAPIKeyNamespace resolves an API key to its namespace using cache and DB.
|
// lookupAPIKeyNamespace resolves an API key to its namespace using cache and DB.
|
||||||
@ -325,6 +414,17 @@ func (g *Gateway) authMiddleware(next http.Handler) http.Handler {
|
|||||||
if ns != "" {
|
if ns != "" {
|
||||||
// Pre-authenticated by main gateway - trust the namespace
|
// Pre-authenticated by main gateway - trust the namespace
|
||||||
reqCtx := context.WithValue(r.Context(), CtxKeyNamespaceOverride, ns)
|
reqCtx := context.WithValue(r.Context(), CtxKeyNamespaceOverride, ns)
|
||||||
|
// Bug #215: also rebuild ctxKeyJWT from the trusted
|
||||||
|
// internal-auth headers (subject + custom claims) so
|
||||||
|
// serverless host functions see a non-empty
|
||||||
|
// caller_jwt_subject. We deliberately do NOT re-verify
|
||||||
|
// the JWT here — namespace gateways may not share the
|
||||||
|
// signing key with the main gateway, and the main gateway
|
||||||
|
// already verified before forwarding. Trust gate is the
|
||||||
|
// source-IP check above.
|
||||||
|
if claims := claimsFromInternalAuthHeaders(r.Header, ns); claims != nil {
|
||||||
|
reqCtx = context.WithValue(reqCtx, ctxKeyJWT, claims)
|
||||||
|
}
|
||||||
next.ServeHTTP(w, r.WithContext(reqCtx))
|
next.ServeHTTP(w, r.WithContext(reqCtx))
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
@ -916,7 +1016,7 @@ func (g *Gateway) domainRoutingMiddleware(next http.Handler) http.Handler {
|
|||||||
func (g *Gateway) handleNamespaceGatewayRequest(w http.ResponseWriter, r *http.Request, namespaceName string) {
|
func (g *Gateway) handleNamespaceGatewayRequest(w http.ResponseWriter, r *http.Request, namespaceName string) {
|
||||||
// Validate auth against main cluster RQLite BEFORE proxying
|
// Validate auth against main cluster RQLite BEFORE proxying
|
||||||
// This ensures API keys work even though they're not in the namespace's RQLite
|
// This ensures API keys work even though they're not in the namespace's RQLite
|
||||||
validatedNamespace, authErr := g.validateAuthForNamespaceProxy(r)
|
validatedNamespace, validatedClaims, authErr := g.validateAuthForNamespaceProxy(r)
|
||||||
if authErr != "" && !isPublicPath(r.URL.Path) {
|
if authErr != "" && !isPublicPath(r.URL.Path) {
|
||||||
w.Header().Set("WWW-Authenticate", "Bearer error=\"invalid_token\"")
|
w.Header().Set("WWW-Authenticate", "Bearer error=\"invalid_token\"")
|
||||||
writeError(w, http.StatusUnauthorized, authErr)
|
writeError(w, http.StatusUnauthorized, authErr)
|
||||||
@ -1078,10 +1178,19 @@ func (g *Gateway) handleNamespaceGatewayRequest(w http.ResponseWriter, r *http.R
|
|||||||
r.Header.Set("X-Forwarded-For", getClientIP(r))
|
r.Header.Set("X-Forwarded-For", getClientIP(r))
|
||||||
r.Header.Set("X-Forwarded-Proto", "https")
|
r.Header.Set("X-Forwarded-Proto", "https")
|
||||||
r.Header.Set("X-Forwarded-Host", r.Host)
|
r.Header.Set("X-Forwarded-Host", r.Host)
|
||||||
|
// SECURITY (bug #215 follow-up): drop any X-Internal-Auth-* headers
|
||||||
|
// the external client may have set BEFORE applying the trusted
|
||||||
|
// values from this gateway. Prevents identity spoofing on the
|
||||||
|
// namespace gateway, which gates on source IP only.
|
||||||
|
stripInboundInternalAuthHeaders(r.Header)
|
||||||
// Set internal auth headers if auth was validated
|
// Set internal auth headers if auth was validated
|
||||||
if validatedNamespace != "" {
|
if validatedNamespace != "" {
|
||||||
r.Header.Set(HeaderInternalAuthValidated, "true")
|
r.Header.Set(HeaderInternalAuthValidated, "true")
|
||||||
r.Header.Set(HeaderInternalAuthNamespace, validatedNamespace)
|
r.Header.Set(HeaderInternalAuthNamespace, validatedNamespace)
|
||||||
|
// Bug #215: forward validated JWT subject + custom claims so the
|
||||||
|
// namespace gateway's auth middleware can hydrate ctxKeyJWT and
|
||||||
|
// host functions see a non-empty caller_jwt_subject.
|
||||||
|
setInternalAuthJWTHeaders(r.Header, validatedClaims)
|
||||||
}
|
}
|
||||||
r.URL.Scheme = "http"
|
r.URL.Scheme = "http"
|
||||||
r.URL.Host = targetHost
|
r.URL.Host = targetHost
|
||||||
@ -1120,11 +1229,21 @@ func (g *Gateway) handleNamespaceGatewayRequest(w http.ResponseWriter, r *http.R
|
|||||||
proxyReq.Header.Set("X-Forwarded-Host", r.Host)
|
proxyReq.Header.Set("X-Forwarded-Host", r.Host)
|
||||||
proxyReq.Header.Set("X-Original-Host", r.Host)
|
proxyReq.Header.Set("X-Original-Host", r.Host)
|
||||||
|
|
||||||
|
// SECURITY (bug #215 follow-up): drop any X-Internal-Auth-* headers
|
||||||
|
// the header-copy loop above may have inherited from the inbound
|
||||||
|
// request BEFORE writing the trusted values. The namespace gateway
|
||||||
|
// gates on source IP only; without this strip, an external attacker
|
||||||
|
// could forge X-Internal-Auth-JWT-Sub and impersonate any wallet.
|
||||||
|
stripInboundInternalAuthHeaders(proxyReq.Header)
|
||||||
// Set internal auth headers if auth was validated by main gateway
|
// Set internal auth headers if auth was validated by main gateway
|
||||||
// This allows the namespace gateway to trust the authentication
|
// This allows the namespace gateway to trust the authentication
|
||||||
if validatedNamespace != "" {
|
if validatedNamespace != "" {
|
||||||
proxyReq.Header.Set(HeaderInternalAuthValidated, "true")
|
proxyReq.Header.Set(HeaderInternalAuthValidated, "true")
|
||||||
proxyReq.Header.Set(HeaderInternalAuthNamespace, validatedNamespace)
|
proxyReq.Header.Set(HeaderInternalAuthNamespace, validatedNamespace)
|
||||||
|
// Bug #215: forward validated JWT subject + custom claims so the
|
||||||
|
// namespace gateway's auth middleware can hydrate ctxKeyJWT and
|
||||||
|
// host functions see a non-empty caller_jwt_subject.
|
||||||
|
setInternalAuthJWTHeaders(proxyReq.Header, validatedClaims)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Pick the proxy timeout based on the path's expected work bound.
|
// Pick the proxy timeout based on the path's expected work bound.
|
||||||
@ -1353,6 +1472,11 @@ serveLocal:
|
|||||||
targetHost := "localhost:" + strconv.Itoa(deployment.Port)
|
targetHost := "localhost:" + strconv.Itoa(deployment.Port)
|
||||||
target := "http://" + targetHost
|
target := "http://" + targetHost
|
||||||
|
|
||||||
|
// SECURITY (bug #215 follow-up): drop X-Internal-Auth-* headers before
|
||||||
|
// forwarding to a customer-deployed app on localhost. The app has no
|
||||||
|
// reason to see Orama's internal-auth metadata, and stripping prevents
|
||||||
|
// any accidental leakage if the customer ever adds header-based trust.
|
||||||
|
stripInboundInternalAuthHeaders(r.Header)
|
||||||
// Set proxy headers
|
// Set proxy headers
|
||||||
r.Header.Set("X-Forwarded-For", getClientIP(r))
|
r.Header.Set("X-Forwarded-For", getClientIP(r))
|
||||||
r.Header.Set("X-Forwarded-Proto", "https")
|
r.Header.Set("X-Forwarded-Proto", "https")
|
||||||
@ -1465,6 +1589,12 @@ func (g *Gateway) proxyCrossNode(w http.ResponseWriter, r *http.Request, deploym
|
|||||||
|
|
||||||
// Handle WebSocket upgrade requests specially
|
// Handle WebSocket upgrade requests specially
|
||||||
if isWebSocketUpgrade(r) {
|
if isWebSocketUpgrade(r) {
|
||||||
|
// SECURITY (bug #215 follow-up): drop X-Internal-Auth-* headers from
|
||||||
|
// the inbound request before forwarding to another node. The target
|
||||||
|
// node's authMiddleware gates internal-auth trust on source IP only;
|
||||||
|
// without this strip, an external attacker could forge JWT-Sub here
|
||||||
|
// and have the home node trust it after the cross-node hop.
|
||||||
|
stripInboundInternalAuthHeaders(r.Header)
|
||||||
r.Header.Set("X-Forwarded-For", getClientIP(r))
|
r.Header.Set("X-Forwarded-For", getClientIP(r))
|
||||||
r.Header.Set("X-Orama-Proxy-Node", g.nodePeerID)
|
r.Header.Set("X-Orama-Proxy-Node", g.nodePeerID)
|
||||||
r.URL.Scheme = "http"
|
r.URL.Scheme = "http"
|
||||||
@ -1490,6 +1620,9 @@ func (g *Gateway) proxyCrossNode(w http.ResponseWriter, r *http.Request, deploym
|
|||||||
proxyReq.Header.Add(key, value)
|
proxyReq.Header.Add(key, value)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
// SECURITY (bug #215 follow-up): drop any X-Internal-Auth-* headers the
|
||||||
|
// inbound request may have carried. See WS branch above.
|
||||||
|
stripInboundInternalAuthHeaders(proxyReq.Header)
|
||||||
proxyReq.Host = r.Host // Keep original host for domain routing on target node
|
proxyReq.Host = r.Host // Keep original host for domain routing on target node
|
||||||
proxyReq.Header.Set("X-Forwarded-For", getClientIP(r))
|
proxyReq.Header.Set("X-Forwarded-For", getClientIP(r))
|
||||||
proxyReq.Header.Set("X-Orama-Proxy-Node", g.nodePeerID) // Prevent loops
|
proxyReq.Header.Set("X-Orama-Proxy-Node", g.nodePeerID) // Prevent loops
|
||||||
@ -1590,6 +1723,8 @@ func (g *Gateway) proxyCrossNodeToIP(w http.ResponseWriter, r *http.Request, dep
|
|||||||
|
|
||||||
// Handle WebSocket upgrade requests specially
|
// Handle WebSocket upgrade requests specially
|
||||||
if isWebSocketUpgrade(r) {
|
if isWebSocketUpgrade(r) {
|
||||||
|
// SECURITY (bug #215 follow-up): see proxyCrossNode for rationale.
|
||||||
|
stripInboundInternalAuthHeaders(r.Header)
|
||||||
r.Header.Set("X-Forwarded-For", getClientIP(r))
|
r.Header.Set("X-Forwarded-For", getClientIP(r))
|
||||||
r.Header.Set("X-Orama-Proxy-Node", g.nodePeerID)
|
r.Header.Set("X-Orama-Proxy-Node", g.nodePeerID)
|
||||||
r.URL.Scheme = "http"
|
r.URL.Scheme = "http"
|
||||||
@ -1613,6 +1748,8 @@ func (g *Gateway) proxyCrossNodeToIP(w http.ResponseWriter, r *http.Request, dep
|
|||||||
proxyReq.Header.Add(key, value)
|
proxyReq.Header.Add(key, value)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
// SECURITY (bug #215 follow-up): see proxyCrossNode for rationale.
|
||||||
|
stripInboundInternalAuthHeaders(proxyReq.Header)
|
||||||
proxyReq.Host = r.Host
|
proxyReq.Host = r.Host
|
||||||
proxyReq.Header.Set("X-Forwarded-For", getClientIP(r))
|
proxyReq.Header.Set("X-Forwarded-For", getClientIP(r))
|
||||||
proxyReq.Header.Set("X-Orama-Proxy-Node", g.nodePeerID)
|
proxyReq.Header.Set("X-Orama-Proxy-Node", g.nodePeerID)
|
||||||
|
|||||||
@ -62,3 +62,27 @@ func (g *Gateway) pushSendHandler(w http.ResponseWriter, r *http.Request) {
|
|||||||
}
|
}
|
||||||
g.pushHandlers.SendHandler(w, r)
|
g.pushHandlers.SendHandler(w, r)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// pushConfigHandler dispatches GET / PUT / DELETE on /v1/push/config — the
|
||||||
|
// tenant-self-service entrypoint for per-namespace push provider config
|
||||||
|
// (bug #220 follow-up). When push is fully disabled returns 503 with the
|
||||||
|
// same actionable message as the device endpoints.
|
||||||
|
func (g *Gateway) pushConfigHandler(w http.ResponseWriter, r *http.Request) {
|
||||||
|
if g.pushHandlers == nil {
|
||||||
|
httputil.WriteRPCError(w, http.StatusServiceUnavailable,
|
||||||
|
httputil.ErrCodeServiceUnavailable, pushNotConfiguredMessage)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
switch r.Method {
|
||||||
|
case http.MethodGet:
|
||||||
|
g.pushHandlers.GetConfigHandler(w, r)
|
||||||
|
case http.MethodPut, http.MethodPost:
|
||||||
|
g.pushHandlers.PutConfigHandler(w, r)
|
||||||
|
case http.MethodDelete:
|
||||||
|
g.pushHandlers.DeleteConfigHandler(w, r)
|
||||||
|
default:
|
||||||
|
httputil.WriteRPCError(w, http.StatusMethodNotAllowed,
|
||||||
|
httputil.ErrCodeValidationFailed,
|
||||||
|
"method not allowed: use GET to read, PUT to update, or DELETE to clear")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|||||||
@ -139,6 +139,11 @@ func (g *Gateway) Routes() http.Handler {
|
|||||||
mux.HandleFunc("/v1/push/devices/", g.pushDevicesByIDHandler)
|
mux.HandleFunc("/v1/push/devices/", g.pushDevicesByIDHandler)
|
||||||
mux.HandleFunc("/v1/push/send", g.pushSendHandler)
|
mux.HandleFunc("/v1/push/send", g.pushSendHandler)
|
||||||
|
|
||||||
|
// Per-namespace push provider configuration (bug #220 follow-up):
|
||||||
|
// GET / PUT / DELETE — tenants self-serve their ntfy/expo credentials
|
||||||
|
// instead of filing an ops ticket. Method dispatched in the handler.
|
||||||
|
mux.HandleFunc("/v1/push/config", g.pushConfigHandler)
|
||||||
|
|
||||||
// operator node management (wallet JWT auth via middleware)
|
// operator node management (wallet JWT auth via middleware)
|
||||||
if g.operatorHandler != nil {
|
if g.operatorHandler != nil {
|
||||||
mux.HandleFunc("/v1/operator/invite", g.operatorHandler.HandleInvite)
|
mux.HandleFunc("/v1/operator/invite", g.operatorHandler.HandleInvite)
|
||||||
|
|||||||
@ -4,19 +4,27 @@ import (
|
|||||||
"crypto/ed25519"
|
"crypto/ed25519"
|
||||||
"crypto/rand"
|
"crypto/rand"
|
||||||
"crypto/rsa"
|
"crypto/rsa"
|
||||||
|
"crypto/sha256"
|
||||||
"crypto/x509"
|
"crypto/x509"
|
||||||
"encoding/pem"
|
"encoding/pem"
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"io"
|
||||||
"os"
|
"os"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
|
|
||||||
"github.com/DeBrosOfficial/network/pkg/logging"
|
"github.com/DeBrosOfficial/network/pkg/logging"
|
||||||
"go.uber.org/zap"
|
"go.uber.org/zap"
|
||||||
|
"golang.org/x/crypto/hkdf"
|
||||||
)
|
)
|
||||||
|
|
||||||
const jwtKeyFileName = "jwt-signing-key.pem"
|
const jwtKeyFileName = "jwt-signing-key.pem"
|
||||||
const eddsaKeyFileName = "jwt-eddsa-key.pem"
|
const eddsaKeyFileName = "jwt-eddsa-key.pem"
|
||||||
|
|
||||||
|
// jwtEdDSADerivePurpose is the HKDF info string used to derive the cluster-wide
|
||||||
|
// Ed25519 JWT signing seed from the cluster secret. Bumping the version label
|
||||||
|
// (e.g. "...-v2") on disk = full re-derive + invalidates old tokens.
|
||||||
|
const jwtEdDSADerivePurpose = "orama-jwt-eddsa-v1"
|
||||||
|
|
||||||
// loadOrCreateSigningKey loads the JWT signing key from disk, or generates a new one
|
// loadOrCreateSigningKey loads the JWT signing key from disk, or generates a new one
|
||||||
// if none exists. This ensures JWTs survive gateway restarts.
|
// if none exists. This ensures JWTs survive gateway restarts.
|
||||||
func loadOrCreateSigningKey(dataDir string, logger *logging.ColoredLogger) ([]byte, error) {
|
func loadOrCreateSigningKey(dataDir string, logger *logging.ColoredLogger) ([]byte, error) {
|
||||||
@ -65,10 +73,37 @@ func loadOrCreateSigningKey(dataDir string, logger *logging.ColoredLogger) ([]by
|
|||||||
}
|
}
|
||||||
|
|
||||||
// loadOrCreateEdSigningKey loads or generates an Ed25519 private key for EdDSA JWT signing.
|
// loadOrCreateEdSigningKey loads or generates an Ed25519 private key for EdDSA JWT signing.
|
||||||
// The key is stored as a PKCS8-encoded PEM file alongside the RSA key.
|
//
|
||||||
func loadOrCreateEdSigningKey(dataDir string, logger *logging.ColoredLogger) (ed25519.PrivateKey, error) {
|
// Bug #215 fix: when a non-empty clusterSecret is provided, the key is derived
|
||||||
|
// DETERMINISTICALLY from the cluster secret using HKDF-SHA256. This means every
|
||||||
|
// gateway in the cluster ends up with the same Ed25519 keypair, so a JWT signed
|
||||||
|
// by one gateway can be verified by any other gateway. Without this, each
|
||||||
|
// gateway minted its own random key on first boot, JWTs were unverifiable
|
||||||
|
// cross-gateway, and host functions saw empty `caller_jwt_subject` whenever
|
||||||
|
// invoke landed on a different node than `/v1/auth/login`.
|
||||||
|
//
|
||||||
|
// When clusterSecret is empty (single-node test rigs, no shared secret),
|
||||||
|
// falls back to the legacy random-per-node behaviour.
|
||||||
|
//
|
||||||
|
// On-disk PEM is the source of truth at runtime; if it doesn't match the
|
||||||
|
// derivation (e.g. left over from before this fix, or cluster secret rotated
|
||||||
|
// the label), the file is rewritten with the canonical key. Old tokens become
|
||||||
|
// unverifiable but JWT lifetime is short (15 min) so the disruption is bounded.
|
||||||
|
func loadOrCreateEdSigningKey(dataDir, clusterSecret string, logger *logging.ColoredLogger) (ed25519.PrivateKey, error) {
|
||||||
keyPath := filepath.Join(dataDir, "secrets", eddsaKeyFileName)
|
keyPath := filepath.Join(dataDir, "secrets", eddsaKeyFileName)
|
||||||
|
|
||||||
|
// Compute the canonical key for this cluster (if a secret is available).
|
||||||
|
// Empty clusterSecret = legacy mode, no canonical key, just use whatever
|
||||||
|
// is on disk or freshly generated.
|
||||||
|
var canonical ed25519.PrivateKey
|
||||||
|
if clusterSecret != "" {
|
||||||
|
seed, err := deriveEd25519Seed(clusterSecret)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("derive Ed25519 seed from cluster secret: %w", err)
|
||||||
|
}
|
||||||
|
canonical = ed25519.NewKeyFromSeed(seed)
|
||||||
|
}
|
||||||
|
|
||||||
// Try to load existing key
|
// Try to load existing key
|
||||||
if keyPEM, err := os.ReadFile(keyPath); err == nil && len(keyPEM) > 0 {
|
if keyPEM, err := os.ReadFile(keyPath); err == nil && len(keyPEM) > 0 {
|
||||||
block, _ := pem.Decode(keyPEM)
|
block, _ := pem.Decode(keyPEM)
|
||||||
@ -76,20 +111,38 @@ func loadOrCreateEdSigningKey(dataDir string, logger *logging.ColoredLogger) (ed
|
|||||||
parsed, err := x509.ParsePKCS8PrivateKey(block.Bytes)
|
parsed, err := x509.ParsePKCS8PrivateKey(block.Bytes)
|
||||||
if err == nil {
|
if err == nil {
|
||||||
if edKey, ok := parsed.(ed25519.PrivateKey); ok {
|
if edKey, ok := parsed.(ed25519.PrivateKey); ok {
|
||||||
logger.ComponentInfo(logging.ComponentGeneral, "Loaded existing EdDSA signing key",
|
// If we have a canonical cluster-derived key, the on-disk
|
||||||
zap.String("path", keyPath))
|
// key MUST match it. Otherwise we'd silently keep using a
|
||||||
return edKey, nil
|
// per-node random key and JWTs wouldn't verify cross-node.
|
||||||
|
if canonical != nil && !ed25519.PrivateKey(edKey).Equal(canonical) {
|
||||||
|
logger.ComponentWarn(logging.ComponentGeneral,
|
||||||
|
"On-disk EdDSA key does not match cluster-derived key; rewriting from cluster secret",
|
||||||
|
zap.String("path", keyPath))
|
||||||
|
// Fall through to write canonical below.
|
||||||
|
} else {
|
||||||
|
logger.ComponentInfo(logging.ComponentGeneral, "Loaded existing EdDSA signing key",
|
||||||
|
zap.String("path", keyPath))
|
||||||
|
return edKey, nil
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
logger.ComponentWarn(logging.ComponentGeneral, "Existing EdDSA signing key is invalid, generating new one",
|
if canonical == nil {
|
||||||
zap.String("path", keyPath))
|
logger.ComponentWarn(logging.ComponentGeneral, "Existing EdDSA signing key is invalid, generating new one",
|
||||||
|
zap.String("path", keyPath))
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Generate new Ed25519 key
|
// Either nothing on disk, key was unparseable, or it didn't match the
|
||||||
_, priv, err := ed25519.GenerateKey(rand.Reader)
|
// canonical cluster-derived key. Use canonical when available; otherwise
|
||||||
if err != nil {
|
// generate a random one (legacy single-node fallback).
|
||||||
return nil, fmt.Errorf("generate Ed25519 key: %w", err)
|
priv := canonical
|
||||||
|
if priv == nil {
|
||||||
|
_, generated, err := ed25519.GenerateKey(rand.Reader)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("generate Ed25519 key: %w", err)
|
||||||
|
}
|
||||||
|
priv = generated
|
||||||
}
|
}
|
||||||
|
|
||||||
pkcs8Bytes, err := x509.MarshalPKCS8PrivateKey(priv)
|
pkcs8Bytes, err := x509.MarshalPKCS8PrivateKey(priv)
|
||||||
@ -112,7 +165,27 @@ func loadOrCreateEdSigningKey(dataDir string, logger *logging.ColoredLogger) (ed
|
|||||||
return nil, fmt.Errorf("write EdDSA signing key: %w", err)
|
return nil, fmt.Errorf("write EdDSA signing key: %w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
logger.ComponentInfo(logging.ComponentGeneral, "Generated and saved new EdDSA signing key",
|
source := "random per-node"
|
||||||
zap.String("path", keyPath))
|
if canonical != nil {
|
||||||
|
source = "cluster-derived (HKDF)"
|
||||||
|
}
|
||||||
|
logger.ComponentInfo(logging.ComponentGeneral, "Saved EdDSA signing key",
|
||||||
|
zap.String("path", keyPath),
|
||||||
|
zap.String("source", source))
|
||||||
return priv, nil
|
return priv, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// deriveEd25519Seed derives a deterministic 32-byte seed for Ed25519 from the
|
||||||
|
// cluster secret using HKDF-SHA256 with a stable purpose label. Same secret +
|
||||||
|
// same label = same seed = same keypair on every gateway in the cluster.
|
||||||
|
func deriveEd25519Seed(clusterSecret string) ([]byte, error) {
|
||||||
|
if clusterSecret == "" {
|
||||||
|
return nil, fmt.Errorf("cluster secret is empty")
|
||||||
|
}
|
||||||
|
reader := hkdf.New(sha256.New, []byte(clusterSecret), nil, []byte(jwtEdDSADerivePurpose))
|
||||||
|
seed := make([]byte, ed25519.SeedSize)
|
||||||
|
if _, err := io.ReadFull(reader, seed); err != nil {
|
||||||
|
return nil, fmt.Errorf("HKDF read failed: %w", err)
|
||||||
|
}
|
||||||
|
return seed, nil
|
||||||
|
}
|
||||||
|
|||||||
147
core/pkg/gateway/signing_key_test.go
Normal file
147
core/pkg/gateway/signing_key_test.go
Normal file
@ -0,0 +1,147 @@
|
|||||||
|
package gateway
|
||||||
|
|
||||||
|
import (
|
||||||
|
"crypto/ed25519"
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/DeBrosOfficial/network/pkg/logging"
|
||||||
|
)
|
||||||
|
|
||||||
|
// TestDeriveEd25519Seed_deterministic guards bug #215: every gateway in a
|
||||||
|
// cluster must end up with the same Ed25519 keypair so JWTs verify on any
|
||||||
|
// node. Same cluster secret + same purpose label MUST produce the same seed.
|
||||||
|
func TestDeriveEd25519Seed_deterministic(t *testing.T) {
|
||||||
|
a, err := deriveEd25519Seed("super-secret-cluster-key")
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("derive #1: %v", err)
|
||||||
|
}
|
||||||
|
b, err := deriveEd25519Seed("super-secret-cluster-key")
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("derive #2: %v", err)
|
||||||
|
}
|
||||||
|
if len(a) != ed25519.SeedSize {
|
||||||
|
t.Errorf("seed size = %d, want %d", len(a), ed25519.SeedSize)
|
||||||
|
}
|
||||||
|
if string(a) != string(b) {
|
||||||
|
t.Error("seed not deterministic for same secret")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestDeriveEd25519Seed_differentSecretsDifferentSeeds rules out a trivial
|
||||||
|
// implementation that ignores the input.
|
||||||
|
func TestDeriveEd25519Seed_differentSecretsDifferentSeeds(t *testing.T) {
|
||||||
|
a, err := deriveEd25519Seed("secret-a")
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("derive a: %v", err)
|
||||||
|
}
|
||||||
|
b, err := deriveEd25519Seed("secret-b")
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("derive b: %v", err)
|
||||||
|
}
|
||||||
|
if string(a) == string(b) {
|
||||||
|
t.Error("different secrets produced identical seed")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestDeriveEd25519Seed_emptySecret(t *testing.T) {
|
||||||
|
if _, err := deriveEd25519Seed(""); err == nil {
|
||||||
|
t.Error("expected error for empty cluster secret, got nil")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestLoadOrCreateEdSigningKey_clusterSecretSharedAcrossNodes simulates two
|
||||||
|
// gateways with separate dataDirs but the same cluster secret. They MUST end
|
||||||
|
// up with the same Ed25519 private key so JWTs verify cross-node.
|
||||||
|
func TestLoadOrCreateEdSigningKey_clusterSecretSharedAcrossNodes(t *testing.T) {
|
||||||
|
dirA := t.TempDir()
|
||||||
|
dirB := t.TempDir()
|
||||||
|
const clusterSecret = "shared-cluster-secret-for-test"
|
||||||
|
|
||||||
|
logger, _ := logging.NewColoredLogger(logging.ComponentGeneral, false)
|
||||||
|
|
||||||
|
keyA, err := loadOrCreateEdSigningKey(dirA, clusterSecret, logger)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("node A: %v", err)
|
||||||
|
}
|
||||||
|
keyB, err := loadOrCreateEdSigningKey(dirB, clusterSecret, logger)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("node B: %v", err)
|
||||||
|
}
|
||||||
|
if !ed25519.PrivateKey(keyA).Equal(keyB) {
|
||||||
|
t.Fatal("two nodes with same cluster secret produced different Ed25519 keys")
|
||||||
|
}
|
||||||
|
// PEMs should also be persisted.
|
||||||
|
if _, err := os.Stat(filepath.Join(dirA, "secrets", eddsaKeyFileName)); err != nil {
|
||||||
|
t.Errorf("PEM not written on node A: %v", err)
|
||||||
|
}
|
||||||
|
if _, err := os.Stat(filepath.Join(dirB, "secrets", eddsaKeyFileName)); err != nil {
|
||||||
|
t.Errorf("PEM not written on node B: %v", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestLoadOrCreateEdSigningKey_emptySecretFallback verifies the legacy
|
||||||
|
// per-node behaviour is preserved when no cluster secret is available
|
||||||
|
// (single-node test rigs, dev). Two nodes get DIFFERENT random keys.
|
||||||
|
func TestLoadOrCreateEdSigningKey_emptySecretFallback(t *testing.T) {
|
||||||
|
dirA := t.TempDir()
|
||||||
|
dirB := t.TempDir()
|
||||||
|
logger, _ := logging.NewColoredLogger(logging.ComponentGeneral, false)
|
||||||
|
|
||||||
|
keyA, err := loadOrCreateEdSigningKey(dirA, "", logger)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("node A: %v", err)
|
||||||
|
}
|
||||||
|
keyB, err := loadOrCreateEdSigningKey(dirB, "", logger)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("node B: %v", err)
|
||||||
|
}
|
||||||
|
if ed25519.PrivateKey(keyA).Equal(keyB) {
|
||||||
|
t.Error("two nodes without cluster secret unexpectedly produced identical keys")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestLoadOrCreateEdSigningKey_overwritesStaleOnDiskKey covers the upgrade
|
||||||
|
// path: a gateway that previously generated a per-node random key (fix #215
|
||||||
|
// not yet deployed) now restarts with cluster-secret derivation enabled. The
|
||||||
|
// random key on disk MUST be replaced with the canonical cluster-derived one.
|
||||||
|
func TestLoadOrCreateEdSigningKey_overwritesStaleOnDiskKey(t *testing.T) {
|
||||||
|
dir := t.TempDir()
|
||||||
|
logger, _ := logging.NewColoredLogger(logging.ComponentGeneral, false)
|
||||||
|
|
||||||
|
// First boot: no cluster secret -> per-node random key.
|
||||||
|
keyV1, err := loadOrCreateEdSigningKey(dir, "", logger)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("v1: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Second boot: cluster secret now configured -> must rewrite to canonical.
|
||||||
|
const clusterSecret = "now-i-have-a-secret"
|
||||||
|
keyV2, err := loadOrCreateEdSigningKey(dir, clusterSecret, logger)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("v2: %v", err)
|
||||||
|
}
|
||||||
|
if ed25519.PrivateKey(keyV1).Equal(keyV2) {
|
||||||
|
t.Fatal("stale per-node key was not replaced when cluster secret became available")
|
||||||
|
}
|
||||||
|
|
||||||
|
// And the new key must match a fresh derivation from the same secret.
|
||||||
|
seed, err := deriveEd25519Seed(clusterSecret)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("derive seed: %v", err)
|
||||||
|
}
|
||||||
|
canonical := ed25519.NewKeyFromSeed(seed)
|
||||||
|
if !ed25519.PrivateKey(keyV2).Equal(canonical) {
|
||||||
|
t.Error("rewritten key does not match canonical derivation")
|
||||||
|
}
|
||||||
|
|
||||||
|
// Third boot with same secret: must be stable, no rewrite, same key.
|
||||||
|
keyV3, err := loadOrCreateEdSigningKey(dir, clusterSecret, logger)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("v3: %v", err)
|
||||||
|
}
|
||||||
|
if !ed25519.PrivateKey(keyV2).Equal(keyV3) {
|
||||||
|
t.Error("canonical key is not stable across restarts")
|
||||||
|
}
|
||||||
|
}
|
||||||
@ -39,6 +39,12 @@ type ClusterManagerConfig struct {
|
|||||||
// in RQLite. Derived from cluster secret via HKDF(clusterSecret, "turn-encryption").
|
// in RQLite. Derived from cluster secret via HKDF(clusterSecret, "turn-encryption").
|
||||||
// If nil, TURN secrets are stored in plaintext (backward compatibility).
|
// If nil, TURN secrets are stored in plaintext (backward compatibility).
|
||||||
TurnEncryptionKey []byte
|
TurnEncryptionKey []byte
|
||||||
|
|
||||||
|
// ClusterSecretPath is the host's cluster-secret file path. Forwarded
|
||||||
|
// to spawned namespace gateways via YAML so they can derive the
|
||||||
|
// cluster-wide JWT signing key (bug #215 fix). Empty string disables
|
||||||
|
// cross-node JWT verification within namespace clusters.
|
||||||
|
ClusterSecretPath string
|
||||||
}
|
}
|
||||||
|
|
||||||
// ClusterManager orchestrates namespace cluster provisioning and lifecycle
|
// ClusterManager orchestrates namespace cluster provisioning and lifecycle
|
||||||
@ -81,7 +87,7 @@ func NewClusterManager(
|
|||||||
portAllocator := NewNamespacePortAllocator(db, logger)
|
portAllocator := NewNamespacePortAllocator(db, logger)
|
||||||
webrtcPortAllocator := NewWebRTCPortAllocator(db, logger)
|
webrtcPortAllocator := NewWebRTCPortAllocator(db, logger)
|
||||||
nodeSelector := NewClusterNodeSelector(db, portAllocator, logger)
|
nodeSelector := NewClusterNodeSelector(db, portAllocator, logger)
|
||||||
systemdSpawner := NewSystemdSpawner(cfg.BaseDataDir, logger)
|
systemdSpawner := NewSystemdSpawner(cfg.BaseDataDir, cfg.ClusterSecretPath, logger)
|
||||||
dnsManager := NewDNSRecordManager(db, cfg.BaseDomain, logger)
|
dnsManager := NewDNSRecordManager(db, cfg.BaseDomain, logger)
|
||||||
|
|
||||||
// Set IPFS defaults
|
// Set IPFS defaults
|
||||||
|
|||||||
@ -22,15 +22,28 @@ import (
|
|||||||
type SystemdSpawner struct {
|
type SystemdSpawner struct {
|
||||||
systemdMgr *systemd.Manager
|
systemdMgr *systemd.Manager
|
||||||
namespaceBase string
|
namespaceBase string
|
||||||
logger *zap.Logger
|
// clusterSecretPath is the host's cluster-secret file path; written
|
||||||
|
// into spawned namespace gateways' YAML so they can derive the
|
||||||
|
// cluster-wide JWT signing key (bug #215). Empty string means the host
|
||||||
|
// has no cluster secret available — namespace gateways will fall back
|
||||||
|
// to per-node random keys and JWTs won't verify cross-node.
|
||||||
|
clusterSecretPath string
|
||||||
|
logger *zap.Logger
|
||||||
}
|
}
|
||||||
|
|
||||||
// NewSystemdSpawner creates a new systemd-based spawner
|
// NewSystemdSpawner creates a new systemd-based spawner.
|
||||||
func NewSystemdSpawner(namespaceBase string, logger *zap.Logger) *SystemdSpawner {
|
//
|
||||||
|
// clusterSecretPath should point to the host node's cluster-secret file
|
||||||
|
// (typically `<oramaDir>/secrets/cluster-secret`). It is written into each
|
||||||
|
// spawned namespace gateway's YAML config so the gateway can read it on
|
||||||
|
// startup. Pass "" only if no cluster secret exists on this host (legacy
|
||||||
|
// single-node test deployments).
|
||||||
|
func NewSystemdSpawner(namespaceBase, clusterSecretPath string, logger *zap.Logger) *SystemdSpawner {
|
||||||
return &SystemdSpawner{
|
return &SystemdSpawner{
|
||||||
systemdMgr: systemd.NewManager(namespaceBase, logger),
|
systemdMgr: systemd.NewManager(namespaceBase, logger),
|
||||||
namespaceBase: namespaceBase,
|
namespaceBase: namespaceBase,
|
||||||
logger: logger.With(zap.String("component", "systemd-spawner")),
|
clusterSecretPath: clusterSecretPath,
|
||||||
|
logger: logger.With(zap.String("component", "systemd-spawner")),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -209,6 +222,12 @@ func (s *SystemdSpawner) SpawnGateway(ctx context.Context, namespace, nodeID str
|
|||||||
IPFSAPIURL: cfg.IPFSAPIURL,
|
IPFSAPIURL: cfg.IPFSAPIURL,
|
||||||
IPFSTimeout: cfg.IPFSTimeout.String(),
|
IPFSTimeout: cfg.IPFSTimeout.String(),
|
||||||
IPFSReplicationFactor: cfg.IPFSReplicationFactor,
|
IPFSReplicationFactor: cfg.IPFSReplicationFactor,
|
||||||
|
// Bug #215 fix: forward the host's cluster secret path so the
|
||||||
|
// spawned namespace gateway can derive the cluster-wide JWT
|
||||||
|
// signing key. Without this, namespace gateways used per-node
|
||||||
|
// random Ed25519 keys and host functions saw empty
|
||||||
|
// caller_jwt_subject.
|
||||||
|
ClusterSecretPath: s.clusterSecretPath,
|
||||||
WebRTC: gateway.GatewayYAMLWebRTC{
|
WebRTC: gateway.GatewayYAMLWebRTC{
|
||||||
Enabled: cfg.WebRTCEnabled,
|
Enabled: cfg.WebRTCEnabled,
|
||||||
SFUPort: cfg.SFUPort,
|
SFUPort: cfg.SFUPort,
|
||||||
|
|||||||
@ -98,6 +98,17 @@ func (n *Node) startHTTPGateway(ctx context.Context) error {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Bug #215 fix: tell the namespace cluster manager where the
|
||||||
|
// host's cluster-secret file lives. Spawned namespace gateways
|
||||||
|
// read it on startup to derive the cluster-wide Ed25519 JWT
|
||||||
|
// signing key. The path resolves to the same file the host
|
||||||
|
// gateway reads above (line ~45) — keeps the secret on disk
|
||||||
|
// once, just referenced from the namespace gateway YAML.
|
||||||
|
clusterSecretPath := ""
|
||||||
|
if clusterSecret != "" {
|
||||||
|
clusterSecretPath = filepath.Join(oramaDir, "secrets", "cluster-secret")
|
||||||
|
}
|
||||||
|
|
||||||
clusterCfg := namespace.ClusterManagerConfig{
|
clusterCfg := namespace.ClusterManagerConfig{
|
||||||
BaseDomain: n.config.HTTPGateway.BaseDomain,
|
BaseDomain: n.config.HTTPGateway.BaseDomain,
|
||||||
BaseDataDir: baseDataDir,
|
BaseDataDir: baseDataDir,
|
||||||
@ -107,6 +118,7 @@ func (n *Node) startHTTPGateway(ctx context.Context) error {
|
|||||||
IPFSTimeout: gwCfg.IPFSTimeout,
|
IPFSTimeout: gwCfg.IPFSTimeout,
|
||||||
IPFSReplicationFactor: n.config.Database.IPFS.ReplicationFactor,
|
IPFSReplicationFactor: n.config.Database.IPFS.ReplicationFactor,
|
||||||
TurnEncryptionKey: turnEncKey,
|
TurnEncryptionKey: turnEncKey,
|
||||||
|
ClusterSecretPath: clusterSecretPath,
|
||||||
}
|
}
|
||||||
clusterManager := namespace.NewClusterManager(ormClient, clusterCfg, n.logger.Logger)
|
clusterManager := namespace.NewClusterManager(ormClient, clusterCfg, n.logger.Logger)
|
||||||
clusterManager.SetLocalNodeID(gwCfg.NodePeerID)
|
clusterManager.SetLocalNodeID(gwCfg.NodePeerID)
|
||||||
@ -114,8 +126,11 @@ func (n *Node) startHTTPGateway(ctx context.Context) error {
|
|||||||
apiGateway.SetNodeRecoverer(clusterManager)
|
apiGateway.SetNodeRecoverer(clusterManager)
|
||||||
apiGateway.SetWebRTCManager(clusterManager)
|
apiGateway.SetWebRTCManager(clusterManager)
|
||||||
|
|
||||||
// Wire spawn handler for distributed namespace instance spawning
|
// Wire spawn handler for distributed namespace instance spawning.
|
||||||
systemdSpawner := namespace.NewSystemdSpawner(baseDataDir, n.logger.Logger)
|
// Forwards the host's cluster_secret_path through to spawned
|
||||||
|
// namespace gateways (bug #215 fix; same rationale as the
|
||||||
|
// ClusterManager spawner above).
|
||||||
|
systemdSpawner := namespace.NewSystemdSpawner(baseDataDir, clusterSecretPath, n.logger.Logger)
|
||||||
spawnHandler := namespacehandlers.NewSpawnHandler(systemdSpawner, n.logger.Logger)
|
spawnHandler := namespacehandlers.NewSpawnHandler(systemdSpawner, n.logger.Logger)
|
||||||
apiGateway.SetSpawnHandler(spawnHandler)
|
apiGateway.SetSpawnHandler(spawnHandler)
|
||||||
|
|
||||||
|
|||||||
232
core/pkg/push/config_store.go
Normal file
232
core/pkg/push/config_store.go
Normal file
@ -0,0 +1,232 @@
|
|||||||
|
package push
|
||||||
|
|
||||||
|
// config_store.go — per-namespace push provider configuration with
|
||||||
|
// encrypted credential storage.
|
||||||
|
//
|
||||||
|
// Tenants set their own ntfy / expo credentials via the HTTP config
|
||||||
|
// endpoint; this store persists them in RQLite (`namespace_push_config`),
|
||||||
|
// encrypting sensitive fields at rest using a key derived from the
|
||||||
|
// cluster secret. The gateway YAML config remains as a global fallback —
|
||||||
|
// a row here OVERRIDES the YAML for that namespace, absent rows inherit
|
||||||
|
// YAML defaults.
|
||||||
|
//
|
||||||
|
// See bug #220 follow-up. Pattern intentionally generic (mirrors
|
||||||
|
// push_devices encryption + storage), so the next "tenant should
|
||||||
|
// self-serve this knob" need (rate limits, custom domains, etc.) can
|
||||||
|
// drop a similar table + handler in.
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"database/sql"
|
||||||
|
"errors"
|
||||||
|
"fmt"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/DeBrosOfficial/network/pkg/rqlite"
|
||||||
|
"github.com/DeBrosOfficial/network/pkg/secrets"
|
||||||
|
"go.uber.org/zap"
|
||||||
|
)
|
||||||
|
|
||||||
|
// purposeNamespacePushConfig is the HKDF "purpose" string for the
|
||||||
|
// per-namespace push-config encryption key. Distinct from other purposes
|
||||||
|
// so a key compromise in one domain doesn't compromise this one.
|
||||||
|
const purposeNamespacePushConfig = "namespace-push-config"
|
||||||
|
|
||||||
|
// Config is the in-memory representation of one namespace's push provider
|
||||||
|
// configuration. Sensitive fields are PLAINTEXT in this struct — encryption
|
||||||
|
// happens at the storage boundary.
|
||||||
|
type Config struct {
|
||||||
|
Namespace string
|
||||||
|
NtfyBaseURL string
|
||||||
|
NtfyAuthToken string
|
||||||
|
ExpoAccessToken string
|
||||||
|
UpdatedAt int64
|
||||||
|
UpdatedBy string
|
||||||
|
}
|
||||||
|
|
||||||
|
// IsEmpty returns true when this config has no provider credentials set —
|
||||||
|
// the namespace is effectively "no push" and the manager should fall back
|
||||||
|
// to the gateway YAML defaults (or refuse if those are also empty).
|
||||||
|
func (c *Config) IsEmpty() bool {
|
||||||
|
if c == nil {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
return c.NtfyBaseURL == "" && c.ExpoAccessToken == ""
|
||||||
|
}
|
||||||
|
|
||||||
|
// Redacted returns a copy of this Config safe for return over HTTP — the
|
||||||
|
// auth-token / access-token fields are zeroed out and replaced with a
|
||||||
|
// boolean (`HasNtfyAuthToken` / `HasExpoAccessToken`) at the response
|
||||||
|
// shape level. Use this to avoid ever sending the secrets back.
|
||||||
|
type RedactedConfig struct {
|
||||||
|
Namespace string `json:"namespace"`
|
||||||
|
NtfyBaseURL string `json:"ntfy_base_url,omitempty"`
|
||||||
|
HasNtfyAuthToken bool `json:"has_ntfy_auth_token"`
|
||||||
|
HasExpoAccessToken bool `json:"has_expo_access_token"`
|
||||||
|
UpdatedAt int64 `json:"updated_at,omitempty"`
|
||||||
|
UpdatedBy string `json:"updated_by,omitempty"`
|
||||||
|
}
|
||||||
|
|
||||||
|
// Redacted returns the redacted view safe for GET responses.
|
||||||
|
func (c *Config) Redacted() RedactedConfig {
|
||||||
|
if c == nil {
|
||||||
|
return RedactedConfig{}
|
||||||
|
}
|
||||||
|
return RedactedConfig{
|
||||||
|
Namespace: c.Namespace,
|
||||||
|
NtfyBaseURL: c.NtfyBaseURL,
|
||||||
|
HasNtfyAuthToken: c.NtfyAuthToken != "",
|
||||||
|
HasExpoAccessToken: c.ExpoAccessToken != "",
|
||||||
|
UpdatedAt: c.UpdatedAt,
|
||||||
|
UpdatedBy: c.UpdatedBy,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// ConfigStore is the persistence layer for per-namespace push config.
|
||||||
|
// Errors returned for "no row for namespace" → ErrConfigNotFound.
|
||||||
|
type ConfigStore interface {
|
||||||
|
Get(ctx context.Context, namespace string) (*Config, error)
|
||||||
|
Upsert(ctx context.Context, cfg Config) error
|
||||||
|
Delete(ctx context.Context, namespace string) error
|
||||||
|
}
|
||||||
|
|
||||||
|
// ErrConfigNotFound is returned by Get when no row exists for the
|
||||||
|
// namespace. Callers fall back to YAML defaults in that case.
|
||||||
|
var ErrConfigNotFound = errors.New("push config not found for namespace")
|
||||||
|
|
||||||
|
// rqliteConfigStore implements ConfigStore over RQLite + pkg/secrets.
|
||||||
|
type rqliteConfigStore struct {
|
||||||
|
db rqlite.Client
|
||||||
|
encKey []byte
|
||||||
|
logger *zap.Logger
|
||||||
|
}
|
||||||
|
|
||||||
|
// NewRqliteConfigStore wires the store to RQLite with a cluster-secret-
|
||||||
|
// derived encryption key for credential fields.
|
||||||
|
func NewRqliteConfigStore(db rqlite.Client, clusterSecret string, logger *zap.Logger) (ConfigStore, error) {
|
||||||
|
if clusterSecret == "" {
|
||||||
|
return nil, fmt.Errorf("push config store: cluster secret required for credential encryption")
|
||||||
|
}
|
||||||
|
key, err := secrets.DeriveKey(clusterSecret, purposeNamespacePushConfig)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("push config store: derive key: %w", err)
|
||||||
|
}
|
||||||
|
if logger == nil {
|
||||||
|
logger = zap.NewNop()
|
||||||
|
}
|
||||||
|
return &rqliteConfigStore{db: db, encKey: key, logger: logger}, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Get returns the namespace's config, decrypting sensitive fields. Returns
|
||||||
|
// ErrConfigNotFound if no row exists — caller decides whether to fall
|
||||||
|
// back to a default.
|
||||||
|
func (s *rqliteConfigStore) Get(ctx context.Context, namespace string) (*Config, error) {
|
||||||
|
if namespace == "" {
|
||||||
|
return nil, fmt.Errorf("push config: namespace required")
|
||||||
|
}
|
||||||
|
const q = `SELECT namespace, ntfy_base_url, ntfy_auth_token_encrypted,
|
||||||
|
expo_access_token_encrypted, updated_at, updated_by
|
||||||
|
FROM namespace_push_config WHERE namespace = ? LIMIT 1`
|
||||||
|
|
||||||
|
var rows []struct {
|
||||||
|
Namespace string `db:"namespace"`
|
||||||
|
NtfyBaseURL string `db:"ntfy_base_url"`
|
||||||
|
NtfyAuthTokenEncrypted string `db:"ntfy_auth_token_encrypted"`
|
||||||
|
ExpoAccessTokenEncrypted string `db:"expo_access_token_encrypted"`
|
||||||
|
UpdatedAt int64 `db:"updated_at"`
|
||||||
|
UpdatedBy string `db:"updated_by"`
|
||||||
|
}
|
||||||
|
if err := s.db.Query(ctx, &rows, q, namespace); err != nil {
|
||||||
|
// gorqlite stdlib path collapses "0 rows" to nil err; only real
|
||||||
|
// errors get here. ErrNoRows from sql is also treated below.
|
||||||
|
if errors.Is(err, sql.ErrNoRows) {
|
||||||
|
return nil, ErrConfigNotFound
|
||||||
|
}
|
||||||
|
return nil, fmt.Errorf("push config: query: %w", err)
|
||||||
|
}
|
||||||
|
if len(rows) == 0 {
|
||||||
|
return nil, ErrConfigNotFound
|
||||||
|
}
|
||||||
|
r := rows[0]
|
||||||
|
|
||||||
|
cfg := &Config{
|
||||||
|
Namespace: r.Namespace,
|
||||||
|
NtfyBaseURL: r.NtfyBaseURL,
|
||||||
|
UpdatedAt: r.UpdatedAt,
|
||||||
|
UpdatedBy: r.UpdatedBy,
|
||||||
|
}
|
||||||
|
if r.NtfyAuthTokenEncrypted != "" {
|
||||||
|
v, err := secrets.Decrypt(r.NtfyAuthTokenEncrypted, s.encKey)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("push config: decrypt ntfy auth token: %w", err)
|
||||||
|
}
|
||||||
|
cfg.NtfyAuthToken = v
|
||||||
|
}
|
||||||
|
if r.ExpoAccessTokenEncrypted != "" {
|
||||||
|
v, err := secrets.Decrypt(r.ExpoAccessTokenEncrypted, s.encKey)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("push config: decrypt expo access token: %w", err)
|
||||||
|
}
|
||||||
|
cfg.ExpoAccessToken = v
|
||||||
|
}
|
||||||
|
return cfg, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Upsert writes or replaces the namespace's config. Sensitive fields are
|
||||||
|
// encrypted before storage. Empty strings for credential fields clear them.
|
||||||
|
func (s *rqliteConfigStore) Upsert(ctx context.Context, cfg Config) error {
|
||||||
|
if cfg.Namespace == "" {
|
||||||
|
return fmt.Errorf("push config: namespace required")
|
||||||
|
}
|
||||||
|
|
||||||
|
var ntfyEnc, expoEnc string
|
||||||
|
if cfg.NtfyAuthToken != "" {
|
||||||
|
v, err := secrets.Encrypt(cfg.NtfyAuthToken, s.encKey)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("push config: encrypt ntfy auth token: %w", err)
|
||||||
|
}
|
||||||
|
ntfyEnc = v
|
||||||
|
}
|
||||||
|
if cfg.ExpoAccessToken != "" {
|
||||||
|
v, err := secrets.Encrypt(cfg.ExpoAccessToken, s.encKey)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("push config: encrypt expo access token: %w", err)
|
||||||
|
}
|
||||||
|
expoEnc = v
|
||||||
|
}
|
||||||
|
updatedAt := cfg.UpdatedAt
|
||||||
|
if updatedAt == 0 {
|
||||||
|
updatedAt = time.Now().Unix()
|
||||||
|
}
|
||||||
|
|
||||||
|
const q = `INSERT INTO namespace_push_config (
|
||||||
|
namespace, ntfy_base_url, ntfy_auth_token_encrypted,
|
||||||
|
expo_access_token_encrypted, updated_at, updated_by
|
||||||
|
) VALUES (?, ?, ?, ?, ?, ?)
|
||||||
|
ON CONFLICT(namespace) DO UPDATE SET
|
||||||
|
ntfy_base_url = excluded.ntfy_base_url,
|
||||||
|
ntfy_auth_token_encrypted = excluded.ntfy_auth_token_encrypted,
|
||||||
|
expo_access_token_encrypted = excluded.expo_access_token_encrypted,
|
||||||
|
updated_at = excluded.updated_at,
|
||||||
|
updated_by = excluded.updated_by`
|
||||||
|
|
||||||
|
if _, err := s.db.Exec(ctx, q,
|
||||||
|
cfg.Namespace, cfg.NtfyBaseURL, ntfyEnc, expoEnc, updatedAt, cfg.UpdatedBy,
|
||||||
|
); err != nil {
|
||||||
|
return fmt.Errorf("push config: upsert: %w", err)
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Delete clears the namespace's config row. After this the manager falls
|
||||||
|
// back to YAML defaults (or refuses if no defaults set).
|
||||||
|
func (s *rqliteConfigStore) Delete(ctx context.Context, namespace string) error {
|
||||||
|
if namespace == "" {
|
||||||
|
return fmt.Errorf("push config: namespace required")
|
||||||
|
}
|
||||||
|
const q = `DELETE FROM namespace_push_config WHERE namespace = ?`
|
||||||
|
if _, err := s.db.Exec(ctx, q, namespace); err != nil {
|
||||||
|
return fmt.Errorf("push config: delete: %w", err)
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
265
core/pkg/push/manager.go
Normal file
265
core/pkg/push/manager.go
Normal file
@ -0,0 +1,265 @@
|
|||||||
|
package push
|
||||||
|
|
||||||
|
// manager.go — top-level entry point for sending push notifications,
|
||||||
|
// with per-namespace provider configuration that tenants self-serve.
|
||||||
|
//
|
||||||
|
// The Manager wraps:
|
||||||
|
// - a ConfigStore (per-namespace overrides in RQLite)
|
||||||
|
// - a fallback Defaults (gateway YAML — the cluster-wide default if
|
||||||
|
// a namespace hasn't set its own)
|
||||||
|
// - an LRU cache of built dispatchers keyed by namespace
|
||||||
|
//
|
||||||
|
// Every send-path (HTTP, WASM hostfunc) goes through Manager.SendToUser
|
||||||
|
// or Manager.Send. The cache eliminates per-call config decryption +
|
||||||
|
// provider construction overhead — only the FIRST send for a namespace
|
||||||
|
// pays that cost; subsequent sends are zero-allocation lookups.
|
||||||
|
//
|
||||||
|
// Cache invalidation: HTTP config-change handlers call Invalidate after
|
||||||
|
// PUT/DELETE so the next send rebuilds with fresh config.
|
||||||
|
//
|
||||||
|
// See bug #220 follow-up. Generic by design — same pattern works for
|
||||||
|
// any future "tenant should self-serve this knob" feature.
|
||||||
|
|
||||||
|
import (
|
||||||
|
"container/list"
|
||||||
|
"context"
|
||||||
|
"errors"
|
||||||
|
"fmt"
|
||||||
|
"sync"
|
||||||
|
|
||||||
|
"go.uber.org/zap"
|
||||||
|
)
|
||||||
|
|
||||||
|
// ProviderFactory builds the set of providers for a resolved Config.
|
||||||
|
// Injected by the gateway so the push package itself doesn't import
|
||||||
|
// the provider sub-packages (avoids a circular dependency: provider
|
||||||
|
// sub-packages already depend on push for the PushProvider interface).
|
||||||
|
//
|
||||||
|
// The factory is called once per fresh dispatcher build (cache miss).
|
||||||
|
// Empty slice is allowed and means "this config produces no providers";
|
||||||
|
// Manager treats that as ErrPushNotConfigured.
|
||||||
|
type ProviderFactory func(cfg Config) []PushProvider
|
||||||
|
|
||||||
|
// ErrPushNotConfigured is returned by Send when the namespace has no
|
||||||
|
// per-namespace config AND the gateway has no fallback defaults — i.e.
|
||||||
|
// nothing to send through. Distinguish from ErrNoDevices (different
|
||||||
|
// failure mode).
|
||||||
|
var ErrPushNotConfigured = errors.New("push not configured for namespace; set ntfy_base_url or expo_access_token via PUT /v1/push/config")
|
||||||
|
|
||||||
|
// Defaults are the gateway-YAML fallback when a namespace hasn't set its
|
||||||
|
// own config. Any field set here applies to every namespace that doesn't
|
||||||
|
// override it. Empty Defaults means "no global default — namespace must
|
||||||
|
// set their own".
|
||||||
|
type Defaults struct {
|
||||||
|
NtfyBaseURL string
|
||||||
|
NtfyAuthToken string
|
||||||
|
ExpoAccessToken string
|
||||||
|
}
|
||||||
|
|
||||||
|
// IsEmpty returns true when no fallback provider is set.
|
||||||
|
func (d Defaults) IsEmpty() bool {
|
||||||
|
return d.NtfyBaseURL == "" && d.ExpoAccessToken == ""
|
||||||
|
}
|
||||||
|
|
||||||
|
// Manager is the top-level push entry point. Build with NewManager and
|
||||||
|
// hand out via the gateway's dependencies. Safe for concurrent use.
|
||||||
|
type Manager struct {
|
||||||
|
store ConfigStore
|
||||||
|
devices PushDeviceStore
|
||||||
|
defaults Defaults
|
||||||
|
factory ProviderFactory
|
||||||
|
logger *zap.Logger
|
||||||
|
|
||||||
|
// cache LRU of namespace → built dispatcher.
|
||||||
|
mu sync.Mutex
|
||||||
|
cache map[string]*list.Element
|
||||||
|
lru *list.List
|
||||||
|
cacheCap int
|
||||||
|
}
|
||||||
|
|
||||||
|
// cacheEntry is the doubly-linked-list node + dispatcher payload.
|
||||||
|
type cacheEntry struct {
|
||||||
|
namespace string
|
||||||
|
dispatcher *PushDispatcher
|
||||||
|
}
|
||||||
|
|
||||||
|
// defaultCacheCap caps how many namespaces' dispatchers we hold in memory.
|
||||||
|
// Each entry is small (~few hundred bytes); 256 is generous and bounds
|
||||||
|
// memory under abuse.
|
||||||
|
const defaultCacheCap = 256
|
||||||
|
|
||||||
|
// NewManager constructs a Manager with the given device store, config
|
||||||
|
// store, fallback Defaults, and ProviderFactory.
|
||||||
|
//
|
||||||
|
// The ProviderFactory is REQUIRED — it tells the manager how to turn a
|
||||||
|
// resolved Config into PushProvider instances. The gateway's
|
||||||
|
// dependencies wires this up (it imports the provider sub-packages and
|
||||||
|
// returns the right providers for each config field that's populated).
|
||||||
|
func NewManager(devices PushDeviceStore, store ConfigStore, defaults Defaults, factory ProviderFactory, logger *zap.Logger) *Manager {
|
||||||
|
if logger == nil {
|
||||||
|
logger = zap.NewNop()
|
||||||
|
}
|
||||||
|
return &Manager{
|
||||||
|
store: store,
|
||||||
|
devices: devices,
|
||||||
|
defaults: defaults,
|
||||||
|
factory: factory,
|
||||||
|
logger: logger,
|
||||||
|
cache: make(map[string]*list.Element, defaultCacheCap),
|
||||||
|
lru: list.New(),
|
||||||
|
cacheCap: defaultCacheCap,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// SendToUser dispatches a push to every device registered for the user
|
||||||
|
// in the given namespace. Looks up per-namespace config (or falls back
|
||||||
|
// to defaults), builds the appropriate dispatcher, and sends.
|
||||||
|
//
|
||||||
|
// Returns:
|
||||||
|
// - nil on success or "no devices" (per existing PushDispatcher
|
||||||
|
// contract — sending push to a user with zero devices is a no-op)
|
||||||
|
// - ErrPushNotConfigured when neither namespace config nor gateway
|
||||||
|
// defaults provide a working provider
|
||||||
|
func (m *Manager) SendToUser(ctx context.Context, namespace, userID string, msg PushMessage) error {
|
||||||
|
d, err := m.dispatcherFor(ctx, namespace)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
return d.SendToUser(ctx, namespace, userID, msg)
|
||||||
|
}
|
||||||
|
|
||||||
|
// DeviceStore exposes the underlying device store so HTTP handlers
|
||||||
|
// (register/list/delete) can use it directly without going through the
|
||||||
|
// dispatcher path.
|
||||||
|
func (m *Manager) DeviceStore() PushDeviceStore {
|
||||||
|
return m.devices
|
||||||
|
}
|
||||||
|
|
||||||
|
// IsConfigured returns true when the namespace has per-namespace config
|
||||||
|
// OR usable fallback defaults — i.e. SendToUser would not fail with
|
||||||
|
// ErrPushNotConfigured. Cheap path-check used by HTTP 503 responses.
|
||||||
|
func (m *Manager) IsConfigured(ctx context.Context, namespace string) bool {
|
||||||
|
if !m.defaults.IsEmpty() {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
if m.store == nil {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
cfg, err := m.store.Get(ctx, namespace)
|
||||||
|
if err != nil || cfg == nil {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
return !cfg.IsEmpty()
|
||||||
|
}
|
||||||
|
|
||||||
|
// Invalidate evicts the cached dispatcher for a namespace. Call after a
|
||||||
|
// successful PUT/DELETE on /v1/push/config so the next SendToUser uses
|
||||||
|
// fresh config.
|
||||||
|
func (m *Manager) Invalidate(namespace string) {
|
||||||
|
m.mu.Lock()
|
||||||
|
defer m.mu.Unlock()
|
||||||
|
if elem, ok := m.cache[namespace]; ok {
|
||||||
|
m.lru.Remove(elem)
|
||||||
|
delete(m.cache, namespace)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// dispatcherFor returns a (cached or freshly built) dispatcher with the
|
||||||
|
// providers configured for the given namespace.
|
||||||
|
func (m *Manager) dispatcherFor(ctx context.Context, namespace string) (*PushDispatcher, error) {
|
||||||
|
// Fast path — already cached.
|
||||||
|
m.mu.Lock()
|
||||||
|
if elem, ok := m.cache[namespace]; ok {
|
||||||
|
m.lru.MoveToFront(elem)
|
||||||
|
entry := elem.Value.(*cacheEntry)
|
||||||
|
m.mu.Unlock()
|
||||||
|
return entry.dispatcher, nil
|
||||||
|
}
|
||||||
|
m.mu.Unlock()
|
||||||
|
|
||||||
|
// Slow path — load config + build dispatcher.
|
||||||
|
d, err := m.buildDispatcher(ctx, namespace)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
// Insert into cache (eviction if at capacity).
|
||||||
|
m.mu.Lock()
|
||||||
|
defer m.mu.Unlock()
|
||||||
|
// Recheck under lock — another goroutine may have built one.
|
||||||
|
if elem, ok := m.cache[namespace]; ok {
|
||||||
|
m.lru.MoveToFront(elem)
|
||||||
|
return elem.Value.(*cacheEntry).dispatcher, nil
|
||||||
|
}
|
||||||
|
if m.lru.Len() >= m.cacheCap {
|
||||||
|
oldest := m.lru.Back()
|
||||||
|
if oldest != nil {
|
||||||
|
old := oldest.Value.(*cacheEntry)
|
||||||
|
m.lru.Remove(oldest)
|
||||||
|
delete(m.cache, old.namespace)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
entry := &cacheEntry{namespace: namespace, dispatcher: d}
|
||||||
|
m.cache[namespace] = m.lru.PushFront(entry)
|
||||||
|
return d, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// buildDispatcher resolves the effective config for the namespace and
|
||||||
|
// constructs a fresh PushDispatcher with the right providers registered.
|
||||||
|
//
|
||||||
|
// Effective-config resolution:
|
||||||
|
// 1. Start with gateway YAML defaults
|
||||||
|
// 2. If a per-namespace row exists, override field-by-field
|
||||||
|
// 3. If the result has zero providers, return ErrPushNotConfigured
|
||||||
|
func (m *Manager) buildDispatcher(ctx context.Context, namespace string) (*PushDispatcher, error) {
|
||||||
|
eff := Config{Namespace: namespace}
|
||||||
|
// Start from defaults.
|
||||||
|
eff.NtfyBaseURL = m.defaults.NtfyBaseURL
|
||||||
|
eff.NtfyAuthToken = m.defaults.NtfyAuthToken
|
||||||
|
eff.ExpoAccessToken = m.defaults.ExpoAccessToken
|
||||||
|
|
||||||
|
// Layer namespace overrides if any.
|
||||||
|
if m.store != nil {
|
||||||
|
nc, err := m.store.Get(ctx, namespace)
|
||||||
|
if err != nil && !errors.Is(err, ErrConfigNotFound) {
|
||||||
|
return nil, fmt.Errorf("manager: load namespace config: %w", err)
|
||||||
|
}
|
||||||
|
if nc != nil {
|
||||||
|
// Each field overrides the default if non-empty. An explicit
|
||||||
|
// empty value via PUT clears the namespace's row entirely
|
||||||
|
// (DELETE) — there's no "set this field to empty to clear"
|
||||||
|
// half-state, by design.
|
||||||
|
if nc.NtfyBaseURL != "" {
|
||||||
|
eff.NtfyBaseURL = nc.NtfyBaseURL
|
||||||
|
}
|
||||||
|
if nc.NtfyAuthToken != "" {
|
||||||
|
eff.NtfyAuthToken = nc.NtfyAuthToken
|
||||||
|
}
|
||||||
|
if nc.ExpoAccessToken != "" {
|
||||||
|
eff.ExpoAccessToken = nc.ExpoAccessToken
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Refuse to build a dispatcher with no providers — caller gets a
|
||||||
|
// clear error instead of a silent no-op.
|
||||||
|
if eff.NtfyBaseURL == "" && eff.ExpoAccessToken == "" {
|
||||||
|
return nil, ErrPushNotConfigured
|
||||||
|
}
|
||||||
|
if m.factory == nil {
|
||||||
|
// Defensive: a Manager built without a factory can't produce
|
||||||
|
// providers. Programmer error; surface explicitly.
|
||||||
|
return nil, fmt.Errorf("manager: no provider factory configured")
|
||||||
|
}
|
||||||
|
|
||||||
|
providers := m.factory(eff)
|
||||||
|
if len(providers) == 0 {
|
||||||
|
return nil, ErrPushNotConfigured
|
||||||
|
}
|
||||||
|
|
||||||
|
d := New(m.devices, m.logger)
|
||||||
|
for _, p := range providers {
|
||||||
|
d.Register(p)
|
||||||
|
}
|
||||||
|
return d, nil
|
||||||
|
}
|
||||||
312
core/pkg/push/manager_test.go
Normal file
312
core/pkg/push/manager_test.go
Normal file
@ -0,0 +1,312 @@
|
|||||||
|
package push
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"errors"
|
||||||
|
"sync"
|
||||||
|
"sync/atomic"
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"go.uber.org/zap"
|
||||||
|
)
|
||||||
|
|
||||||
|
// fakeConfigStore is a minimal in-memory ConfigStore for tests.
|
||||||
|
type fakeConfigStore struct {
|
||||||
|
mu sync.Mutex
|
||||||
|
configs map[string]*Config
|
||||||
|
getErr error // optional injected error
|
||||||
|
calls atomic.Int32
|
||||||
|
}
|
||||||
|
|
||||||
|
func newFakeConfigStore() *fakeConfigStore {
|
||||||
|
return &fakeConfigStore{configs: map[string]*Config{}}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (f *fakeConfigStore) Get(_ context.Context, ns string) (*Config, error) {
|
||||||
|
f.calls.Add(1)
|
||||||
|
if f.getErr != nil {
|
||||||
|
return nil, f.getErr
|
||||||
|
}
|
||||||
|
f.mu.Lock()
|
||||||
|
defer f.mu.Unlock()
|
||||||
|
cfg, ok := f.configs[ns]
|
||||||
|
if !ok {
|
||||||
|
return nil, ErrConfigNotFound
|
||||||
|
}
|
||||||
|
// Return a copy so the caller can't mutate our state.
|
||||||
|
cp := *cfg
|
||||||
|
return &cp, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (f *fakeConfigStore) Upsert(_ context.Context, c Config) error {
|
||||||
|
f.mu.Lock()
|
||||||
|
defer f.mu.Unlock()
|
||||||
|
cp := c
|
||||||
|
f.configs[c.Namespace] = &cp
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (f *fakeConfigStore) Delete(_ context.Context, ns string) error {
|
||||||
|
f.mu.Lock()
|
||||||
|
defer f.mu.Unlock()
|
||||||
|
delete(f.configs, ns)
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// managerFakeProvider is a stub PushProvider for manager tests.
|
||||||
|
// (The dispatcher tests already declare a `fakeProvider`; rename ours.)
|
||||||
|
type managerFakeProvider struct {
|
||||||
|
name string
|
||||||
|
sends atomic.Int32
|
||||||
|
}
|
||||||
|
|
||||||
|
func (p *managerFakeProvider) Name() string { return p.name }
|
||||||
|
func (p *managerFakeProvider) Send(_ context.Context, _ PushMessage) error {
|
||||||
|
p.sends.Add(1)
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
func (p *managerFakeProvider) SendBulk(_ context.Context, msgs []PushMessage) []error {
|
||||||
|
for range msgs {
|
||||||
|
p.sends.Add(1)
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestManager_namespace_with_no_config_uses_defaults(t *testing.T) {
|
||||||
|
store := newFakeConfigStore() // empty
|
||||||
|
defaults := Defaults{NtfyBaseURL: "http://default-ntfy"}
|
||||||
|
|
||||||
|
var providerCalls atomic.Int32
|
||||||
|
factory := func(c Config) []PushProvider {
|
||||||
|
providerCalls.Add(1)
|
||||||
|
// Verify the manager passed defaults through to the factory.
|
||||||
|
if c.NtfyBaseURL != "http://default-ntfy" {
|
||||||
|
t.Errorf("factory got URL=%q, want default", c.NtfyBaseURL)
|
||||||
|
}
|
||||||
|
return []PushProvider{&managerFakeProvider{name: "ntfy"}}
|
||||||
|
}
|
||||||
|
|
||||||
|
m := NewManager(&fakeDeviceStore{}, store, defaults, factory, zap.NewNop())
|
||||||
|
d, err := m.dispatcherFor(context.Background(), "ns-A")
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("expected default to satisfy: %v", err)
|
||||||
|
}
|
||||||
|
if d == nil {
|
||||||
|
t.Fatal("expected a dispatcher")
|
||||||
|
}
|
||||||
|
if providerCalls.Load() != 1 {
|
||||||
|
t.Errorf("factory should have been called once, got %d", providerCalls.Load())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestManager_namespace_config_overrides_defaults(t *testing.T) {
|
||||||
|
store := newFakeConfigStore()
|
||||||
|
store.Upsert(context.Background(), Config{
|
||||||
|
Namespace: "ns-A",
|
||||||
|
NtfyBaseURL: "http://override-ntfy",
|
||||||
|
})
|
||||||
|
defaults := Defaults{NtfyBaseURL: "http://default-ntfy"}
|
||||||
|
|
||||||
|
var seenURL string
|
||||||
|
factory := func(c Config) []PushProvider {
|
||||||
|
seenURL = c.NtfyBaseURL
|
||||||
|
return []PushProvider{&managerFakeProvider{name: "ntfy"}}
|
||||||
|
}
|
||||||
|
|
||||||
|
m := NewManager(&fakeDeviceStore{}, store, defaults, factory, zap.NewNop())
|
||||||
|
if _, err := m.dispatcherFor(context.Background(), "ns-A"); err != nil {
|
||||||
|
t.Fatalf("dispatcherFor: %v", err)
|
||||||
|
}
|
||||||
|
if seenURL != "http://override-ntfy" {
|
||||||
|
t.Errorf("factory got URL=%q, want the namespace override", seenURL)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestManager_no_config_no_defaults_returns_ErrPushNotConfigured(t *testing.T) {
|
||||||
|
store := newFakeConfigStore()
|
||||||
|
factory := func(_ Config) []PushProvider { return nil }
|
||||||
|
|
||||||
|
m := NewManager(&fakeDeviceStore{}, store, Defaults{}, factory, zap.NewNop())
|
||||||
|
_, err := m.dispatcherFor(context.Background(), "ns-A")
|
||||||
|
if !errors.Is(err, ErrPushNotConfigured) {
|
||||||
|
t.Errorf("expected ErrPushNotConfigured, got %v", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestManager_caches_dispatchers_per_namespace(t *testing.T) {
|
||||||
|
store := newFakeConfigStore()
|
||||||
|
store.Upsert(context.Background(), Config{Namespace: "ns-A", NtfyBaseURL: "u"})
|
||||||
|
|
||||||
|
var factoryCalls atomic.Int32
|
||||||
|
factory := func(_ Config) []PushProvider {
|
||||||
|
factoryCalls.Add(1)
|
||||||
|
return []PushProvider{&managerFakeProvider{name: "ntfy"}}
|
||||||
|
}
|
||||||
|
m := NewManager(&fakeDeviceStore{}, store, Defaults{}, factory, zap.NewNop())
|
||||||
|
|
||||||
|
// Two calls for the same namespace = one factory invocation (cache hit).
|
||||||
|
for i := 0; i < 5; i++ {
|
||||||
|
if _, err := m.dispatcherFor(context.Background(), "ns-A"); err != nil {
|
||||||
|
t.Fatalf("iter %d: %v", i, err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if factoryCalls.Load() != 1 {
|
||||||
|
t.Errorf("expected 1 factory call due to cache, got %d", factoryCalls.Load())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestManager_invalidate_forces_rebuild(t *testing.T) {
|
||||||
|
store := newFakeConfigStore()
|
||||||
|
store.Upsert(context.Background(), Config{Namespace: "ns-A", NtfyBaseURL: "v1"})
|
||||||
|
|
||||||
|
var seenURLs []string
|
||||||
|
factory := func(c Config) []PushProvider {
|
||||||
|
seenURLs = append(seenURLs, c.NtfyBaseURL)
|
||||||
|
return []PushProvider{&managerFakeProvider{name: "ntfy"}}
|
||||||
|
}
|
||||||
|
m := NewManager(&fakeDeviceStore{}, store, Defaults{}, factory, zap.NewNop())
|
||||||
|
|
||||||
|
if _, err := m.dispatcherFor(context.Background(), "ns-A"); err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
// Update config + invalidate.
|
||||||
|
store.Upsert(context.Background(), Config{Namespace: "ns-A", NtfyBaseURL: "v2"})
|
||||||
|
m.Invalidate("ns-A")
|
||||||
|
|
||||||
|
if _, err := m.dispatcherFor(context.Background(), "ns-A"); err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
if len(seenURLs) != 2 || seenURLs[0] != "v1" || seenURLs[1] != "v2" {
|
||||||
|
t.Errorf("expected factory to be called twice with v1 then v2, got %v", seenURLs)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestManager_per_namespace_isolation(t *testing.T) {
|
||||||
|
// ns-A has its own config; ns-B has none and uses defaults.
|
||||||
|
store := newFakeConfigStore()
|
||||||
|
store.Upsert(context.Background(), Config{Namespace: "ns-A", NtfyBaseURL: "ns-A-url"})
|
||||||
|
|
||||||
|
defaults := Defaults{NtfyBaseURL: "default-url"}
|
||||||
|
|
||||||
|
urlByNS := make(map[string]string)
|
||||||
|
var mu sync.Mutex
|
||||||
|
factory := func(c Config) []PushProvider {
|
||||||
|
mu.Lock()
|
||||||
|
urlByNS[c.Namespace] = c.NtfyBaseURL
|
||||||
|
mu.Unlock()
|
||||||
|
return []PushProvider{&managerFakeProvider{name: "ntfy"}}
|
||||||
|
}
|
||||||
|
|
||||||
|
m := NewManager(&fakeDeviceStore{}, store, defaults, factory, zap.NewNop())
|
||||||
|
if _, err := m.dispatcherFor(context.Background(), "ns-A"); err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
if _, err := m.dispatcherFor(context.Background(), "ns-B"); err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
mu.Lock()
|
||||||
|
defer mu.Unlock()
|
||||||
|
if urlByNS["ns-A"] != "ns-A-url" {
|
||||||
|
t.Errorf("ns-A got %q, want ns-A-url", urlByNS["ns-A"])
|
||||||
|
}
|
||||||
|
if urlByNS["ns-B"] != "default-url" {
|
||||||
|
t.Errorf("ns-B got %q, want default-url", urlByNS["ns-B"])
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestManager_IsConfigured_distinguishes_states(t *testing.T) {
|
||||||
|
store := newFakeConfigStore()
|
||||||
|
store.Upsert(context.Background(), Config{Namespace: "with-cfg", NtfyBaseURL: "u"})
|
||||||
|
|
||||||
|
// Without defaults, only namespace with config returns true.
|
||||||
|
m1 := NewManager(&fakeDeviceStore{}, store, Defaults{}, nil, zap.NewNop())
|
||||||
|
if !m1.IsConfigured(context.Background(), "with-cfg") {
|
||||||
|
t.Error("expected with-cfg to be configured")
|
||||||
|
}
|
||||||
|
if m1.IsConfigured(context.Background(), "no-cfg") {
|
||||||
|
t.Error("expected no-cfg to NOT be configured (no defaults)")
|
||||||
|
}
|
||||||
|
|
||||||
|
// With defaults, every namespace is configured.
|
||||||
|
m2 := NewManager(&fakeDeviceStore{}, store, Defaults{NtfyBaseURL: "d"}, nil, zap.NewNop())
|
||||||
|
if !m2.IsConfigured(context.Background(), "no-cfg") {
|
||||||
|
t.Error("expected no-cfg to be configured via defaults")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestManager_concurrent_dispatcherFor_no_race(t *testing.T) {
|
||||||
|
// Run with -race.
|
||||||
|
store := newFakeConfigStore()
|
||||||
|
store.Upsert(context.Background(), Config{Namespace: "ns", NtfyBaseURL: "u"})
|
||||||
|
factory := func(_ Config) []PushProvider { return []PushProvider{&managerFakeProvider{name: "ntfy"}} }
|
||||||
|
|
||||||
|
m := NewManager(&fakeDeviceStore{}, store, Defaults{}, factory, zap.NewNop())
|
||||||
|
|
||||||
|
var wg sync.WaitGroup
|
||||||
|
for g := 0; g < 16; g++ {
|
||||||
|
wg.Add(1)
|
||||||
|
go func() {
|
||||||
|
defer wg.Done()
|
||||||
|
for i := 0; i < 50; i++ {
|
||||||
|
_, _ = m.dispatcherFor(context.Background(), "ns")
|
||||||
|
}
|
||||||
|
}()
|
||||||
|
}
|
||||||
|
wg.Wait()
|
||||||
|
}
|
||||||
|
|
||||||
|
// fakeDeviceStore is a stub PushDeviceStore for manager tests.
|
||||||
|
type fakeDeviceStore struct{}
|
||||||
|
|
||||||
|
func (s *fakeDeviceStore) Upsert(_ context.Context, _ PushDevice) error { return nil }
|
||||||
|
func (s *fakeDeviceStore) Delete(_ context.Context, _, _ string) error { return nil }
|
||||||
|
func (s *fakeDeviceStore) ListForUser(_ context.Context, _, _ string) ([]PushDevice, error) {
|
||||||
|
return nil, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestConfig_Redacted_omits_secrets(t *testing.T) {
|
||||||
|
cfg := &Config{
|
||||||
|
Namespace: "ns",
|
||||||
|
NtfyBaseURL: "https://ntfy.example",
|
||||||
|
NtfyAuthToken: "super-secret-token",
|
||||||
|
ExpoAccessToken: "another-super-secret",
|
||||||
|
UpdatedAt: 12345,
|
||||||
|
UpdatedBy: "admin",
|
||||||
|
}
|
||||||
|
r := cfg.Redacted()
|
||||||
|
if r.NtfyBaseURL != "https://ntfy.example" {
|
||||||
|
t.Errorf("ntfy URL must be present (it's not a secret), got %q", r.NtfyBaseURL)
|
||||||
|
}
|
||||||
|
if !r.HasNtfyAuthToken {
|
||||||
|
t.Error("HasNtfyAuthToken must reflect that the token is set")
|
||||||
|
}
|
||||||
|
if !r.HasExpoAccessToken {
|
||||||
|
t.Error("HasExpoAccessToken must reflect that the token is set")
|
||||||
|
}
|
||||||
|
// Most importantly: the tokens themselves must not appear anywhere
|
||||||
|
// in the redacted view. Walk the struct's exported fields.
|
||||||
|
if r.UpdatedAt != 12345 {
|
||||||
|
t.Errorf("UpdatedAt should round-trip, got %d", r.UpdatedAt)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestConfig_IsEmpty(t *testing.T) {
|
||||||
|
cases := []struct {
|
||||||
|
name string
|
||||||
|
c *Config
|
||||||
|
want bool
|
||||||
|
}{
|
||||||
|
{"nil pointer", nil, true},
|
||||||
|
{"zero value", &Config{}, true},
|
||||||
|
{"namespace only", &Config{Namespace: "ns"}, true},
|
||||||
|
{"with ntfy URL", &Config{NtfyBaseURL: "u"}, false},
|
||||||
|
{"with expo token", &Config{ExpoAccessToken: "t"}, false},
|
||||||
|
}
|
||||||
|
for _, c := range cases {
|
||||||
|
t.Run(c.name, func(t *testing.T) {
|
||||||
|
if got := c.c.IsEmpty(); got != c.want {
|
||||||
|
t.Errorf("IsEmpty() = %v, want %v", got, c.want)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
171
core/pkg/serverless/engine_anon_module_test.go
Normal file
171
core/pkg/serverless/engine_anon_module_test.go
Normal file
@ -0,0 +1,171 @@
|
|||||||
|
package serverless
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"strings"
|
||||||
|
"sync"
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"go.uber.org/zap"
|
||||||
|
)
|
||||||
|
|
||||||
|
// minimalWASM is the same nop-only WASM from TestEngine_Execute —
|
||||||
|
// exports _start, returns immediately. Sufficient for proving that
|
||||||
|
// ExecuteModule's instantiation contract works under repeated /
|
||||||
|
// concurrent calls without name-collision (bug #221).
|
||||||
|
var minimalWASM = []byte{
|
||||||
|
0x00, 0x61, 0x73, 0x6d, 0x01, 0x00, 0x00, 0x00,
|
||||||
|
0x01, 0x04, 0x01, 0x60, 0x00, 0x00,
|
||||||
|
0x03, 0x02, 0x01, 0x00,
|
||||||
|
0x07, 0x0a, 0x01, 0x06, 0x5f, 0x73, 0x74, 0x61, 0x72, 0x74, 0x00, 0x00,
|
||||||
|
0x0a, 0x04, 0x01, 0x02, 0x00, 0x0b,
|
||||||
|
}
|
||||||
|
|
||||||
|
// Bug #221 regression: invocations of the SAME function must succeed
|
||||||
|
// repeatedly. Before the fix, the second invocation hit
|
||||||
|
// `module[<fn>] has already been instantiated` because the executor
|
||||||
|
// passed the function name to wazero's WithName which globally
|
||||||
|
// registered it.
|
||||||
|
func TestEngine_RepeatedInvocations_SameFunction_NoCollision(t *testing.T) {
|
||||||
|
logger := zap.NewNop()
|
||||||
|
registry := NewMockRegistry()
|
||||||
|
hostServices := NewMockHostServices()
|
||||||
|
engine, err := NewEngine(nil, registry, hostServices, logger)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("NewEngine: %v", err)
|
||||||
|
}
|
||||||
|
defer engine.Close(context.Background())
|
||||||
|
|
||||||
|
fnDef := &FunctionDefinition{
|
||||||
|
Name: "username-check",
|
||||||
|
Namespace: "anchat-test",
|
||||||
|
MemoryLimitMB: 64,
|
||||||
|
TimeoutSeconds: 5,
|
||||||
|
}
|
||||||
|
if _, err := registry.Register(context.Background(), fnDef, minimalWASM); err != nil {
|
||||||
|
t.Fatalf("Register: %v", err)
|
||||||
|
}
|
||||||
|
fn, err := registry.Get(context.Background(), "anchat-test", "username-check", 0)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("Get: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Five sequential invocations — each must succeed.
|
||||||
|
for i := 0; i < 5; i++ {
|
||||||
|
if _, err := engine.Execute(context.Background(), fn, []byte("x"), nil); err != nil {
|
||||||
|
t.Fatalf("invocation %d failed: %v", i, err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Bug #221 regression: two CONCURRENT invocations of the same function
|
||||||
|
// must both succeed. Before the fix this was THE most reliable repro —
|
||||||
|
// the second goroutine's InstantiateModule lost the race for the global
|
||||||
|
// name slot.
|
||||||
|
func TestEngine_ConcurrentInvocations_SameFunction_NoCollision(t *testing.T) {
|
||||||
|
logger := zap.NewNop()
|
||||||
|
registry := NewMockRegistry()
|
||||||
|
hostServices := NewMockHostServices()
|
||||||
|
engine, err := NewEngine(nil, registry, hostServices, logger)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("NewEngine: %v", err)
|
||||||
|
}
|
||||||
|
defer engine.Close(context.Background())
|
||||||
|
|
||||||
|
fnDef := &FunctionDefinition{
|
||||||
|
Name: "user-create",
|
||||||
|
Namespace: "anchat-test",
|
||||||
|
MemoryLimitMB: 64,
|
||||||
|
TimeoutSeconds: 5,
|
||||||
|
}
|
||||||
|
if _, err := registry.Register(context.Background(), fnDef, minimalWASM); err != nil {
|
||||||
|
t.Fatalf("Register: %v", err)
|
||||||
|
}
|
||||||
|
fn, err := registry.Get(context.Background(), "anchat-test", "user-create", 0)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("Get: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
const goroutines = 16
|
||||||
|
const iterations = 10
|
||||||
|
|
||||||
|
var (
|
||||||
|
wg sync.WaitGroup
|
||||||
|
errMu sync.Mutex
|
||||||
|
firstErr error
|
||||||
|
errPayloads []string
|
||||||
|
)
|
||||||
|
for g := 0; g < goroutines; g++ {
|
||||||
|
wg.Add(1)
|
||||||
|
go func() {
|
||||||
|
defer wg.Done()
|
||||||
|
for i := 0; i < iterations; i++ {
|
||||||
|
if _, err := engine.Execute(context.Background(), fn, []byte("x"), nil); err != nil {
|
||||||
|
errMu.Lock()
|
||||||
|
if firstErr == nil {
|
||||||
|
firstErr = err
|
||||||
|
}
|
||||||
|
errPayloads = append(errPayloads, err.Error())
|
||||||
|
errMu.Unlock()
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}()
|
||||||
|
}
|
||||||
|
wg.Wait()
|
||||||
|
|
||||||
|
if firstErr != nil {
|
||||||
|
// Surface ALL distinct error messages — easier to debug than the
|
||||||
|
// first one alone if the runtime starts failing weirdly.
|
||||||
|
t.Fatalf("concurrent invocations hit %d errors. first: %v\nall: %v",
|
||||||
|
len(errPayloads), firstErr, errPayloads)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Bug #221 regression: re-deploy of the same function (new version, new
|
||||||
|
// WASM CID) followed by an invocation must succeed. The old compiled
|
||||||
|
// module's cache slot must not block the new instance from running.
|
||||||
|
func TestEngine_ReDeploy_ThenInvoke_NoCollision(t *testing.T) {
|
||||||
|
logger := zap.NewNop()
|
||||||
|
registry := NewMockRegistry()
|
||||||
|
hostServices := NewMockHostServices()
|
||||||
|
engine, err := NewEngine(nil, registry, hostServices, logger)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("NewEngine: %v", err)
|
||||||
|
}
|
||||||
|
defer engine.Close(context.Background())
|
||||||
|
|
||||||
|
fnDef := &FunctionDefinition{
|
||||||
|
Name: "wallet-link",
|
||||||
|
Namespace: "anchat-test",
|
||||||
|
MemoryLimitMB: 64,
|
||||||
|
TimeoutSeconds: 5,
|
||||||
|
}
|
||||||
|
|
||||||
|
// First deploy + invoke.
|
||||||
|
if _, err := registry.Register(context.Background(), fnDef, minimalWASM); err != nil {
|
||||||
|
t.Fatalf("first Register: %v", err)
|
||||||
|
}
|
||||||
|
fn1, _ := registry.Get(context.Background(), "anchat-test", "wallet-link", 0)
|
||||||
|
if _, err := engine.Execute(context.Background(), fn1, []byte("x"), nil); err != nil {
|
||||||
|
t.Fatalf("first invoke: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Re-deploy: same name, new bytes (still valid). MockRegistry overwrites
|
||||||
|
// the function record; the engine sees a fresh wasmCID.
|
||||||
|
if _, err := registry.Register(context.Background(), fnDef, minimalWASM); err != nil {
|
||||||
|
t.Fatalf("re-deploy: %v", err)
|
||||||
|
}
|
||||||
|
fn2, _ := registry.Get(context.Background(), "anchat-test", "wallet-link", 0)
|
||||||
|
|
||||||
|
// Multiple invocations after re-deploy — exactly the AnChat repro.
|
||||||
|
for i := 0; i < 3; i++ {
|
||||||
|
_, err := engine.Execute(context.Background(), fn2, []byte("x"), nil)
|
||||||
|
if err != nil {
|
||||||
|
if strings.Contains(err.Error(), "already been instantiated") {
|
||||||
|
t.Fatalf("bug #221 regression: %v", err)
|
||||||
|
}
|
||||||
|
t.Fatalf("post-redeploy invoke %d failed: %v", i, err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
@ -34,6 +34,20 @@ func NewExecutor(runtime wazero.Runtime, logger *zap.Logger, maxConcurrent int)
|
|||||||
|
|
||||||
// ExecuteModule instantiates and runs a WASM module with the given input.
|
// ExecuteModule instantiates and runs a WASM module with the given input.
|
||||||
// The contextSetter callback is used to set invocation context on host services.
|
// The contextSetter callback is used to set invocation context on host services.
|
||||||
|
//
|
||||||
|
// Bug #221 fix: each invocation gets an ANONYMOUS module instance (no name
|
||||||
|
// in the wazero runtime registry). Previously we used the function name
|
||||||
|
// as the module name, which made wazero refuse the second instantiation
|
||||||
|
// with "module[<fn>] has already been instantiated" any time:
|
||||||
|
//
|
||||||
|
// - Two invocations of the same function ran concurrently
|
||||||
|
// - A re-deploy happened while a previous invocation was still mid-flight
|
||||||
|
// - An instance.Close() raced with the next instantiation
|
||||||
|
//
|
||||||
|
// Anonymous instances eliminate the collision class entirely. The
|
||||||
|
// function name is still surfaced via WithArgs(moduleName) so WASI
|
||||||
|
// `argv[0]` shows it inside the function (matches the prior behavior
|
||||||
|
// that user code may depend on).
|
||||||
func (e *Executor) ExecuteModule(ctx context.Context, compiled wazero.CompiledModule, moduleName string, input []byte, contextSetter func(), contextClearer func()) ([]byte, error) {
|
func (e *Executor) ExecuteModule(ctx context.Context, compiled wazero.CompiledModule, moduleName string, input []byte, contextSetter func(), contextClearer func()) ([]byte, error) {
|
||||||
// Set invocation context for host functions
|
// Set invocation context for host functions
|
||||||
if contextSetter != nil {
|
if contextSetter != nil {
|
||||||
@ -48,9 +62,14 @@ func (e *Executor) ExecuteModule(ctx context.Context, compiled wazero.CompiledMo
|
|||||||
stdout := new(bytes.Buffer)
|
stdout := new(bytes.Buffer)
|
||||||
stderr := new(bytes.Buffer)
|
stderr := new(bytes.Buffer)
|
||||||
|
|
||||||
// Create module configuration with WASI stdio
|
// Create module configuration with WASI stdio.
|
||||||
|
//
|
||||||
|
// WithName("") = anonymous instance, NOT registered under the runtime's
|
||||||
|
// global module name table. argv[0] still carries the function name for
|
||||||
|
// WASI-consuming code that reads os.Args[0] (TinyGo's runtime + apps
|
||||||
|
// that log it). See doc comment above for the bug-#221 rationale.
|
||||||
moduleConfig := wazero.NewModuleConfig().
|
moduleConfig := wazero.NewModuleConfig().
|
||||||
WithName(moduleName).
|
WithName("").
|
||||||
WithStdin(stdin).
|
WithStdin(stdin).
|
||||||
WithStdout(stdout).
|
WithStdout(stdout).
|
||||||
WithStderr(stderr).
|
WithStderr(stderr).
|
||||||
|
|||||||
@ -16,10 +16,14 @@ import (
|
|||||||
|
|
||||||
// NewHostFunctions creates a new HostFunctions instance.
|
// NewHostFunctions creates a new HostFunctions instance.
|
||||||
//
|
//
|
||||||
// pushDispatcher and wsBridge may be nil when those features aren't
|
// pushDispatcher / pushManager / wsBridge may be nil when those features
|
||||||
// configured on this gateway — in that case PushSend silently no-ops
|
// aren't configured on this gateway. PushSend prefers pushManager when
|
||||||
// (so functions stay portable) and WSPubSubBridge returns an explicit
|
// present (per-namespace config takes effect), falls back to pushDispatcher,
|
||||||
// error (because absence of a requested bridge should be visible).
|
// and silently no-ops when both are nil — so functions stay portable
|
||||||
|
// across deployments with/without push.
|
||||||
|
//
|
||||||
|
// WSPubSubBridge returns an explicit error when wsBridge is nil because
|
||||||
|
// absence of a requested bridge should be visible (callers asked for it).
|
||||||
func NewHostFunctions(
|
func NewHostFunctions(
|
||||||
db rqlite.Client,
|
db rqlite.Client,
|
||||||
cacheClient olriclib.Client,
|
cacheClient olriclib.Client,
|
||||||
@ -28,6 +32,7 @@ func NewHostFunctions(
|
|||||||
wsManager serverless.WebSocketManager,
|
wsManager serverless.WebSocketManager,
|
||||||
secrets serverless.SecretsManager,
|
secrets serverless.SecretsManager,
|
||||||
pushDispatcher *push.PushDispatcher,
|
pushDispatcher *push.PushDispatcher,
|
||||||
|
pushManager *push.Manager,
|
||||||
wsBridge *wsbridge.Bridge,
|
wsBridge *wsbridge.Bridge,
|
||||||
cfg HostFunctionsConfig,
|
cfg HostFunctionsConfig,
|
||||||
logger *zap.Logger,
|
logger *zap.Logger,
|
||||||
@ -46,6 +51,7 @@ func NewHostFunctions(
|
|||||||
wsManager: wsManager,
|
wsManager: wsManager,
|
||||||
secrets: secrets,
|
secrets: secrets,
|
||||||
pushDispatcher: pushDispatcher,
|
pushDispatcher: pushDispatcher,
|
||||||
|
pushManager: pushManager,
|
||||||
wsBridge: wsBridge,
|
wsBridge: wsBridge,
|
||||||
httpClient: tlsutil.NewHTTPClient(httpTimeout),
|
httpClient: tlsutil.NewHTTPClient(httpTimeout),
|
||||||
logger: logger,
|
logger: logger,
|
||||||
|
|||||||
@ -33,10 +33,16 @@ const MaxPushSendArgsBytes = 16 * 1024
|
|||||||
// namespace — the dispatcher reads the namespace from the invocation
|
// namespace — the dispatcher reads the namespace from the invocation
|
||||||
// context (set by the engine before invoking).
|
// context (set by the engine before invoking).
|
||||||
//
|
//
|
||||||
// If push is not configured on this gateway (no dispatcher), this returns
|
// If push is not configured on this gateway (no dispatcher AND no
|
||||||
// nil (silent no-op) so functions remain portable across environments.
|
// manager), this returns nil (silent no-op) so functions remain portable
|
||||||
|
// across environments.
|
||||||
|
//
|
||||||
|
// When the manager is present (bug #220 follow-up), it routes through
|
||||||
|
// per-namespace config — tenants who have set their own ntfy / expo via
|
||||||
|
// PUT /v1/push/config get their providers; namespaces with no config
|
||||||
|
// fall back to the gateway YAML defaults via the manager's resolution.
|
||||||
func (h *HostFunctions) PushSend(ctx context.Context, userID string, msgJSON []byte) error {
|
func (h *HostFunctions) PushSend(ctx context.Context, userID string, msgJSON []byte) error {
|
||||||
if h.pushDispatcher == nil {
|
if h.pushManager == nil && h.pushDispatcher == nil {
|
||||||
// Silent no-op — push isn't configured on this gateway.
|
// Silent no-op — push isn't configured on this gateway.
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
@ -96,8 +102,24 @@ func (h *HostFunctions) PushSend(ctx context.Context, userID string, msgJSON []b
|
|||||||
Data: args.Data,
|
Data: args.Data,
|
||||||
}
|
}
|
||||||
|
|
||||||
if err := h.pushDispatcher.SendToUser(ctx, namespace, userID, msg); err != nil {
|
// Route through Manager when present so per-namespace push config
|
||||||
return &serverless.HostFunctionError{Function: "push_send", Cause: err}
|
// (set via PUT /v1/push/config) takes effect. The Manager itself
|
||||||
|
// falls back to YAML defaults if the namespace hasn't set anything.
|
||||||
|
var sendErr error
|
||||||
|
if h.pushManager != nil {
|
||||||
|
sendErr = h.pushManager.SendToUser(ctx, namespace, userID, msg)
|
||||||
|
// ErrPushNotConfigured here would mean: no per-namespace config
|
||||||
|
// AND no YAML defaults. Treat as silent no-op for portability —
|
||||||
|
// same contract as "no dispatcher at all". Functions can't depend
|
||||||
|
// on push being available.
|
||||||
|
if sendErr != nil && sendErr.Error() == push.ErrPushNotConfigured.Error() {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
sendErr = h.pushDispatcher.SendToUser(ctx, namespace, userID, msg)
|
||||||
|
}
|
||||||
|
if sendErr != nil {
|
||||||
|
return &serverless.HostFunctionError{Function: "push_send", Cause: sendErr}
|
||||||
}
|
}
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|||||||
@ -34,9 +34,13 @@ type HostFunctions struct {
|
|||||||
httpClient *http.Client
|
httpClient *http.Client
|
||||||
logger *zap.Logger
|
logger *zap.Logger
|
||||||
|
|
||||||
// pushDispatcher may be nil when push isn't configured for this gateway.
|
// pushDispatcher (legacy) and pushManager (per-namespace, bug #220
|
||||||
// In that case PushSend returns nil silently — see hostfunctions/push.go.
|
// follow-up) provide push send-paths. When pushManager is set, PushSend
|
||||||
|
// uses it so per-namespace config takes effect; pushDispatcher is the
|
||||||
|
// fallback. Both nil = push not configured anywhere on this gateway,
|
||||||
|
// PushSend returns nil silently for portability.
|
||||||
pushDispatcher *push.PushDispatcher
|
pushDispatcher *push.PushDispatcher
|
||||||
|
pushManager *push.Manager
|
||||||
|
|
||||||
// wsBridge may be nil when the gateway doesn't run a bridge. In that
|
// wsBridge may be nil when the gateway doesn't run a bridge. In that
|
||||||
// case WSPubSubBridge returns an error rather than silently no-oping
|
// case WSPubSubBridge returns an error rather than silently no-oping
|
||||||
|
|||||||
@ -5,6 +5,7 @@ import (
|
|||||||
"encoding/json"
|
"encoding/json"
|
||||||
"errors"
|
"errors"
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"strings"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
"github.com/google/uuid"
|
"github.com/google/uuid"
|
||||||
@ -420,26 +421,55 @@ func (i *Invoker) BatchInvoke(ctx context.Context, req *BatchInvokeRequest) (*Ba
|
|||||||
// -----------------------------------------------------------------------------
|
// -----------------------------------------------------------------------------
|
||||||
|
|
||||||
// CanInvoke checks if a caller is authorized to invoke a function.
|
// CanInvoke checks if a caller is authorized to invoke a function.
|
||||||
|
//
|
||||||
|
// Authorization model:
|
||||||
|
// - Public functions (`is_public: true`): anyone can invoke, no auth needed.
|
||||||
|
// The auth middleware lets unauthenticated requests through to public
|
||||||
|
// paths.
|
||||||
|
// - Private functions: any caller that the auth middleware has already
|
||||||
|
// authenticated for THIS namespace can invoke. By the time we reach
|
||||||
|
// this function, the request has passed authMiddleware which validates
|
||||||
|
// EITHER a JWT-bearing token whose `namespace` claim matches the target
|
||||||
|
// namespace OR an API key resolved to the target namespace. So the
|
||||||
|
// non-empty `callerWallet` here is sufficient evidence of a verified
|
||||||
|
// in-namespace caller.
|
||||||
|
//
|
||||||
|
// History (bug #215 follow-up): the previous logic was a stub —
|
||||||
|
//
|
||||||
|
// return callerWallet == namespace || fn.CreatedBy == callerWallet, nil
|
||||||
|
//
|
||||||
|
// — which only allowed namespace-name-as-wallet (the API-key fallback) or
|
||||||
|
// the deploying wallet. Onboarding-style functions like `user-create`,
|
||||||
|
// where a brand-new wallet calls in to register, were rejected with 401
|
||||||
|
// because the new wallet was neither the namespace string nor the
|
||||||
|
// deployer. They worked only as a side-effect of JWT verification
|
||||||
|
// silently failing pre-#215, falling through to API-key auth, and
|
||||||
|
// callerWallet collapsing to the namespace string. Once JWT verify was
|
||||||
|
// fixed, the underlying flaw surfaced.
|
||||||
|
//
|
||||||
|
// Fine-grained per-function ACLs (group membership, roles) are deferred
|
||||||
|
// until there's a concrete tenant requirement. Today, "private" means
|
||||||
|
// "authenticated in-namespace caller required" and that's enforced
|
||||||
|
// here + at authMiddleware.
|
||||||
func (i *Invoker) CanInvoke(ctx context.Context, namespace, functionName string, callerWallet string) (bool, error) {
|
func (i *Invoker) CanInvoke(ctx context.Context, namespace, functionName string, callerWallet string) (bool, error) {
|
||||||
fn, err := i.registry.Get(ctx, namespace, functionName, 0)
|
fn, err := i.registry.Get(ctx, namespace, functionName, 0)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return false, err
|
return false, err
|
||||||
}
|
}
|
||||||
|
|
||||||
// Public functions can be invoked by anyone
|
// Public functions can be invoked by anyone (auth middleware allows
|
||||||
|
// the request through without credentials).
|
||||||
if fn.IsPublic {
|
if fn.IsPublic {
|
||||||
return true, nil
|
return true, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// Non-public functions require the caller to be in the same namespace
|
// Private function: require an authenticated caller. The auth
|
||||||
// (simplified authorization - can be extended)
|
// middleware has already verified the caller belongs to this
|
||||||
if callerWallet == "" {
|
// namespace (either via JWT `namespace` claim or via API-key
|
||||||
return false, nil
|
// namespace lookup) before this function ever runs, so the only
|
||||||
}
|
// thing we need to confirm here is that the caller has SOME
|
||||||
|
// identity at all (i.e. the request wasn't anonymous).
|
||||||
// For now, just check if caller wallet matches namespace
|
return strings.TrimSpace(callerWallet) != "", nil
|
||||||
// In production, you'd check group membership, roles, etc.
|
|
||||||
return callerWallet == namespace || fn.CreatedBy == callerWallet, nil
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// GetFunctionInfo returns basic info about a function for invocation.
|
// GetFunctionInfo returns basic info about a function for invocation.
|
||||||
|
|||||||
138
core/pkg/serverless/invoke_can_invoke_test.go
Normal file
138
core/pkg/serverless/invoke_can_invoke_test.go
Normal file
@ -0,0 +1,138 @@
|
|||||||
|
package serverless
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"go.uber.org/zap"
|
||||||
|
)
|
||||||
|
|
||||||
|
// canInvokeMockRegistry is a minimal FunctionRegistry surface for the
|
||||||
|
// CanInvoke tests. Only Get is exercised; everything else panics so
|
||||||
|
// accidental drift is loud.
|
||||||
|
type canInvokeMockRegistry struct {
|
||||||
|
FunctionRegistry // embedded interface — calling unimplemented methods panics
|
||||||
|
|
||||||
|
fn *Function
|
||||||
|
err error
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m *canInvokeMockRegistry) Get(_ context.Context, _, _ string, _ int) (*Function, error) {
|
||||||
|
if m.err != nil {
|
||||||
|
return nil, m.err
|
||||||
|
}
|
||||||
|
return m.fn, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func newInvokerForCanInvokeTest(fn *Function) *Invoker {
|
||||||
|
return &Invoker{
|
||||||
|
registry: &canInvokeMockRegistry{fn: fn},
|
||||||
|
logger: zap.NewNop(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestCanInvoke_publicAllowsAnyone — public functions bypass the
|
||||||
|
// authorization check entirely (auth middleware lets unauthenticated
|
||||||
|
// requests through to the handler too).
|
||||||
|
func TestCanInvoke_publicAllowsAnyone(t *testing.T) {
|
||||||
|
inv := newInvokerForCanInvokeTest(&Function{
|
||||||
|
Namespace: "anchat-test",
|
||||||
|
Name: "username-check",
|
||||||
|
IsPublic: true,
|
||||||
|
})
|
||||||
|
for _, wallet := range []string{"", "8oxu7UzzaSXc...", "anchat-test"} {
|
||||||
|
ok, err := inv.CanInvoke(context.Background(), "anchat-test", "username-check", wallet)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("wallet=%q: %v", wallet, err)
|
||||||
|
}
|
||||||
|
if !ok {
|
||||||
|
t.Errorf("wallet=%q: public function denied", wallet)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestCanInvoke_privateRejectsAnonymous — empty callerWallet means the
|
||||||
|
// auth middleware didn't establish any identity; private functions reject.
|
||||||
|
func TestCanInvoke_privateRejectsAnonymous(t *testing.T) {
|
||||||
|
inv := newInvokerForCanInvokeTest(&Function{
|
||||||
|
Namespace: "anchat-test",
|
||||||
|
Name: "user-create",
|
||||||
|
IsPublic: false,
|
||||||
|
})
|
||||||
|
ok, _ := inv.CanInvoke(context.Background(), "anchat-test", "user-create", "")
|
||||||
|
if ok {
|
||||||
|
t.Error("anonymous caller should be denied for private function")
|
||||||
|
}
|
||||||
|
// Whitespace-only is still anonymous.
|
||||||
|
ok, _ = inv.CanInvoke(context.Background(), "anchat-test", "user-create", " ")
|
||||||
|
if ok {
|
||||||
|
t.Error("whitespace-only callerWallet should be denied")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestCanInvoke_privateAllowsJWTAuthenticatedNewWallet is the regression
|
||||||
|
// guard for bug #215 follow-up. A brand-new wallet (typical signup flow,
|
||||||
|
// `user-create` style) calls a private function in a namespace it doesn't
|
||||||
|
// "own" yet. Must succeed: auth middleware already validated the JWT
|
||||||
|
// belongs to this namespace, the only role of CanInvoke is to confirm
|
||||||
|
// SOMEONE is authenticated.
|
||||||
|
//
|
||||||
|
// Pre-fix this returned false (the wallet wasn't equal to the namespace
|
||||||
|
// string and wasn't the deployer), which was the entire reason AnChat saw
|
||||||
|
// 401 unauthorized after the cluster_secret_path fix unblocked JWT
|
||||||
|
// verification.
|
||||||
|
func TestCanInvoke_privateAllowsJWTAuthenticatedNewWallet(t *testing.T) {
|
||||||
|
inv := newInvokerForCanInvokeTest(&Function{
|
||||||
|
Namespace: "anchat-test",
|
||||||
|
Name: "user-create",
|
||||||
|
IsPublic: false,
|
||||||
|
CreatedBy: "0xdeployer-wallet-not-this-caller",
|
||||||
|
})
|
||||||
|
const newUserWallet = "8oxu7UzzaSXcxZ9B3YuEqr3Qpmx7tgT9HzaA4NUGiand"
|
||||||
|
ok, err := inv.CanInvoke(context.Background(), "anchat-test", "user-create", newUserWallet)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("CanInvoke: %v", err)
|
||||||
|
}
|
||||||
|
if !ok {
|
||||||
|
t.Fatal("new user wallet should be allowed to invoke private function (auth middleware vouches for them)")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestCanInvoke_privateAllowsAPIKeyCaller — API-key callers get
|
||||||
|
// callerWallet=namespace from the wallet resolver. They should still
|
||||||
|
// succeed; this preserves the pre-#215 working flow for tenants who
|
||||||
|
// only use API keys.
|
||||||
|
func TestCanInvoke_privateAllowsAPIKeyCaller(t *testing.T) {
|
||||||
|
inv := newInvokerForCanInvokeTest(&Function{
|
||||||
|
Namespace: "anchat-test",
|
||||||
|
Name: "user-create",
|
||||||
|
IsPublic: false,
|
||||||
|
CreatedBy: "0xdeployer-wallet",
|
||||||
|
})
|
||||||
|
ok, err := inv.CanInvoke(context.Background(), "anchat-test", "user-create", "anchat-test")
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("CanInvoke: %v", err)
|
||||||
|
}
|
||||||
|
if !ok {
|
||||||
|
t.Error("API-key callers (callerWallet=namespace) must keep working")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestCanInvoke_privateAllowsDeployer — the function deployer can invoke
|
||||||
|
// their own private function. Was true pre-fix and must remain true.
|
||||||
|
func TestCanInvoke_privateAllowsDeployer(t *testing.T) {
|
||||||
|
const deployer = "0xdeployer-wallet"
|
||||||
|
inv := newInvokerForCanInvokeTest(&Function{
|
||||||
|
Namespace: "anchat-test",
|
||||||
|
Name: "user-create",
|
||||||
|
IsPublic: false,
|
||||||
|
CreatedBy: deployer,
|
||||||
|
})
|
||||||
|
ok, err := inv.CanInvoke(context.Background(), "anchat-test", "user-create", deployer)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("CanInvoke: %v", err)
|
||||||
|
}
|
||||||
|
if !ok {
|
||||||
|
t.Error("deployer must always be allowed to invoke their own function")
|
||||||
|
}
|
||||||
|
}
|
||||||
217
core/pkg/serverless/triggers/cron_parser_test.go
Normal file
217
core/pkg/serverless/triggers/cron_parser_test.go
Normal file
@ -0,0 +1,217 @@
|
|||||||
|
package triggers
|
||||||
|
|
||||||
|
import (
|
||||||
|
"strings"
|
||||||
|
"testing"
|
||||||
|
"time"
|
||||||
|
)
|
||||||
|
|
||||||
|
// ----------------------------------------------------------------------------
|
||||||
|
// ParseCron — accept paths
|
||||||
|
// ----------------------------------------------------------------------------
|
||||||
|
|
||||||
|
func TestParseCron_fiveField(t *testing.T) {
|
||||||
|
c, err := ParseCron("0 3 * * *") // every day at 03:00 UTC
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("ParseCron: %v", err)
|
||||||
|
}
|
||||||
|
if c.hasSeconds {
|
||||||
|
t.Error("hasSeconds = true, want false for 5-field expression")
|
||||||
|
}
|
||||||
|
if !c.minutes.match(0) {
|
||||||
|
t.Error("minutes mask missing 0")
|
||||||
|
}
|
||||||
|
if c.minutes.match(1) {
|
||||||
|
t.Error("minutes mask should not match 1")
|
||||||
|
}
|
||||||
|
if !c.hours.match(3) {
|
||||||
|
t.Error("hours mask missing 3")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestParseCron_sixField(t *testing.T) {
|
||||||
|
c, err := ParseCron("*/30 * * * * *") // every 30 seconds
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("ParseCron: %v", err)
|
||||||
|
}
|
||||||
|
if !c.hasSeconds {
|
||||||
|
t.Error("hasSeconds = false, want true for 6-field expression")
|
||||||
|
}
|
||||||
|
if !c.seconds.match(0) || !c.seconds.match(30) {
|
||||||
|
t.Error("seconds mask should match 0 and 30")
|
||||||
|
}
|
||||||
|
if c.seconds.match(15) {
|
||||||
|
t.Error("seconds mask should NOT match 15 for */30")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestParseCron_listsAndRanges(t *testing.T) {
|
||||||
|
c, err := ParseCron("0,15,30,45 9-17 * * 1-5") // every 15min during business hours, weekdays
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("ParseCron: %v", err)
|
||||||
|
}
|
||||||
|
for _, m := range []int{0, 15, 30, 45} {
|
||||||
|
if !c.minutes.match(m) {
|
||||||
|
t.Errorf("minutes mask missing %d", m)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for _, h := range []int{9, 12, 17} {
|
||||||
|
if !c.hours.match(h) {
|
||||||
|
t.Errorf("hours mask missing %d", h)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if c.hours.match(8) || c.hours.match(18) {
|
||||||
|
t.Error("hours mask should not match outside 9-17")
|
||||||
|
}
|
||||||
|
for _, d := range []int{1, 2, 3, 4, 5} {
|
||||||
|
if !c.dow.match(d) {
|
||||||
|
t.Errorf("dow mask missing %d", d)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if c.dow.match(0) || c.dow.match(6) {
|
||||||
|
t.Error("dow mask should not match weekend (0 or 6)")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestParseCron_sundayNormalisation(t *testing.T) {
|
||||||
|
// Cron permits 0 OR 7 for Sunday. Both should be normalised to 0.
|
||||||
|
c7, err := ParseCron("0 0 * * 7")
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("ParseCron 7: %v", err)
|
||||||
|
}
|
||||||
|
if !c7.dow.match(0) {
|
||||||
|
t.Error("dow 7 should normalise to match 0 (Sunday)")
|
||||||
|
}
|
||||||
|
if c7.dow.match(7) {
|
||||||
|
t.Error("after normalisation, bit 7 should be cleared (clamped to 0..6)")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// ----------------------------------------------------------------------------
|
||||||
|
// ParseCron — reject paths
|
||||||
|
// ----------------------------------------------------------------------------
|
||||||
|
|
||||||
|
func TestParseCron_rejectMalformed(t *testing.T) {
|
||||||
|
cases := []struct {
|
||||||
|
expr string
|
||||||
|
reason string
|
||||||
|
}{
|
||||||
|
{"", "empty"},
|
||||||
|
{" ", "whitespace-only"},
|
||||||
|
{"* * *", "too few fields"},
|
||||||
|
{"* * * * * * *", "too many fields"},
|
||||||
|
{"60 * * * *", "minute out of range"},
|
||||||
|
{"* 24 * * *", "hour out of range"},
|
||||||
|
{"* * 0 * *", "day-of-month under range"},
|
||||||
|
{"* * 32 * *", "day-of-month over range"},
|
||||||
|
{"* * * 13 *", "month over range"},
|
||||||
|
{"* * * 0 *", "month under range"},
|
||||||
|
{"* * * * 8", "day-of-week over range"},
|
||||||
|
{"abc * * * *", "non-numeric"},
|
||||||
|
{"5-2 * * * *", "inverted range"},
|
||||||
|
{"*/0 * * * *", "zero step"},
|
||||||
|
{"*/-1 * * * *", "negative step"},
|
||||||
|
}
|
||||||
|
for _, tc := range cases {
|
||||||
|
t.Run(tc.reason, func(t *testing.T) {
|
||||||
|
if _, err := ParseCron(tc.expr); err == nil {
|
||||||
|
t.Errorf("ParseCron(%q) succeeded; expected error (%s)", tc.expr, tc.reason)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// ----------------------------------------------------------------------------
|
||||||
|
// Next() — correctness
|
||||||
|
// ----------------------------------------------------------------------------
|
||||||
|
|
||||||
|
func TestNext_dailyAt03UTC(t *testing.T) {
|
||||||
|
c, _ := ParseCron("0 3 * * *")
|
||||||
|
now := time.Date(2025, 5, 7, 12, 30, 0, 0, time.UTC)
|
||||||
|
got, err := c.Next(now)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("Next: %v", err)
|
||||||
|
}
|
||||||
|
want := time.Date(2025, 5, 8, 3, 0, 0, 0, time.UTC)
|
||||||
|
if !got.Equal(want) {
|
||||||
|
t.Errorf("Next = %s, want %s", got, want)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestNext_strictlyAfter(t *testing.T) {
|
||||||
|
// When `after` is exactly on a matching minute, Next must return the
|
||||||
|
// FOLLOWING match, not `after` itself.
|
||||||
|
c, _ := ParseCron("0 3 * * *")
|
||||||
|
now := time.Date(2025, 5, 7, 3, 0, 0, 0, time.UTC) // exactly 03:00
|
||||||
|
got, _ := c.Next(now)
|
||||||
|
want := time.Date(2025, 5, 8, 3, 0, 0, 0, time.UTC)
|
||||||
|
if !got.Equal(want) {
|
||||||
|
t.Errorf("Next from exact-match should advance one day: got %s, want %s", got, want)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestNext_secondsResolution(t *testing.T) {
|
||||||
|
c, _ := ParseCron("*/30 * * * * *") // every 30 sec
|
||||||
|
now := time.Date(2025, 5, 7, 12, 0, 5, 0, time.UTC)
|
||||||
|
got, _ := c.Next(now)
|
||||||
|
want := time.Date(2025, 5, 7, 12, 0, 30, 0, time.UTC)
|
||||||
|
if !got.Equal(want) {
|
||||||
|
t.Errorf("Next = %s, want %s", got, want)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestNext_monthSkip(t *testing.T) {
|
||||||
|
// Day-of-month=29, only February — must skip to the next leap year.
|
||||||
|
c, _ := ParseCron("0 0 29 2 *")
|
||||||
|
now := time.Date(2025, 1, 1, 0, 0, 0, 0, time.UTC)
|
||||||
|
got, err := c.Next(now)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("Next: %v", err)
|
||||||
|
}
|
||||||
|
want := time.Date(2028, 2, 29, 0, 0, 0, 0, time.UTC)
|
||||||
|
if !got.Equal(want) {
|
||||||
|
t.Errorf("Next leap = %s, want %s", got, want)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestNext_impossibleScheduleFails(t *testing.T) {
|
||||||
|
// February 30th never exists.
|
||||||
|
c, _ := ParseCron("0 0 30 2 *")
|
||||||
|
now := time.Date(2025, 1, 1, 0, 0, 0, 0, time.UTC)
|
||||||
|
_, err := c.Next(now)
|
||||||
|
if err == nil {
|
||||||
|
t.Fatal("Next should fail for impossible date combo (Feb 30)")
|
||||||
|
}
|
||||||
|
if !strings.Contains(err.Error(), "no match") {
|
||||||
|
t.Errorf("error = %v, want 'no match' substring", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestNext_weekdayOnly(t *testing.T) {
|
||||||
|
c, _ := ParseCron("0 9 * * 1") // every Monday at 09:00
|
||||||
|
// Wednesday May 7, 2025
|
||||||
|
now := time.Date(2025, 5, 7, 12, 0, 0, 0, time.UTC)
|
||||||
|
got, _ := c.Next(now)
|
||||||
|
// Next Monday is May 12, 2025
|
||||||
|
want := time.Date(2025, 5, 12, 9, 0, 0, 0, time.UTC)
|
||||||
|
if !got.Equal(want) {
|
||||||
|
t.Errorf("Next weekday = %s, want %s", got, want)
|
||||||
|
}
|
||||||
|
if got.Weekday() != time.Monday {
|
||||||
|
t.Errorf("expected Monday, got %s", got.Weekday())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// ----------------------------------------------------------------------------
|
||||||
|
// fieldMatcher
|
||||||
|
// ----------------------------------------------------------------------------
|
||||||
|
|
||||||
|
func TestFieldMatcher_outOfRange(t *testing.T) {
|
||||||
|
var m fieldMatcher = 0xFFFF
|
||||||
|
if m.match(-1) {
|
||||||
|
t.Error("match(-1) should be false")
|
||||||
|
}
|
||||||
|
if m.match(64) {
|
||||||
|
t.Error("match(64) should be false (mask is uint64, bits 0..63 only)")
|
||||||
|
}
|
||||||
|
}
|
||||||
@ -128,17 +128,38 @@ func (s *CronScheduler) dispatch(ctx context.Context, row CronDueRow, now time.T
|
|||||||
zap.String("expr", row.CronExpression),
|
zap.String("expr", row.CronExpression),
|
||||||
zap.Error(err))
|
zap.Error(err))
|
||||||
// Push next_run_at far out so we don't keep looking at this row.
|
// Push next_run_at far out so we don't keep looking at this row.
|
||||||
_ = s.store.MarkRun(ctx, row.TriggerID, now, now.Add(365*24*time.Hour),
|
// Best-effort; if the lease is lost (ErrAlreadyRan) another gateway
|
||||||
|
// already saw the same bad expression and is handling it.
|
||||||
|
_ = s.store.MarkRun(ctx, row.TriggerID, row.NextRunAt, now, now.Add(365*24*time.Hour),
|
||||||
"error", "bad cron expression: "+err.Error())
|
"error", "bad cron expression: "+err.Error())
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
next, err := parsed.Next(now)
|
next, err := parsed.Next(now)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
_ = s.store.MarkRun(ctx, row.TriggerID, now, now.Add(365*24*time.Hour),
|
_ = s.store.MarkRun(ctx, row.TriggerID, row.NextRunAt, now, now.Add(365*24*time.Hour),
|
||||||
"error", "no next match: "+err.Error())
|
"error", "no next match: "+err.Error())
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Claim the lease BEFORE invoking. The compare-and-swap on next_run_at
|
||||||
|
// ensures only one gateway wins the race when multiple instances see
|
||||||
|
// the same trigger as due. Pre-emptive claim (advance to the new next
|
||||||
|
// + provisional "running" status) means we don't issue duplicate
|
||||||
|
// invokes; the loser bails here. We patch in the real status after the
|
||||||
|
// invoke completes via a follow-up unconditional UPDATE keyed on the
|
||||||
|
// new next_run_at — the loser can't accidentally clobber that because
|
||||||
|
// its expected-old next_run_at no longer matches anything.
|
||||||
|
if err := s.store.MarkRun(ctx, row.TriggerID, row.NextRunAt, now, next, "running", ""); err != nil {
|
||||||
|
if err == ErrAlreadyRan {
|
||||||
|
// Another gateway claimed this tick. Skip silently.
|
||||||
|
return
|
||||||
|
}
|
||||||
|
s.logger.Warn("cron scheduler: failed to claim lease",
|
||||||
|
zap.String("trigger_id", row.TriggerID),
|
||||||
|
zap.Error(err))
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
req := &serverless.InvokeRequest{
|
req := &serverless.InvokeRequest{
|
||||||
Namespace: row.Namespace,
|
Namespace: row.Namespace,
|
||||||
FunctionName: row.FunctionName,
|
FunctionName: row.FunctionName,
|
||||||
@ -160,8 +181,12 @@ func (s *CronScheduler) dispatch(ctx context.Context, row CronDueRow, now time.T
|
|||||||
status = "error"
|
status = "error"
|
||||||
errMsg = resp.Error
|
errMsg = resp.Error
|
||||||
}
|
}
|
||||||
if err := s.store.MarkRun(ctx, row.TriggerID, now, next, status, errMsg); err != nil {
|
// Patch the result over the provisional "running" row. We're CAS'd on
|
||||||
s.logger.Warn("cron scheduler: MarkRun failed",
|
// the (now) post-claim next_run_at so a stale loser from another node
|
||||||
|
// can't overwrite — though by construction, only this gateway holds
|
||||||
|
// the lease at this point.
|
||||||
|
if err := s.store.MarkRun(ctx, row.TriggerID, next, now, next, status, errMsg); err != nil && err != ErrAlreadyRan {
|
||||||
|
s.logger.Warn("cron scheduler: failed to record run result",
|
||||||
zap.String("trigger_id", row.TriggerID),
|
zap.String("trigger_id", row.TriggerID),
|
||||||
zap.Error(err))
|
zap.Error(err))
|
||||||
}
|
}
|
||||||
|
|||||||
@ -43,6 +43,12 @@ type cronRow struct {
|
|||||||
|
|
||||||
// CronDueRow is the JOINed row returned by ListDue: trigger + the function
|
// CronDueRow is the JOINed row returned by ListDue: trigger + the function
|
||||||
// metadata the scheduler needs to invoke it.
|
// metadata the scheduler needs to invoke it.
|
||||||
|
//
|
||||||
|
// NextRunAt is captured at ListDue time and passed back into MarkRun as the
|
||||||
|
// expected-old value for the compare-and-swap UPDATE. Two gateways racing
|
||||||
|
// the same trigger will both see the same NextRunAt; only the first MarkRun
|
||||||
|
// wins (1 row affected), the loser gets 0 rows affected and the scheduler
|
||||||
|
// skips the duplicate invoke.
|
||||||
type CronDueRow struct {
|
type CronDueRow struct {
|
||||||
TriggerID string
|
TriggerID string
|
||||||
FunctionID string
|
FunctionID string
|
||||||
@ -156,6 +162,11 @@ func (s *CronTriggerStore) ListDue(ctx context.Context, now time.Time, limit int
|
|||||||
if limit <= 0 {
|
if limit <= 0 {
|
||||||
limit = 100
|
limit = 100
|
||||||
}
|
}
|
||||||
|
// `next_run_at IS NOT NULL` is defensive: the schema permits NULL and
|
||||||
|
// `Add` always populates it, but a manual DB poke or future migration
|
||||||
|
// could leave a NULL row that would otherwise sort first and behave
|
||||||
|
// unpredictably under `<=`. Guarding here keeps the scheduler's
|
||||||
|
// invariants explicit.
|
||||||
query := `
|
query := `
|
||||||
SELECT t.id AS trigger_id, t.function_id AS function_id,
|
SELECT t.id AS trigger_id, t.function_id AS function_id,
|
||||||
f.name AS function_name, f.namespace AS namespace,
|
f.name AS function_name, f.namespace AS namespace,
|
||||||
@ -164,6 +175,7 @@ func (s *CronTriggerStore) ListDue(ctx context.Context, now time.Time, limit int
|
|||||||
JOIN functions f ON t.function_id = f.id
|
JOIN functions f ON t.function_id = f.id
|
||||||
WHERE t.enabled = TRUE
|
WHERE t.enabled = TRUE
|
||||||
AND f.status = 'active'
|
AND f.status = 'active'
|
||||||
|
AND t.next_run_at IS NOT NULL
|
||||||
AND t.next_run_at <= ?
|
AND t.next_run_at <= ?
|
||||||
ORDER BY t.next_run_at ASC
|
ORDER BY t.next_run_at ASC
|
||||||
LIMIT ?
|
LIMIT ?
|
||||||
@ -178,9 +190,20 @@ func (s *CronTriggerStore) ListDue(ctx context.Context, now time.Time, limit int
|
|||||||
// MarkRun updates next_run_at + last_run_at + last_status / last_error
|
// MarkRun updates next_run_at + last_run_at + last_status / last_error
|
||||||
// after the scheduler invokes a trigger. Idempotent: if the row was
|
// after the scheduler invokes a trigger. Idempotent: if the row was
|
||||||
// removed concurrently, the UPDATE is a no-op.
|
// removed concurrently, the UPDATE is a no-op.
|
||||||
|
//
|
||||||
|
// expectedNextRunAt enables the soft-lease semantics promised by the
|
||||||
|
// scheduler comment: the UPDATE is gated on the row's current next_run_at
|
||||||
|
// matching what ListDue read. Two gateways racing the same tick will both
|
||||||
|
// see the same expectedNextRunAt; only one's UPDATE matches a row, the
|
||||||
|
// other gets RowsAffected==0 and ErrAlreadyRan back so the scheduler can
|
||||||
|
// skip the duplicate invoke.
|
||||||
|
//
|
||||||
|
// Returns ErrAlreadyRan when the compare-and-swap missed (another node won
|
||||||
|
// the race or the row was deleted). Other errors are wrapped DB errors.
|
||||||
func (s *CronTriggerStore) MarkRun(
|
func (s *CronTriggerStore) MarkRun(
|
||||||
ctx context.Context,
|
ctx context.Context,
|
||||||
triggerID string,
|
triggerID string,
|
||||||
|
expectedNextRunAt time.Time,
|
||||||
ranAt time.Time,
|
ranAt time.Time,
|
||||||
nextRunAt time.Time,
|
nextRunAt time.Time,
|
||||||
status string,
|
status string,
|
||||||
@ -189,8 +212,20 @@ func (s *CronTriggerStore) MarkRun(
|
|||||||
query := `
|
query := `
|
||||||
UPDATE function_cron_triggers
|
UPDATE function_cron_triggers
|
||||||
SET last_run_at = ?, next_run_at = ?, last_status = ?, last_error = ?
|
SET last_run_at = ?, next_run_at = ?, last_status = ?, last_error = ?
|
||||||
WHERE id = ?
|
WHERE id = ? AND next_run_at = ?
|
||||||
`
|
`
|
||||||
_, err := s.db.Exec(ctx, query, ranAt, nextRunAt, status, errMsg, triggerID)
|
res, err := s.db.Exec(ctx, query, ranAt, nextRunAt, status, errMsg, triggerID, expectedNextRunAt)
|
||||||
return err
|
if err != nil {
|
||||||
|
return fmt.Errorf("failed to MarkRun cron trigger: %w", err)
|
||||||
|
}
|
||||||
|
if affected, _ := res.RowsAffected(); affected == 0 {
|
||||||
|
return ErrAlreadyRan
|
||||||
|
}
|
||||||
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ErrAlreadyRan is returned by MarkRun when the compare-and-swap on
|
||||||
|
// next_run_at missed — either another gateway claimed this tick first, or
|
||||||
|
// the trigger was deleted concurrently. The scheduler treats it as "skip
|
||||||
|
// the duplicate invoke result" rather than a hard error.
|
||||||
|
var ErrAlreadyRan = fmt.Errorf("cron trigger already advanced by another node")
|
||||||
|
|||||||
120
core/pkg/serverless/triggers/cron_store_markrun_test.go
Normal file
120
core/pkg/serverless/triggers/cron_store_markrun_test.go
Normal file
@ -0,0 +1,120 @@
|
|||||||
|
package triggers
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"database/sql"
|
||||||
|
"errors"
|
||||||
|
"testing"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/DeBrosOfficial/network/pkg/rqlite"
|
||||||
|
"go.uber.org/zap"
|
||||||
|
)
|
||||||
|
|
||||||
|
// markRunMockDB is a minimal rqlite.Client surface for MarkRun tests.
|
||||||
|
// Only Exec is exercised — anything else panics so accidental drift is loud.
|
||||||
|
type markRunMockDB struct {
|
||||||
|
rqlite.Client // embedded interface — calling any unimplemented method panics
|
||||||
|
|
||||||
|
lastQuery string
|
||||||
|
lastArgs []any
|
||||||
|
rows int64
|
||||||
|
execErr error
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m *markRunMockDB) Exec(_ context.Context, query string, args ...any) (sql.Result, error) {
|
||||||
|
m.lastQuery = query
|
||||||
|
m.lastArgs = args
|
||||||
|
if m.execErr != nil {
|
||||||
|
return nil, m.execErr
|
||||||
|
}
|
||||||
|
return &fakeResult{rows: m.rows}, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
type fakeResult struct {
|
||||||
|
rows int64
|
||||||
|
}
|
||||||
|
|
||||||
|
func (f *fakeResult) LastInsertId() (int64, error) { return 0, nil }
|
||||||
|
func (f *fakeResult) RowsAffected() (int64, error) { return f.rows, nil }
|
||||||
|
|
||||||
|
// TestMarkRun_compareAndSwapMisses guards bug #65 audit B6: when two gateways
|
||||||
|
// race the same tick, only the first MarkRun wins and the second must return
|
||||||
|
// ErrAlreadyRan so the caller can skip the duplicate invoke.
|
||||||
|
func TestMarkRun_compareAndSwapMisses(t *testing.T) {
|
||||||
|
db := &markRunMockDB{rows: 0} // simulate "no row matched the WHERE"
|
||||||
|
store := NewCronTriggerStore(db, zap.NewNop())
|
||||||
|
|
||||||
|
expected := time.Date(2025, 5, 7, 12, 0, 0, 0, time.UTC)
|
||||||
|
err := store.MarkRun(context.Background(),
|
||||||
|
"trigger-id", expected, expected.Add(time.Second),
|
||||||
|
expected.Add(time.Minute), "ok", "")
|
||||||
|
if !errors.Is(err, ErrAlreadyRan) {
|
||||||
|
t.Fatalf("MarkRun = %v, want ErrAlreadyRan", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestMarkRun_compareAndSwapWins covers the happy path: 1 row affected, no
|
||||||
|
// error returned. This pairs with the misses-test to fully cover the lease
|
||||||
|
// branch.
|
||||||
|
func TestMarkRun_compareAndSwapWins(t *testing.T) {
|
||||||
|
db := &markRunMockDB{rows: 1}
|
||||||
|
store := NewCronTriggerStore(db, zap.NewNop())
|
||||||
|
|
||||||
|
expected := time.Date(2025, 5, 7, 12, 0, 0, 0, time.UTC)
|
||||||
|
err := store.MarkRun(context.Background(),
|
||||||
|
"trigger-id", expected, expected.Add(time.Second),
|
||||||
|
expected.Add(time.Minute), "ok", "")
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("MarkRun (winning CAS) = %v, want nil", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestMarkRun_dbErrorPropagates ensures a real database failure is wrapped
|
||||||
|
// and returned, NOT collapsed into ErrAlreadyRan.
|
||||||
|
func TestMarkRun_dbErrorPropagates(t *testing.T) {
|
||||||
|
dbErr := errors.New("rqlite unavailable")
|
||||||
|
db := &markRunMockDB{execErr: dbErr}
|
||||||
|
store := NewCronTriggerStore(db, zap.NewNop())
|
||||||
|
|
||||||
|
err := store.MarkRun(context.Background(),
|
||||||
|
"trigger-id", time.Now(), time.Now(), time.Now(), "ok", "")
|
||||||
|
if err == nil {
|
||||||
|
t.Fatal("MarkRun with DB error returned nil")
|
||||||
|
}
|
||||||
|
if errors.Is(err, ErrAlreadyRan) {
|
||||||
|
t.Fatal("DB error should not surface as ErrAlreadyRan")
|
||||||
|
}
|
||||||
|
if !errors.Is(err, dbErr) {
|
||||||
|
t.Errorf("error chain missing wrapped DB error: %v", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestMarkRun_compareAndSwapClauseInQuery verifies the SQL actually contains
|
||||||
|
// the `next_run_at = ?` predicate — without it the lease semantics are
|
||||||
|
// silently broken even though tests with a 1-row mock would still "pass".
|
||||||
|
func TestMarkRun_compareAndSwapClauseInQuery(t *testing.T) {
|
||||||
|
db := &markRunMockDB{rows: 1}
|
||||||
|
store := NewCronTriggerStore(db, zap.NewNop())
|
||||||
|
|
||||||
|
_ = store.MarkRun(context.Background(), "id",
|
||||||
|
time.Now(), time.Now(), time.Now(), "ok", "")
|
||||||
|
|
||||||
|
if db.lastQuery == "" {
|
||||||
|
t.Fatal("Exec was not called")
|
||||||
|
}
|
||||||
|
// The exact SQL is whitespace-formatted; just check the lease clause is
|
||||||
|
// present.
|
||||||
|
if !contains(db.lastQuery, "next_run_at = ?") {
|
||||||
|
t.Errorf("MarkRun SQL missing CAS clause:\n%s", db.lastQuery)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func contains(haystack, needle string) bool {
|
||||||
|
for i := 0; i+len(needle) <= len(haystack); i++ {
|
||||||
|
if haystack[i:i+len(needle)] == needle {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false
|
||||||
|
}
|
||||||
Loading…
x
Reference in New Issue
Block a user