mirror of
https://github.com/DeBrosOfficial/orama.git
synced 2026-06-16 22:54:12 +00:00
feat(serverless): add raw http response mode and secrets encryption
- Add `raw_http_response` configuration to functions to allow verbatim HTTP responses
- Implement cluster-wide secrets encryption key generation and distribution for serverless functions
- Update documentation with UnifiedPush support for ntfy on Android/GrapheneOS
This commit is contained in:
parent
aa04ab5f50
commit
f41242538e
@ -214,6 +214,43 @@ your client computes locally from `(namespace, userId, topic_secret)`.
|
||||
|
||||
For `ntfy` with `topic_mode=path`, the token is `ns/<namespace>/<userId>`.
|
||||
|
||||
### UnifiedPush (Android / GrapheneOS, no Google Play Services)
|
||||
|
||||
ntfy is a [UnifiedPush](https://unifiedpush.org) distributor, so Android
|
||||
devices — including de-Googled **GrapheneOS** — can receive push **without
|
||||
Firebase / Google Play Services**. The flow:
|
||||
|
||||
1. The device runs a UnifiedPush **distributor** (the ntfy Android app, or an
|
||||
embedded distributor library) pointed at your push host
|
||||
(`https://push.<your-zone>`).
|
||||
2. The app registers with the distributor and is handed an **endpoint URL**,
|
||||
e.g. `https://push.<your-zone>/upXXXXXXXX`.
|
||||
3. Register that endpoint as a push device:
|
||||
|
||||
```http
|
||||
POST /v1/push/devices
|
||||
{
|
||||
"device_id": "<unique per-device ID>",
|
||||
"provider": "ntfy",
|
||||
"token": "https://push.<your-zone>/upXXXXXXXX", // the full endpoint
|
||||
"platform": "android"
|
||||
}
|
||||
```
|
||||
|
||||
The gateway POSTs to the endpoint **verbatim** (per the UnifiedPush spec), so
|
||||
you don't have to deconstruct it. As a safety measure the endpoint's
|
||||
scheme+host **must match your configured ntfy push host** — a device token can
|
||||
only ever publish to your own push server, never an arbitrary host.
|
||||
|
||||
You may instead register just the bare **topic** (the endpoint's last path
|
||||
segment) as the token — both forms work; use whichever your UnifiedPush library
|
||||
makes convenient.
|
||||
|
||||
**GrapheneOS notes:** works under both "No Google Play" and "Sandboxed Google
|
||||
Play" profiles. The distributor holds the persistent connection (not your app),
|
||||
so battery impact is the distributor's; high-priority messages
|
||||
(`priority: "high"`) wake the app from Doze.
|
||||
|
||||
---
|
||||
|
||||
## Step 6 — Send pushes
|
||||
|
||||
15
core/migrations/029_raw_http_response.sql
Normal file
15
core/migrations/029_raw_http_response.sql
Normal file
@ -0,0 +1,15 @@
|
||||
-- =============================================================================
-- 029_raw_http_response.sql
--
-- Adds the per-function opt-in flag for raw-HTTP-response mode (bugboard #835).
--
-- A function whose raw_http_response flag is true may call the
-- set_http_response host function to return a verbatim HTTP response
-- (status + headers + body) rather than the JSON/Ack-wrapped output. That lets
-- a namespace app transparently proxy an upstream RPC (Helius / Alchemy).
-- See pkg/serverless/raw_http.go.
--
-- The column defaults to FALSE, which keeps the change backward compatible:
-- existing functions continue to return the JSON/Ack-wrapped output.
-- =============================================================================

ALTER TABLE functions ADD COLUMN raw_http_response BOOLEAN DEFAULT FALSE;
|
||||
@ -32,6 +32,11 @@ type FunctionConfig struct {
|
||||
WSIdleTimeoutSec int `yaml:"ws_idle_timeout_sec"`
|
||||
WSMaxFrameBytes int `yaml:"ws_max_frame_bytes"`
|
||||
WSMaxInflightPerConn int `yaml:"ws_max_inflight_per_conn"`
|
||||
|
||||
// RawHTTPResponse enables raw-HTTP-response mode (bugboard #835) — the
|
||||
// function may call set_http_response to emit a verbatim HTTP response
|
||||
// (status/headers/body) instead of the JSON/Ack-wrapped output.
|
||||
RawHTTPResponse bool `yaml:"raw_http_response"`
|
||||
}
|
||||
|
||||
// RetryConfig holds retry settings.
|
||||
@ -226,6 +231,9 @@ func uploadWASMFunction(wasmPath string, cfg *FunctionConfig) (map[string]interf
|
||||
if cfg.WSMaxInflightPerConn > 0 {
|
||||
metaObj["ws_max_inflight_per_conn"] = cfg.WSMaxInflightPerConn
|
||||
}
|
||||
if cfg.RawHTTPResponse {
|
||||
metaObj["raw_http_response"] = true
|
||||
}
|
||||
if len(metaObj) > 0 {
|
||||
metadata, _ := json.Marshal(metaObj)
|
||||
writer.WriteField("metadata", string(metadata))
|
||||
|
||||
53
core/pkg/cli/functions/helpers_test.go
Normal file
53
core/pkg/cli/functions/helpers_test.go
Normal file
@ -0,0 +1,53 @@
|
||||
package functions
|
||||
|
||||
import (
|
||||
"os"
|
||||
"path/filepath"
|
||||
"testing"
|
||||
)
|
||||
|
||||
// writeFunctionYAML creates a per-test temporary directory, stores body in it
// as function.yaml (mode 0600), and returns the directory path. A write error
// fails the calling test immediately.
func writeFunctionYAML(t *testing.T, body string) string {
	t.Helper()

	tmp := t.TempDir()
	target := filepath.Join(tmp, "function.yaml")
	err := os.WriteFile(target, []byte(body), 0o600)
	if err != nil {
		t.Fatalf("write function.yaml: %v", err)
	}
	return tmp
}
|
||||
|
||||
func TestLoadConfig_RawHTTPResponse_true(t *testing.T) {
|
||||
dir := writeFunctionYAML(t, "name: rpc-proxy\nraw_http_response: true\n")
|
||||
|
||||
cfg, err := LoadConfig(dir)
|
||||
if err != nil {
|
||||
t.Fatalf("LoadConfig: %v", err)
|
||||
}
|
||||
if !cfg.RawHTTPResponse {
|
||||
t.Error("RawHTTPResponse = false, want true")
|
||||
}
|
||||
}
|
||||
|
||||
func TestLoadConfig_RawHTTPResponse_defaultsFalse(t *testing.T) {
|
||||
dir := writeFunctionYAML(t, "name: plain-fn\n")
|
||||
|
||||
cfg, err := LoadConfig(dir)
|
||||
if err != nil {
|
||||
t.Fatalf("LoadConfig: %v", err)
|
||||
}
|
||||
if cfg.RawHTTPResponse {
|
||||
t.Error("RawHTTPResponse = true, want false (omitted in yaml)")
|
||||
}
|
||||
}
|
||||
|
||||
func TestLoadConfig_RawHTTPResponse_explicitFalse(t *testing.T) {
|
||||
dir := writeFunctionYAML(t, "name: plain-fn\nraw_http_response: false\n")
|
||||
|
||||
cfg, err := LoadConfig(dir)
|
||||
if err != nil {
|
||||
t.Fatalf("LoadConfig: %v", err)
|
||||
}
|
||||
if cfg.RawHTTPResponse {
|
||||
t.Error("RawHTTPResponse = true, want false")
|
||||
}
|
||||
}
|
||||
@ -477,6 +477,14 @@ func (o *Orchestrator) saveSecretsFromJoinResponse(resp *joinhandlers.JoinRespon
|
||||
}
|
||||
}
|
||||
|
||||
// Write serverless secrets encryption key (bugboard #837) — identical on
|
||||
// every node so namespace function secrets decrypt cluster-wide.
|
||||
if resp.SecretsEncryptionKey != "" {
|
||||
if err := os.WriteFile(filepath.Join(secretsDir, "secrets-encryption-key"), []byte(resp.SecretsEncryptionKey), 0600); err != nil {
|
||||
return fmt.Errorf("failed to write secrets-encryption-key: %w", err)
|
||||
}
|
||||
}
|
||||
|
||||
// Write IPFS Cluster trusted peer IDs
|
||||
if len(resp.IPFSClusterPeerIDs) > 0 {
|
||||
content := strings.Join(resp.IPFSClusterPeerIDs, "\n") + "\n"
|
||||
|
||||
@ -200,6 +200,18 @@ func (cg *ConfigGenerator) GenerateNodeConfig(peerAddresses []string, vpsIP stri
|
||||
data.Environment = cg.Environment
|
||||
data.OperatorWallet = cg.OperatorWallet
|
||||
|
||||
// Serverless function secrets encryption key (bugboard #837). Read the
|
||||
// persisted key (generated in Phase3 / received via join) so it is
|
||||
// rendered into node.yaml under http_gateway. If the file is missing the
|
||||
// key is left empty and omitted from the rendered config — get_secret then
|
||||
// stays disabled until the operator provisions the key. We deliberately do
|
||||
// NOT generate here: generation/distribution is owned by SecretGenerator
|
||||
// and the join flow so every node in a cluster shares one key.
|
||||
secretsKeyPath := filepath.Join(cg.oramaDir, "secrets", "secrets-encryption-key")
|
||||
if keyBytes, err := os.ReadFile(secretsKeyPath); err == nil {
|
||||
data.SecretsEncryptionKey = strings.TrimSpace(string(keyBytes))
|
||||
}
|
||||
|
||||
return templates.RenderNodeConfig(data)
|
||||
}
|
||||
|
||||
@ -471,6 +483,55 @@ func (sg *SecretGenerator) EnsureAPIKeyHMACSecret() (string, error) {
|
||||
return secret, nil
|
||||
}
|
||||
|
||||
// EnsureSecretsEncryptionKey gets or generates the AES-256 key used to
|
||||
// encrypt serverless function secrets at rest (the function_secrets table).
|
||||
// The key is a 32-byte random value stored as 64 hex characters.
|
||||
//
|
||||
// It MUST be identical on every namespace-gateway node in a cluster and
|
||||
// stable across restarts — otherwise secrets encrypted by one process can't
|
||||
// be decrypted by another (bugboard #837). Like api-key-hmac-secret, joining
|
||||
// nodes receive this value through the join flow rather than generating their
|
||||
// own; this method only generates on the genesis node (or returns the
|
||||
// existing key if a joining node already wrote it to disk).
|
||||
func (sg *SecretGenerator) EnsureSecretsEncryptionKey() (string, error) {
|
||||
secretPath := filepath.Join(sg.oramaDir, "secrets", "secrets-encryption-key")
|
||||
secretDir := filepath.Dir(secretPath)
|
||||
|
||||
if err := os.MkdirAll(secretDir, 0700); err != nil {
|
||||
return "", fmt.Errorf("failed to create secrets directory: %w", err)
|
||||
}
|
||||
if err := os.Chmod(secretDir, 0700); err != nil {
|
||||
return "", fmt.Errorf("failed to set secrets directory permissions: %w", err)
|
||||
}
|
||||
|
||||
// Try to read existing key
|
||||
if data, err := os.ReadFile(secretPath); err == nil {
|
||||
key := strings.TrimSpace(string(data))
|
||||
if len(key) == 64 {
|
||||
if err := ensureSecretFilePermissions(secretPath); err != nil {
|
||||
return "", err
|
||||
}
|
||||
return key, nil
|
||||
}
|
||||
}
|
||||
|
||||
// Generate new key (32 bytes = 64 hex chars)
|
||||
keyBytes := make([]byte, 32)
|
||||
if _, err := rand.Read(keyBytes); err != nil {
|
||||
return "", fmt.Errorf("failed to generate secrets encryption key: %w", err)
|
||||
}
|
||||
key := hex.EncodeToString(keyBytes)
|
||||
|
||||
if err := os.WriteFile(secretPath, []byte(key), 0600); err != nil {
|
||||
return "", fmt.Errorf("failed to save secrets encryption key: %w", err)
|
||||
}
|
||||
if err := ensureSecretFilePermissions(secretPath); err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
return key, nil
|
||||
}
|
||||
|
||||
func ensureSecretFilePermissions(secretPath string) error {
|
||||
if err := os.Chmod(secretPath, 0600); err != nil {
|
||||
return fmt.Errorf("failed to set permissions on %s: %w", secretPath, err)
|
||||
|
||||
@ -593,6 +593,12 @@ func (ps *ProductionSetup) Phase3GenerateSecrets() error {
|
||||
}
|
||||
ps.logf(" ✓ API key HMAC secret ensured")
|
||||
|
||||
// Serverless function secrets encryption key (bugboard #837)
|
||||
if _, err := ps.secretGenerator.EnsureSecretsEncryptionKey(); err != nil {
|
||||
return fmt.Errorf("failed to ensure secrets encryption key: %w", err)
|
||||
}
|
||||
ps.logf(" ✓ Secrets encryption key ensured")
|
||||
|
||||
// Node identity (unified architecture)
|
||||
peerID, err := ps.secretGenerator.EnsureNodeIdentity()
|
||||
if err != nil {
|
||||
|
||||
@ -0,0 +1,80 @@
|
||||
package production
|
||||
|
||||
import (
|
||||
"encoding/hex"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
// TestEnsureSecretsEncryptionKey_generatesAndPersists verifies that a fresh
|
||||
// oramaDir produces a valid 32-byte hex key written to disk.
|
||||
func TestEnsureSecretsEncryptionKey_generatesAndPersists(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
sg := NewSecretGenerator(dir)
|
||||
|
||||
key, err := sg.EnsureSecretsEncryptionKey()
|
||||
if err != nil {
|
||||
t.Fatalf("EnsureSecretsEncryptionKey failed: %v", err)
|
||||
}
|
||||
if len(key) != 64 {
|
||||
t.Fatalf("expected 64 hex chars, got %d (%q)", len(key), key)
|
||||
}
|
||||
raw, err := hex.DecodeString(key)
|
||||
if err != nil || len(raw) != 32 {
|
||||
t.Fatalf("key is not 32 bytes hex: err=%v len=%d", err, len(raw))
|
||||
}
|
||||
|
||||
// Persisted to the expected path.
|
||||
data, err := os.ReadFile(filepath.Join(dir, "secrets", "secrets-encryption-key"))
|
||||
if err != nil {
|
||||
t.Fatalf("reading persisted key failed: %v", err)
|
||||
}
|
||||
if strings.TrimSpace(string(data)) != key {
|
||||
t.Errorf("persisted key %q != returned key %q", strings.TrimSpace(string(data)), key)
|
||||
}
|
||||
}
|
||||
|
||||
// TestEnsureSecretsEncryptionKey_idempotent verifies the key is stable across
|
||||
// calls — this is the property that makes secrets survive restarts and stay
|
||||
// identical across cluster nodes (bugboard #837).
|
||||
func TestEnsureSecretsEncryptionKey_idempotent(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
sg := NewSecretGenerator(dir)
|
||||
|
||||
first, err := sg.EnsureSecretsEncryptionKey()
|
||||
if err != nil {
|
||||
t.Fatalf("first call failed: %v", err)
|
||||
}
|
||||
second, err := sg.EnsureSecretsEncryptionKey()
|
||||
if err != nil {
|
||||
t.Fatalf("second call failed: %v", err)
|
||||
}
|
||||
if first != second {
|
||||
t.Errorf("key changed between calls: %q != %q", first, second)
|
||||
}
|
||||
}
|
||||
|
||||
// TestEnsureSecretsEncryptionKey_regeneratesInvalid verifies a corrupt/empty
|
||||
// on-disk key (wrong length) is replaced with a fresh valid one.
|
||||
func TestEnsureSecretsEncryptionKey_regeneratesInvalid(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
secretsDir := filepath.Join(dir, "secrets")
|
||||
if err := os.MkdirAll(secretsDir, 0700); err != nil {
|
||||
t.Fatalf("mkdir failed: %v", err)
|
||||
}
|
||||
keyPath := filepath.Join(secretsDir, "secrets-encryption-key")
|
||||
if err := os.WriteFile(keyPath, []byte("too-short"), 0600); err != nil {
|
||||
t.Fatalf("write failed: %v", err)
|
||||
}
|
||||
|
||||
sg := NewSecretGenerator(dir)
|
||||
key, err := sg.EnsureSecretsEncryptionKey()
|
||||
if err != nil {
|
||||
t.Fatalf("EnsureSecretsEncryptionKey failed: %v", err)
|
||||
}
|
||||
if len(key) != 64 {
|
||||
t.Errorf("expected regenerated 64-char key, got %d (%q)", len(key), key)
|
||||
}
|
||||
}
|
||||
@ -88,6 +88,12 @@ http_gateway:
|
||||
ipfs_cluster_api_url: "http://localhost:{{.ClusterAPIPort}}"
|
||||
ipfs_api_url: "http://localhost:{{.IPFSAPIPort}}"
|
||||
ipfs_timeout: "60s"
|
||||
{{- if .SecretsEncryptionKey}}
|
||||
# Serverless function secrets encryption key (AES-256, hex). Must be
|
||||
# identical on every namespace-gateway node and stable across restarts
|
||||
# (bugboard #837). Sourced from ~/.orama/secrets/secrets-encryption-key.
|
||||
secrets_encryption_key: "{{.SecretsEncryptionKey}}"
|
||||
{{- end}}
|
||||
|
||||
# Routes for internal service reverse proxy (kept for backwards compatibility but not used by full gateway)
|
||||
routes: {}
|
||||
|
||||
@ -46,6 +46,15 @@ type NodeConfigData struct {
|
||||
SSHUser string // SSH user for remote management
|
||||
Environment string // Environment name (devnet, testnet, etc.)
|
||||
OperatorWallet string // Operator wallet address
|
||||
|
||||
// SecretsEncryptionKey is the AES-256 key (hex, 64 chars) used to encrypt
|
||||
// serverless function secrets at rest. Rendered under http_gateway in
|
||||
// node.yaml. Sourced from ~/.orama/secrets/secrets-encryption-key — must
|
||||
// be identical across all namespace-gateway nodes in a cluster and stable
|
||||
// across restarts (bugboard #837). Empty → key omitted from the rendered
|
||||
// config (the gateway then reads the secret file directly / get_secret
|
||||
// stays disabled until the key is configured).
|
||||
SecretsEncryptionKey string
|
||||
}
|
||||
|
||||
// GatewayConfigData holds parameters for gateway.yaml rendering
|
||||
|
||||
@ -41,6 +41,32 @@ func TestRenderNodeConfig(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestRenderNodeConfig_secretsEncryptionKey(t *testing.T) {
|
||||
const key = "0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef"
|
||||
|
||||
// Happy path: key present → rendered under http_gateway.
|
||||
withKey, err := RenderNodeConfig(NodeConfigData{
|
||||
NodeID: "node1",
|
||||
SecretsEncryptionKey: key,
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("RenderNodeConfig failed: %v", err)
|
||||
}
|
||||
want := "secrets_encryption_key: \"" + key + "\""
|
||||
if !strings.Contains(withKey, want) {
|
||||
t.Errorf("rendered node config missing secrets key line %q\n---\n%s", want, withKey)
|
||||
}
|
||||
|
||||
// Edge case: empty key → line omitted entirely (no empty value rendered).
|
||||
withoutKey, err := RenderNodeConfig(NodeConfigData{NodeID: "node1"})
|
||||
if err != nil {
|
||||
t.Fatalf("RenderNodeConfig failed: %v", err)
|
||||
}
|
||||
if strings.Contains(withoutKey, "secrets_encryption_key") {
|
||||
t.Errorf("empty key should omit secrets_encryption_key line, got:\n%s", withoutKey)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRenderGatewayConfig(t *testing.T) {
|
||||
bootstrapMultiaddr := "/ip4/127.0.0.1/tcp/4001/p2p/Qm1234567890"
|
||||
data := GatewayConfigData{
|
||||
|
||||
@ -51,6 +51,14 @@ type Config struct {
|
||||
// Loaded from ~/.orama/secrets/api-key-hmac-secret.
|
||||
APIKeyHMACSecret string
|
||||
|
||||
// SecretsEncryptionKey is the AES-256 key (32 bytes, hex-encoded → 64
|
||||
// hex chars) used to encrypt serverless function secrets at rest in the
|
||||
// function_secrets table. It MUST be identical on every namespace-gateway
|
||||
// node in a cluster and stable across restarts — otherwise secrets
|
||||
// encrypted by one process cannot be decrypted by another (bugboard #837).
|
||||
// Loaded from ~/.orama/secrets/secrets-encryption-key.
|
||||
SecretsEncryptionKey string
|
||||
|
||||
// WebRTC configuration (set when namespace has WebRTC enabled).
|
||||
//
|
||||
// WebRTCEnabled is RETAINED for back-compat with operator YAML and
|
||||
|
||||
@ -469,9 +469,17 @@ func initializeServerless(logger *logging.ColoredLogger, cfg *Config, deps *Depe
|
||||
engineCfg.MaxTimeoutSeconds = 60
|
||||
engineCfg.ModuleCacheSize = 100
|
||||
|
||||
// Create secrets manager for serverless functions (AES-256-GCM encrypted)
|
||||
// Create secrets manager for serverless functions (AES-256-GCM encrypted).
|
||||
//
|
||||
// The encryption key comes from the gateway Config (loaded from
|
||||
// ~/.orama/secrets/secrets-encryption-key), NOT from engineCfg — engineCfg
|
||||
// never has the key set, so passing it always produced a per-process
|
||||
// ephemeral key and made get_secret return undecryptable values
|
||||
// (bugboard #837). allowEphemeral=false: a missing/invalid key fails
|
||||
// loudly here and disables get_secret rather than silently corrupting
|
||||
// secrets.
|
||||
var secretsMgr serverless.SecretsManager
|
||||
if smImpl, secretsErr := hostfunctions.NewDBSecretsManager(deps.ORMClient, engineCfg.SecretsEncryptionKey, logger.Logger); secretsErr != nil {
|
||||
if smImpl, secretsErr := hostfunctions.NewDBSecretsManager(deps.ORMClient, cfg.SecretsEncryptionKey, false, logger.Logger); secretsErr != nil {
|
||||
logger.ComponentWarn(logging.ComponentGeneral, "Failed to initialize secrets manager; get_secret will be unavailable",
|
||||
zap.Error(secretsErr))
|
||||
} else {
|
||||
|
||||
@ -39,6 +39,9 @@ type JoinResponse struct {
|
||||
APIKeyHMACSecret string `json:"api_key_hmac_secret,omitempty"`
|
||||
RQLitePassword string `json:"rqlite_password,omitempty"`
|
||||
OlricEncryptionKey string `json:"olric_encryption_key,omitempty"`
|
||||
// Serverless secrets encryption key (bugboard #837) — must be identical on
|
||||
// every node so namespace function secrets decrypt cluster-wide.
|
||||
SecretsEncryptionKey string `json:"secrets_encryption_key,omitempty"`
|
||||
|
||||
// Cluster join info (all using WG IPs)
|
||||
RQLiteJoinAddress string `json:"rqlite_join_address"`
|
||||
@ -200,6 +203,13 @@ func (h *Handler) HandleJoin(w http.ResponseWriter, r *http.Request) {
|
||||
olricEncryptionKey = strings.TrimSpace(string(data))
|
||||
}
|
||||
|
||||
// Read serverless secrets encryption key (optional — may not exist on
|
||||
// older clusters; bugboard #837)
|
||||
secretsEncryptionKey := ""
|
||||
if data, err := os.ReadFile(h.oramaDir + "/secrets/secrets-encryption-key"); err == nil {
|
||||
secretsEncryptionKey = strings.TrimSpace(string(data))
|
||||
}
|
||||
|
||||
// 7. Get this node's WG IP (needed before peer list to check self-inclusion)
|
||||
myWGIP, err := h.getMyWGIP()
|
||||
if err != nil {
|
||||
@ -271,6 +281,7 @@ func (h *Handler) HandleJoin(w http.ResponseWriter, r *http.Request) {
|
||||
APIKeyHMACSecret: apiKeyHMACSecret,
|
||||
RQLitePassword: rqlitePassword,
|
||||
OlricEncryptionKey: olricEncryptionKey,
|
||||
SecretsEncryptionKey: secretsEncryptionKey,
|
||||
RQLiteJoinAddress: fmt.Sprintf("%s:7001", myWGIP),
|
||||
IPFSPeer: ipfsPeer,
|
||||
IPFSClusterPeer: ipfsClusterPeer,
|
||||
|
||||
@ -17,7 +17,6 @@ import (
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"net/http"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/DeBrosOfficial/network/pkg/push"
|
||||
@ -136,13 +135,13 @@ func (h *Handlers) PutConfigHandler(w http.ResponseWriter, r *http.Request) {
|
||||
return
|
||||
}
|
||||
|
||||
// Validate URL fields look reasonable. We don't do hostname resolution
|
||||
// here (slow, flaky); just reject obviously-wrong schemes.
|
||||
// Reject a base URL that targets an internal/reserved host — a tenant must
|
||||
// not be able to turn the gateway's push sender into an SSRF proxy (cloud
|
||||
// metadata, WireGuard mesh, loopback). This is the config-SET path, so the
|
||||
// DNS-resolving check is fine here; the hot send path never runs it.
|
||||
if body.NtfyBaseURL != nil && *body.NtfyBaseURL != "" {
|
||||
if !strings.HasPrefix(*body.NtfyBaseURL, "http://") &&
|
||||
!strings.HasPrefix(*body.NtfyBaseURL, "https://") {
|
||||
writeError(w, http.StatusBadRequest,
|
||||
"ntfy_base_url must start with http:// or https://")
|
||||
if err := push.CheckBaseURLResolvable(r.Context(), *body.NtfyBaseURL); err != nil {
|
||||
writeError(w, http.StatusBadRequest, "ntfy_base_url rejected: "+err.Error())
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
63
core/pkg/gateway/handlers/push/resolve_caller_test.go
Normal file
63
core/pkg/gateway/handlers/push/resolve_caller_test.go
Normal file
@ -0,0 +1,63 @@
|
||||
package push
|
||||
|
||||
import (
|
||||
"context"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"testing"
|
||||
|
||||
authsvc "github.com/DeBrosOfficial/network/pkg/gateway/auth"
|
||||
"github.com/DeBrosOfficial/network/pkg/gateway/ctxkeys"
|
||||
)
|
||||
|
||||
// Bugboard #548 — a push device must be keyed on the stable identity (rootId)
|
||||
// when the app provides one, not the wallet credential that authenticated the
|
||||
// session. resolveCallerUserID prefers the `root_id` custom claim and falls
|
||||
// back to the JWT subject so single-credential apps keep working.
|
||||
|
||||
func reqWithClaims(t *testing.T, claims *authsvc.JWTClaims) *http.Request {
|
||||
t.Helper()
|
||||
r := httptest.NewRequest(http.MethodGet, "/", nil)
|
||||
ctx := r.Context()
|
||||
if claims != nil {
|
||||
ctx = context.WithValue(ctx, ctxkeys.JWT, claims)
|
||||
}
|
||||
return r.WithContext(ctx)
|
||||
}
|
||||
|
||||
func TestResolveCallerUserID_prefersRootIDClaim(t *testing.T) {
|
||||
r := reqWithClaims(t, &authsvc.JWTClaims{
|
||||
Sub: "0xWALLET",
|
||||
Custom: map[string]string{rootIDClaim: "root-uuid-123"},
|
||||
})
|
||||
if got := resolveCallerUserID(r); got != "root-uuid-123" {
|
||||
t.Errorf("want rootId from claim, got %q", got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestResolveCallerUserID_fallsBackToSubject(t *testing.T) {
|
||||
// No custom claim → wallet subject (back-compat for single-credential apps).
|
||||
r := reqWithClaims(t, &authsvc.JWTClaims{Sub: "0xWALLET"})
|
||||
if got := resolveCallerUserID(r); got != "0xWALLET" {
|
||||
t.Errorf("want wallet subject fallback, got %q", got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestResolveCallerUserID_emptyRootIDFallsBack(t *testing.T) {
|
||||
// An empty root_id must not collapse identity to "" — fall back to subject.
|
||||
r := reqWithClaims(t, &authsvc.JWTClaims{
|
||||
Sub: "0xWALLET",
|
||||
Custom: map[string]string{rootIDClaim: ""},
|
||||
})
|
||||
if got := resolveCallerUserID(r); got != "0xWALLET" {
|
||||
t.Errorf("want wallet fallback on empty root_id, got %q", got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestResolveCallerUserID_noJWTReturnsEmpty(t *testing.T) {
|
||||
// API-key-only request (no JWT in context) → empty.
|
||||
r := reqWithClaims(t, nil)
|
||||
if got := resolveCallerUserID(r); got != "" {
|
||||
t.Errorf("want empty for API-key-only request, got %q", got)
|
||||
}
|
||||
}
|
||||
@ -141,11 +141,27 @@ func resolveNamespace(r *http.Request) string {
|
||||
return ""
|
||||
}
|
||||
|
||||
// resolveCallerUserID extracts the JWT subject (typically the wallet) of
|
||||
// the caller, or empty if the request was authenticated by API key only.
|
||||
// rootIDClaim is the custom JWT claim an app may set to carry the stable
|
||||
// identity (rootId) that a device should be keyed on, independent of which
|
||||
// wallet credential authenticated the session. See bugboard #548.
|
||||
const rootIDClaim = "root_id"
|
||||
|
||||
// resolveCallerUserID extracts the identity a push device should be keyed on.
|
||||
//
|
||||
// In a multi-credential app (anchat), the JWT subject is the *wallet* — a
|
||||
// credential, not the identity. A single user (rootId) with N linked wallets
|
||||
// would otherwise register N device rows and receive N duplicate pushes
|
||||
// (bugboard #548). When the app includes a stable `root_id` custom claim, we
|
||||
// key on that; otherwise we fall back to the subject (wallet) so single-
|
||||
// credential apps and older tokens keep working unchanged.
|
||||
//
|
||||
// Returns empty if the request was authenticated by API key only (no JWT).
|
||||
func resolveCallerUserID(r *http.Request) string {
|
||||
if v := r.Context().Value(ctxkeys.JWT); v != nil {
|
||||
if claims, ok := v.(*auth.JWTClaims); ok && claims != nil {
|
||||
if rootID, ok := claims.Custom[rootIDClaim]; ok && rootID != "" {
|
||||
return rootID
|
||||
}
|
||||
return claims.Sub
|
||||
}
|
||||
}
|
||||
|
||||
@ -145,6 +145,27 @@ func (h *ServerlessHandlers) InvokeFunction(w http.ResponseWriter, r *http.Reque
|
||||
w.Header().Set("X-Request-ID", resp.RequestID)
|
||||
w.Header().Set("X-Duration-Ms", strconv.FormatInt(resp.DurationMS, 10))
|
||||
|
||||
// Raw-HTTP-response mode (bugboard #835): when a function deployed with
|
||||
// raw_http_response actually set a response via set_http_response, replay
|
||||
// it verbatim (status + headers + body) and skip the sniff/wrap path. If
|
||||
// the function set nothing, RawHTTP is nil and we fall through to the
|
||||
// normal behavior unchanged.
|
||||
if resp.RawHTTP != nil {
|
||||
for k, v := range resp.RawHTTP.Headers {
|
||||
// A tenant function must not overwrite gateway-owned trace/auth
|
||||
// headers or framing-control (hop-by-hop) headers via its raw
|
||||
// response — that would let it forge request IDs, leak/spoof
|
||||
// internal-auth headers, or corrupt response framing.
|
||||
if isReservedResponseHeader(k) {
|
||||
continue
|
||||
}
|
||||
w.Header().Set(k, v)
|
||||
}
|
||||
w.WriteHeader(resp.RawHTTP.Status)
|
||||
w.Write(resp.RawHTTP.Body)
|
||||
return
|
||||
}
|
||||
|
||||
// Try to detect if output is JSON
|
||||
if len(resp.Output) > 0 && (resp.Output[0] == '{' || resp.Output[0] == '[') {
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
@ -256,3 +277,32 @@ func (h *ServerlessHandlers) ListVersions(w http.ResponseWriter, r *http.Request
|
||||
"count": len(versions),
|
||||
})
|
||||
}
|
||||
|
||||
// reservedResponseHeaders is the set of response header names, in lower case,
// that a raw-HTTP-response tenant function (bugboard #835) is not allowed to
// set or overwrite. Lookups must use a lower-cased key. Headers carrying the
// X-Internal- prefix are not listed here; isReservedResponseHeader matches
// that prefix on its own.
var reservedResponseHeaders = map[string]struct{}{
	// Gateway-owned trace headers.
	"x-request-id":  {},
	"x-duration-ms": {},

	// Hop-by-hop and framing-control headers.
	"connection":          {},
	"content-length":      {},
	"keep-alive":          {},
	"proxy-authenticate":  {},
	"proxy-authorization": {},
	"te":                  {},
	"trailer":             {},
	"transfer-encoding":   {},
	"upgrade":             {},
}

// isReservedResponseHeader reports whether key names a header that belongs to
// the gateway and therefore must be dropped from a tenant's raw HTTP response.
// The comparison ignores case and surrounding whitespace. A key is reserved
// when it starts with "x-internal-" or appears in reservedResponseHeaders.
func isReservedResponseHeader(key string) bool {
	normalized := strings.TrimSpace(key)
	normalized = strings.ToLower(normalized)

	// Any internal-auth header the gateway uses for inter-service trust.
	if strings.HasPrefix(normalized, "x-internal-") {
		return true
	}

	_, reserved := reservedResponseHeaders[normalized]
	return reserved
}
|
||||
|
||||
@ -0,0 +1,31 @@
|
||||
package serverless
|
||||
|
||||
import "testing"
|
||||
|
||||
// Bugboard #835 hardening (flagged by code + security review): a raw-HTTP
|
||||
// tenant function must not be able to set/overwrite gateway-owned trace/auth
|
||||
// headers or hop-by-hop framing headers.
|
||||
|
||||
func TestIsReservedResponseHeader(t *testing.T) {
|
||||
reserved := []string{
|
||||
"X-Request-ID", "x-request-id", "X-Duration-Ms",
|
||||
"Content-Length", "Transfer-Encoding", "Connection", "Keep-Alive",
|
||||
"Proxy-Authenticate", "Proxy-Authorization", "TE", "Trailer", "Upgrade",
|
||||
"X-Internal-Auth", "x-internal-anything", " X-Request-Id ",
|
||||
}
|
||||
for _, h := range reserved {
|
||||
if !isReservedResponseHeader(h) {
|
||||
t.Errorf("isReservedResponseHeader(%q) = false; want true (must be protected)", h)
|
||||
}
|
||||
}
|
||||
|
||||
allowed := []string{
|
||||
"Content-Type", "Cache-Control", "X-Custom", "ETag",
|
||||
"Access-Control-Allow-Origin", "Location", "Retry-After",
|
||||
}
|
||||
for _, h := range allowed {
|
||||
if isReservedResponseHeader(h) {
|
||||
t.Errorf("isReservedResponseHeader(%q) = true; want false (tenant may set it)", h)
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -58,6 +58,15 @@ func (n *Node) startHTTPGateway(ctx context.Context) error {
|
||||
rqlitePassword = strings.TrimSpace(string(secretBytes))
|
||||
}
|
||||
|
||||
// Read the serverless secrets encryption key (bugboard #837). Must be the
|
||||
// SAME value on every namespace-gateway node so a secret encrypted by one
|
||||
// process decrypts on another; an empty value makes get_secret fail loudly
|
||||
// (the manager refuses an ephemeral key in production).
|
||||
secretsEncryptionKey := ""
|
||||
if secretBytes, err := os.ReadFile(filepath.Join(oramaDir, "secrets", "secrets-encryption-key")); err == nil {
|
||||
secretsEncryptionKey = strings.TrimSpace(string(secretBytes))
|
||||
}
|
||||
|
||||
gwCfg := &gateway.Config{
|
||||
ListenAddr: n.config.HTTPGateway.ListenAddr,
|
||||
ClientNamespace: n.config.HTTPGateway.ClientNamespace,
|
||||
@ -75,6 +84,7 @@ func (n *Node) startHTTPGateway(ctx context.Context) error {
|
||||
RQLitePassword: rqlitePassword,
|
||||
ClusterSecret: clusterSecret,
|
||||
APIKeyHMACSecret: apiKeyHMACSecret,
|
||||
SecretsEncryptionKey: secretsEncryptionKey,
|
||||
WebRTCEnabled: n.config.HTTPGateway.WebRTC.Enabled,
|
||||
SFUPort: n.config.HTTPGateway.WebRTC.SFUPort,
|
||||
TURNDomain: n.config.HTTPGateway.WebRTC.TURNDomain,
|
||||
|
||||
@ -296,7 +296,17 @@ func (m *Manager) buildDispatcher(ctx context.Context, namespace string) (*PushD
|
||||
// (DELETE) — there's no "set this field to empty to clear"
|
||||
// half-state, by design.
|
||||
if nc.NtfyBaseURL != "" {
|
||||
eff.NtfyBaseURL = nc.NtfyBaseURL
|
||||
// Defense-in-depth: a base URL stored before the SSRF guard
|
||||
// existed (or via any path that skipped it) must not point at an
|
||||
// internal/reserved literal IP. Drop the override and fall back
|
||||
// to the gateway default if it does. Literal-only (no DNS, no
|
||||
// syntax re-validation) so this stays safe on the hot build path.
|
||||
if IsInternalBaseURL(nc.NtfyBaseURL) {
|
||||
m.logger.Warn("push: ignoring namespace ntfy_base_url override (internal address)",
|
||||
zap.String("namespace", namespace), zap.String("base_url", nc.NtfyBaseURL))
|
||||
} else {
|
||||
eff.NtfyBaseURL = nc.NtfyBaseURL
|
||||
}
|
||||
}
|
||||
if nc.NtfyAuthToken != "" {
|
||||
eff.NtfyAuthToken = nc.NtfyAuthToken
|
||||
|
||||
@ -16,10 +16,11 @@ package ntfy
|
||||
// migration window, with the new credentials store taking precedence.
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"strings"
|
||||
|
||||
"github.com/DeBrosOfficial/network/pkg/push"
|
||||
"github.com/DeBrosOfficial/network/pkg/push/credentials"
|
||||
)
|
||||
|
||||
@ -87,7 +88,17 @@ func (Validator) Validate(raw []byte) error {
|
||||
if err := json.Unmarshal(raw, &c); err != nil {
|
||||
return fmt.Errorf("ntfy credentials: invalid JSON: %w", err)
|
||||
}
|
||||
return validateCredentials(c)
|
||||
if err := validateCredentials(c); err != nil {
|
||||
return err
|
||||
}
|
||||
// Validate is the config-SET path (the hot build path uses ParseCredentials,
|
||||
// which skips DNS), so the resolving SSRF check is safe here: reject a
|
||||
// base_url whose host resolves to an internal/reserved address. Fail-open on
|
||||
// resolution error — see push.CheckBaseURLResolvable.
|
||||
if err := push.CheckBaseURLResolvable(context.Background(), c.BaseURL); err != nil {
|
||||
return fmt.Errorf("ntfy credentials: %w", err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Redact returns a JSON-safe view that never echoes the auth token or
|
||||
@ -127,10 +138,12 @@ func ParseCredentials(raw []byte) (Credentials, error) {
|
||||
// validateCredentials is the shared validator used by both Validate and
|
||||
// ParseCredentials.
|
||||
func validateCredentials(c Credentials) error {
|
||||
if c.BaseURL != "" {
|
||||
if !strings.HasPrefix(c.BaseURL, "http://") && !strings.HasPrefix(c.BaseURL, "https://") {
|
||||
return fmt.Errorf("ntfy credentials: base_url must start with http:// or https:// (got %q)", c.BaseURL)
|
||||
}
|
||||
// Literal-IP SSRF guard + scheme check. Runs on BOTH the set and the hot
|
||||
// build path (no DNS), so a stored internal-literal base_url is also
|
||||
// rejected when the dispatcher is (re)built. The DNS-resolving check lives
|
||||
// in Validate (set path only).
|
||||
if err := push.CheckBaseURLSyntax(c.BaseURL); err != nil {
|
||||
return fmt.Errorf("ntfy credentials: %w", err)
|
||||
}
|
||||
if c.TopicMode != "" {
|
||||
switch c.TopicMode {
|
||||
|
||||
@ -26,7 +26,10 @@ func TestValidator_RejectsBadBaseURL(t *testing.T) {
|
||||
}
|
||||
|
||||
func TestValidator_AcceptsHttpAndHttps(t *testing.T) {
|
||||
for _, base := range []string{"http://push.local:8080", "https://push.example.com"} {
|
||||
// Literal public (documentation-range) IPs so the test is deterministic and
|
||||
// never hits real DNS — Validate now does a set-time SSRF resolve for
|
||||
// hostname base URLs.
|
||||
for _, base := range []string{"http://203.0.113.10:8080", "https://203.0.113.10"} {
|
||||
body, _ := json.Marshal(Credentials{BaseURL: base})
|
||||
if err := NewValidator().Validate(body); err != nil {
|
||||
t.Errorf("base_url=%q rejected: %v", base, err)
|
||||
@ -34,6 +37,21 @@ func TestValidator_AcceptsHttpAndHttps(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestValidator_RejectsInternalBaseURL(t *testing.T) {
|
||||
// SSRF guard: a tenant must not point the push base URL at an internal /
|
||||
// reserved address. Literal IPs are rejected without DNS.
|
||||
for _, base := range []string{
|
||||
"http://169.254.169.254", // cloud metadata
|
||||
"http://127.0.0.1:8090", // loopback (the operator's local ntfy)
|
||||
"http://10.0.0.5", // WireGuard mesh
|
||||
} {
|
||||
body, _ := json.Marshal(Credentials{BaseURL: base})
|
||||
if err := NewValidator().Validate(body); err == nil {
|
||||
t.Errorf("internal base_url %q must be rejected (SSRF)", base)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestValidator_RejectsBadTopicMode(t *testing.T) {
|
||||
if err := NewValidator().Validate([]byte(`{"topic_mode":"random"}`)); err == nil {
|
||||
t.Error("expected rejection of unknown topic_mode")
|
||||
|
||||
@ -74,15 +74,10 @@ func (p *Provider) Send(ctx context.Context, msg push.PushMessage) error {
|
||||
return fmt.Errorf("ntfy: base URL not configured")
|
||||
}
|
||||
|
||||
// URL-escape each path segment of the device token. ntfy topics can be
|
||||
// hierarchical (e.g. "ns/myapp/user-1") and we want to preserve those
|
||||
// '/' separators while escaping any other special characters that
|
||||
// could let a malicious token escape the topic path.
|
||||
parts := strings.Split(msg.DeviceToken, "/")
|
||||
for i, p := range parts {
|
||||
parts[i] = url.PathEscape(p)
|
||||
endpointURL, err := p.resolveEndpoint(msg.DeviceToken)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
endpointURL := p.baseURL + "/" + strings.Join(parts, "/")
|
||||
|
||||
req, err := http.NewRequestWithContext(ctx, http.MethodPost, endpointURL, strings.NewReader(msg.Body))
|
||||
if err != nil {
|
||||
@ -130,3 +125,58 @@ func (p *Provider) Send(ctx context.Context, msg push.PushMessage) error {
|
||||
_, _ = io.Copy(io.Discard, io.LimitReader(resp.Body, 4096))
|
||||
return nil
|
||||
}
|
||||
|
||||
// resolveEndpoint maps a device token to the ntfy publish URL.
|
||||
//
|
||||
// The token is one of two shapes:
|
||||
//
|
||||
// - A plain ntfy topic (possibly hierarchical, e.g. "ns/myapp/user-1") —
|
||||
// published to "<baseURL>/<topic>", with each path segment escaped so a
|
||||
// crafted token can't break out of the topic path.
|
||||
// - A full UnifiedPush endpoint URL handed to the client by the ntfy
|
||||
// distributor (e.g. "https://push.example.com/up<random>"). UnifiedPush
|
||||
// requires the application server to POST to that endpoint verbatim, so we
|
||||
// use it as-is — but ONLY after verifying its scheme+host match the
|
||||
// configured base URL. That check turns a device-supplied token into an
|
||||
// SSRF only against our own push host, never an arbitrary one.
|
||||
func (p *Provider) resolveEndpoint(token string) (string, error) {
|
||||
topic := token
|
||||
if isAbsoluteHTTPURL(token) {
|
||||
u, err := url.Parse(token)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("ntfy: invalid endpoint url: %w", err)
|
||||
}
|
||||
base, err := url.Parse(p.baseURL)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("ntfy: invalid base url %q: %w", p.baseURL, err)
|
||||
}
|
||||
if !strings.EqualFold(u.Scheme, base.Scheme) || !strings.EqualFold(u.Host, base.Host) {
|
||||
// Reject an endpoint pointing anywhere other than the configured
|
||||
// push host — a device token must never become an SSRF vector.
|
||||
return "", fmt.Errorf("ntfy: endpoint host %q does not match configured push host %q", u.Host, base.Host)
|
||||
}
|
||||
// Confine the URL form to the SAME publish surface as a bare topic:
|
||||
// take only the path as the topic and re-build through the per-segment
|
||||
// escaping below, dropping any query/fragment. So a UnifiedPush
|
||||
// endpoint token can publish a topic but can't gain arbitrary path or
|
||||
// query control on the push host beyond what a plain topic already has.
|
||||
topic = strings.TrimPrefix(u.Path, "/")
|
||||
if topic == "" {
|
||||
return "", fmt.Errorf("ntfy: endpoint url %q has no topic path", token)
|
||||
}
|
||||
}
|
||||
|
||||
// Escape each path segment, preserving the '/' hierarchy.
|
||||
parts := strings.Split(topic, "/")
|
||||
for i, seg := range parts {
|
||||
parts[i] = url.PathEscape(seg)
|
||||
}
|
||||
return p.baseURL + "/" + strings.Join(parts, "/"), nil
|
||||
}
|
||||
|
||||
// isAbsoluteHTTPURL reports whether s begins with "http://" or "https://",
// compared case-insensitively. That prefix is what distinguishes a full
// UnifiedPush endpoint URL from a bare ntfy topic.
func isAbsoluteHTTPURL(s string) bool {
	folded := strings.ToLower(s)
	for _, scheme := range [...]string{"http://", "https://"} {
		if strings.HasPrefix(folded, scheme) {
			return true
		}
	}
	return false
}
|
||||
|
||||
@ -7,6 +7,7 @@ import (
|
||||
"io"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"net/url"
|
||||
"strings"
|
||||
"testing"
|
||||
"time"
|
||||
@ -183,6 +184,108 @@ func TestSend_no_baseURL_returns_error(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
// feat-32: an Android/GrapheneOS UnifiedPush device registers the full endpoint
|
||||
// URL its distributor hands it. UnifiedPush requires the app server to POST to
|
||||
// that endpoint verbatim, and we must do so ONLY when the host matches our
|
||||
// configured push server (never an arbitrary host → no SSRF).
|
||||
|
||||
func TestSend_unifiedPush_endpoint_published(t *testing.T) {
|
||||
var gotPath, gotBody string
|
||||
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
gotPath = r.URL.Path
|
||||
b, _ := io.ReadAll(r.Body)
|
||||
gotBody = string(b)
|
||||
w.WriteHeader(http.StatusOK)
|
||||
}))
|
||||
defer srv.Close()
|
||||
|
||||
p := New(Config{BaseURL: srv.URL}, nil)
|
||||
// The distributor hands the client a full endpoint on the SAME (push) host.
|
||||
endpoint := srv.URL + "/upAbc123"
|
||||
if err := p.Send(context.Background(), push.PushMessage{DeviceToken: endpoint, Body: "payload"}); err != nil {
|
||||
t.Fatalf("Send: %v", err)
|
||||
}
|
||||
if gotPath != "/upAbc123" {
|
||||
t.Errorf("UnifiedPush endpoint must publish to its topic path; got %q", gotPath)
|
||||
}
|
||||
if gotBody != "payload" {
|
||||
t.Errorf("body not delivered; got %q", gotBody)
|
||||
}
|
||||
}
|
||||
|
||||
func TestSend_unifiedPush_endpoint_confined_to_topic(t *testing.T) {
|
||||
// A URL token must be confined to the same publish surface as a bare topic:
|
||||
// the path becomes the topic, and any query string is dropped — so it can't
|
||||
// gain arbitrary path/query control on the push host.
|
||||
var gotPath, gotQuery string
|
||||
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
gotPath = r.URL.Path
|
||||
gotQuery = r.URL.RawQuery
|
||||
w.WriteHeader(http.StatusOK)
|
||||
}))
|
||||
defer srv.Close()
|
||||
|
||||
p := New(Config{BaseURL: srv.URL}, nil)
|
||||
endpoint := srv.URL + "/uptopic?admin=1&x=y"
|
||||
if err := p.Send(context.Background(), push.PushMessage{DeviceToken: endpoint, Body: "x"}); err != nil {
|
||||
t.Fatalf("Send: %v", err)
|
||||
}
|
||||
if gotPath != "/uptopic" {
|
||||
t.Errorf("path must be the topic only; got %q", gotPath)
|
||||
}
|
||||
if gotQuery != "" {
|
||||
t.Errorf("query string must be dropped (no arbitrary query on push host); got %q", gotQuery)
|
||||
}
|
||||
}
|
||||
|
||||
func TestSend_unifiedPush_endpoint_rejects_userinfo_bypass(t *testing.T) {
|
||||
// Classic SSRF guard bypass: smuggle the real host into userinfo. url.Parse
|
||||
// resolves the authority to the attacker host, so it must be rejected.
|
||||
hit := false
|
||||
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
hit = true
|
||||
w.WriteHeader(http.StatusOK)
|
||||
}))
|
||||
defer srv.Close()
|
||||
|
||||
// base host = srv host; token tries "<srvhost>@attacker.example.com".
|
||||
base, _ := url.Parse(srv.URL)
|
||||
p := New(Config{BaseURL: srv.URL}, nil)
|
||||
token := base.Scheme + "://" + base.Host + "@attacker.example.com/x"
|
||||
if err := p.Send(context.Background(), push.PushMessage{DeviceToken: token, Body: "x"}); err == nil {
|
||||
t.Fatal("expected rejection of a userinfo-smuggled host")
|
||||
}
|
||||
if hit {
|
||||
t.Error("no request must be sent for a userinfo-bypass token")
|
||||
}
|
||||
}
|
||||
|
||||
func TestSend_unifiedPush_endpoint_rejects_foreign_host(t *testing.T) {
|
||||
hit := false
|
||||
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
hit = true
|
||||
w.WriteHeader(http.StatusOK)
|
||||
}))
|
||||
defer srv.Close()
|
||||
|
||||
p := New(Config{BaseURL: srv.URL}, nil)
|
||||
// A device token pointing at a DIFFERENT host must be rejected before any
|
||||
// request is made — a device token must never become an SSRF vector.
|
||||
err := p.Send(context.Background(), push.PushMessage{
|
||||
DeviceToken: "https://attacker.example.com/steal",
|
||||
Body: "x",
|
||||
})
|
||||
if err == nil {
|
||||
t.Fatal("expected an error for an endpoint whose host doesn't match the push host")
|
||||
}
|
||||
if hit {
|
||||
t.Error("no request must be sent when the endpoint host doesn't match")
|
||||
}
|
||||
if !strings.Contains(err.Error(), "does not match") {
|
||||
t.Errorf("error should explain the host mismatch; got %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestName(t *testing.T) {
|
||||
p := New(Config{BaseURL: "http://x"}, nil)
|
||||
if p.Name() != "ntfy" {
|
||||
|
||||
193
core/pkg/push/url_guard.go
Normal file
193
core/pkg/push/url_guard.go
Normal file
@ -0,0 +1,193 @@
|
||||
package push
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"fmt"
|
||||
"net"
|
||||
"net/url"
|
||||
"strings"
|
||||
"time"
|
||||
)
|
||||
|
||||
// url_guard.go — SSRF guard for TENANT-supplied push base URLs.
|
||||
//
|
||||
// A tenant can override the ntfy base URL the gateway POSTs to (BYO-ntfy is a
|
||||
// legitimate use case). Without a guard, a tenant could point it at an internal
|
||||
// address — cloud metadata (169.254.169.254), the WireGuard mesh (10.0.0.x),
|
||||
// loopback — turning the gateway's push sender into an SSRF proxy. These checks
|
||||
// reject internal/reserved targets while still allowing real external hosts.
|
||||
//
|
||||
// IMPORTANT: apply these ONLY to tenant-supplied base URLs (the per-namespace
|
||||
// override). The operator's gateway default (e.g. 127.0.0.1:8090, the local
|
||||
// ntfy) is trusted and must NOT pass through here — it would be (correctly)
|
||||
// rejected as loopback.
|
||||
|
||||
// baseURLDNSTimeout is the upper bound on the hostname-resolution step in
// CheckBaseURLResolvable.
const baseURLDNSTimeout = 5 * time.Second

// lookupIP resolves host to its IP addresses through net.DefaultResolver. It is
// a package-level variable rather than a function so tests can swap in a
// deterministic resolver and never touch real DNS.
var lookupIP = func(ctx context.Context, host string) ([]net.IP, error) {
	addrs, err := net.DefaultResolver.LookupIPAddr(ctx, host)
	if err != nil {
		return nil, err
	}
	// Keep only the IP of each result; the zone is not carried over.
	ips := make([]net.IP, 0, len(addrs))
	for _, addr := range addrs {
		ips = append(ips, addr.IP)
	}
	return ips, nil
}
|
||||
|
||||
// CheckBaseURLSyntax validates a tenant base URL's scheme and rejects a host
|
||||
// that is a LITERAL internal/reserved IP. It does NOT resolve hostnames, so it
|
||||
// is safe to call on hot paths (e.g. per-send dispatcher construction). An
|
||||
// empty base URL is allowed — it means "use the gateway default".
|
||||
func CheckBaseURLSyntax(baseURL string) error {
|
||||
if baseURL == "" {
|
||||
return nil
|
||||
}
|
||||
u, err := url.Parse(baseURL)
|
||||
if err != nil {
|
||||
return fmt.Errorf("base_url: invalid URL: %w", err)
|
||||
}
|
||||
if u.Scheme != "http" && u.Scheme != "https" {
|
||||
return fmt.Errorf("base_url: must start with http:// or https:// (got scheme %q)", u.Scheme)
|
||||
}
|
||||
host := u.Hostname()
|
||||
if host == "" {
|
||||
return fmt.Errorf("base_url: missing host")
|
||||
}
|
||||
if ip := net.ParseIP(host); ip != nil {
|
||||
if isReservedIP(ip) {
|
||||
return fmt.Errorf("base_url: host %s is a reserved/internal address and is not allowed", host)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
// net.ParseIP only accepts canonical dotted-decimal / standard IPv6, but the
|
||||
// OS resolver + net.Dial ALSO accept decimal ("2130706433"), hex
|
||||
// ("0x7f000001") and octal ("0177.0.0.1") IPv4 encodings — a literal-check
|
||||
// bypass to internal addresses. Reject these non-standard numeric hosts
|
||||
// outright (no legitimate push host is all-numeric or 0x-hex).
|
||||
if looksLikeNumericHost(host) {
|
||||
return fmt.Errorf("base_url: host %q is a non-standard numeric/IP encoding and is not allowed", host)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// CheckBaseURLResolvable runs CheckBaseURLSyntax AND, when the host is a name
|
||||
// rather than a literal IP, resolves it (bounded) and rejects if ANY resolved
|
||||
// address is internal/reserved — blocking a tenant from pointing a domain at an
|
||||
// internal host. It performs DNS, so call it ONLY at config-set time (the PUT
|
||||
// handlers), never on the hot send path.
|
||||
//
|
||||
// Resolution failure FAILS OPEN (allowed): an unresolvable host reaches nothing
|
||||
// (delivery would fail anyway), and rejecting it would break a legitimate host
|
||||
// that's momentarily unresolvable at config time. The hard floor is
|
||||
// CheckBaseURLSyntax's literal-IP block, which applies on every code path.
|
||||
//
|
||||
// Residual: as a set-time check it does not defend against DNS rebinding (the
|
||||
// host re-pointing to an internal IP AFTER it was accepted). Closing that would
|
||||
// require a send-time IP check, which is complicated here by the operator's
|
||||
// loopback default ntfy.
|
||||
func CheckBaseURLResolvable(ctx context.Context, baseURL string) error {
|
||||
if err := CheckBaseURLSyntax(baseURL); err != nil {
|
||||
return err
|
||||
}
|
||||
if baseURL == "" {
|
||||
return nil
|
||||
}
|
||||
u, _ := url.Parse(baseURL) // already validated by CheckBaseURLSyntax
|
||||
host := u.Hostname()
|
||||
if net.ParseIP(host) != nil {
|
||||
return nil // literal IP already vetted by CheckBaseURLSyntax
|
||||
}
|
||||
|
||||
rctx, cancel := context.WithTimeout(ctx, baseURLDNSTimeout)
|
||||
defer cancel()
|
||||
ips, err := lookupIP(rctx, host)
|
||||
if err != nil || len(ips) == 0 {
|
||||
return nil // fail open on resolution failure (see doc)
|
||||
}
|
||||
for _, ip := range ips {
|
||||
if isReservedIP(ip) {
|
||||
return fmt.Errorf("base_url: host %q resolves to reserved/internal address %s and is not allowed", host, ip)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// IsInternalBaseURL reports whether baseURL parses to a host that is a LITERAL
|
||||
// internal/reserved IP. Malformed URLs and hostname URLs return false — this is
|
||||
// the no-false-positive guard for hot paths (e.g. dispatcher build), where the
|
||||
// goal is only to drop an internal-address override, not to re-validate syntax
|
||||
// or do DNS (the set-path handlers cover those).
|
||||
func IsInternalBaseURL(baseURL string) bool {
|
||||
u, err := url.Parse(baseURL)
|
||||
if err != nil {
|
||||
return false
|
||||
}
|
||||
host := u.Hostname()
|
||||
if ip := net.ParseIP(host); ip != nil {
|
||||
return isReservedIP(ip)
|
||||
}
|
||||
// Non-standard numeric encodings (decimal/hex/octal) that net.ParseIP misses
|
||||
// but net.Dial resolves to an IP — treat as internal so the build-path guard
|
||||
// matches what the dialer would actually reach.
|
||||
return looksLikeNumericHost(host)
|
||||
}
|
||||
|
||||
// isReservedIP reports whether ip lies in a range a tenant must never reach
// through a push base URL. It returns true for:
//
//   - a nil ip (unparseable input is treated as unsafe);
//   - IPv4 100.64.0.0/10, carrier-grade NAT, which IsPrivate does not cover;
//   - IPv6 64:ff9b::/96, the NAT64 well-known prefix (RFC 6052), since the
//     embedded IPv4 address could be an internal one;
//   - loopback, link-local unicast, any multicast, private (10/8, 172.16/12,
//     192.168/16, fc00::/7) and unspecified addresses.
func isReservedIP(ip net.IP) bool {
	if ip == nil {
		return true // unparseable → treat as unsafe
	}

	v4 := ip.To4()
	switch {
	case v4 != nil:
		// 100.64.0.0/10: first octet 100, top two bits of the second octet
		// are 01, i.e. the second octet is in [64,127].
		if v4[0] == 100 && v4[1]&0xC0 == 0x40 {
			return true
		}
	default:
		if v6 := ip.To16(); v6 != nil {
			nat64 := []byte{0x00, 0x64, 0xff, 0x9b, 0, 0, 0, 0, 0, 0, 0, 0}
			if bytes.HasPrefix(v6, nat64) {
				return true
			}
		}
	}

	switch {
	case ip.IsLoopback(),
		ip.IsLinkLocalUnicast(),
		ip.IsLinkLocalMulticast(),
		ip.IsInterfaceLocalMulticast(),
		ip.IsMulticast(),
		ip.IsPrivate(), // 10/8, 172.16/12, 192.168/16, fc00::/7
		ip.IsUnspecified():
		return true
	}
	return false
}
|
||||
|
||||
// looksLikeNumericHost reports whether host is a non-standard numeric IPv4
// encoding — hex ("0x7f000001", "0x7f.0.0.1", "127.0.0.0x1"), decimal
// ("2130706433"), or octal ("0177.0.0.1") — that net.ParseIP rejects but the
// OS resolver and net.Dial accept (resolving to a real, possibly internal,
// IPv4). Such hosts are never a legitimate push server name, so callers reject
// them rather than let them slip past the literal-IP guard.
//
// It returns true when host starts with "0x"/"0X", or when every non-empty
// dot-separated label is a number: decimal/octal digits, or "0x" followed by
// hex digits. Checking each label matters because a hex component may appear
// in any position, not only the first. An empty host, and any host with a
// label that is not a number, is treated as an ordinary DNS name and returns
// false.
func looksLikeNumericHost(host string) bool {
	if host == "" {
		return false
	}
	if strings.HasPrefix(strings.ToLower(host), "0x") {
		return true // hex literal
	}
	// A host made only of numeric labels that net.ParseIP already failed to
	// accept: a decimal, octal or hex IPv4 encoding (or a malformed numeric
	// dotted form). Either way, not a real hostname.
	for _, label := range strings.Split(host, ".") {
		if label == "" {
			continue // stray or trailing dot, e.g. "127.0.0.1."
		}
		if !isNumericHostLabel(label) {
			return false
		}
	}
	return true
}

// isNumericHostLabel reports whether label is all decimal digits, or a
// "0x"/"0X" prefix followed by zero or more hex digits.
func isNumericHostLabel(label string) bool {
	digits, hex := label, false
	if len(label) >= 2 && label[0] == '0' && (label[1] == 'x' || label[1] == 'X') {
		digits, hex = label[2:], true
	}
	for i := 0; i < len(digits); i++ {
		c := digits[i]
		switch {
		case c >= '0' && c <= '9':
		case hex && (c >= 'a' && c <= 'f' || c >= 'A' && c <= 'F'):
		default:
			return false
		}
	}
	return true
}
|
||||
160
core/pkg/push/url_guard_test.go
Normal file
160
core/pkg/push/url_guard_test.go
Normal file
@ -0,0 +1,160 @@
|
||||
package push
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"net"
|
||||
"testing"
|
||||
)
|
||||
|
||||
// SSRF guard for tenant push base URLs. These pin: literal internal/reserved IPs
|
||||
// are rejected, the cloud-metadata IP is rejected, legit external hosts pass,
|
||||
// and a hostname that RESOLVES to an internal address is rejected (the DNS
|
||||
// vector) while a public-resolving host passes.
|
||||
|
||||
func TestCheckBaseURLSyntax(t *testing.T) {
|
||||
cases := []struct {
|
||||
url string
|
||||
wantErr bool
|
||||
}{
|
||||
{"", false}, // empty = use default
|
||||
{"https://push.example.com", false}, // public host
|
||||
{"http://push.example.com:8090", false},
|
||||
{"https://1.1.1.1", false}, // public literal IP
|
||||
{"https://[2606:4700:4700::1111]", false}, // public v6
|
||||
{"ftp://push.example.com", true}, // bad scheme
|
||||
{"notaurl", true}, // no scheme/host
|
||||
{"http://", true}, // missing host
|
||||
{"http://169.254.169.254", true}, // cloud metadata (link-local)
|
||||
{"http://127.0.0.1", true}, // loopback
|
||||
{"http://127.0.0.1:8090", true}, // loopback + port
|
||||
{"http://10.0.0.5", true}, // RFC1918 (WireGuard mesh)
|
||||
{"http://192.168.1.1", true}, // RFC1918
|
||||
{"http://172.16.0.1", true}, // RFC1918
|
||||
{"http://100.64.0.1", true}, // CGNAT
|
||||
{"http://0.0.0.0", true}, // unspecified
|
||||
{"http://[::1]", true}, // v6 loopback
|
||||
{"http://[fd00::1]", true}, // v6 ULA
|
||||
{"http://[64:ff9b::a00:5]", true}, // NAT64-embedded 10.0.0.5
|
||||
{"http://0x7f000001", true}, // hex-encoded 127.0.0.1
|
||||
{"http://2130706433", true}, // decimal-encoded 127.0.0.1
|
||||
{"http://0177.0.0.1", true}, // octal-encoded 127.0.0.1
|
||||
}
|
||||
for _, tc := range cases {
|
||||
err := CheckBaseURLSyntax(tc.url)
|
||||
if tc.wantErr && err == nil {
|
||||
t.Errorf("CheckBaseURLSyntax(%q) = nil; want error", tc.url)
|
||||
}
|
||||
if !tc.wantErr && err != nil {
|
||||
t.Errorf("CheckBaseURLSyntax(%q) = %v; want nil", tc.url, err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestIsReservedIP(t *testing.T) {
|
||||
reserved := []string{
|
||||
"127.0.0.1", "169.254.169.254", "10.0.0.1", "172.16.5.5", "192.168.0.1",
|
||||
"100.64.0.1", "100.100.100.200", "0.0.0.0", "224.0.0.1", "::1", "fe80::1",
|
||||
"fd00::1", "ff02::1",
|
||||
"64:ff9b::a00:1", // NAT64-embedded 10.0.0.1
|
||||
"64:ff9b::a9fe:a9fe", // NAT64-embedded 169.254.169.254 (metadata)
|
||||
}
|
||||
public := []string{"1.1.1.1", "8.8.8.8", "203.0.113.10", "2606:4700:4700::1111"}
|
||||
for _, s := range reserved {
|
||||
if ip := net.ParseIP(s); !isReservedIP(ip) {
|
||||
t.Errorf("isReservedIP(%s) = false; want true (reserved)", s)
|
||||
}
|
||||
}
|
||||
for _, s := range public {
|
||||
if ip := net.ParseIP(s); isReservedIP(ip) {
|
||||
t.Errorf("isReservedIP(%s) = true; want false (public)", s)
|
||||
}
|
||||
}
|
||||
if !isReservedIP(nil) {
|
||||
t.Error("isReservedIP(nil) must be true (unparseable → unsafe)")
|
||||
}
|
||||
}
|
||||
|
||||
func TestIsInternalBaseURL(t *testing.T) {
|
||||
internal := []string{
|
||||
"http://10.0.0.5", "http://169.254.169.254",
|
||||
"https://127.0.0.1:8090", "http://[::1]", "http://192.168.1.1",
|
||||
"http://[64:ff9b::a00:5]", // NAT64
|
||||
"http://0x7f000001", // hex-encoded loopback
|
||||
"http://2130706433", // decimal-encoded loopback
|
||||
"http://0177.0.0.1", // octal-encoded loopback
|
||||
}
|
||||
notInternal := []string{
|
||||
"https://push.example.com", // hostname → false (the set path resolves it)
|
||||
"https://1.1.1.1", // public literal IP
|
||||
"ns-A-url", // malformed placeholder → must NOT be dropped
|
||||
"v1", "", "not a url",
|
||||
}
|
||||
for _, s := range internal {
|
||||
if !IsInternalBaseURL(s) {
|
||||
t.Errorf("IsInternalBaseURL(%q) = false; want true (internal literal IP)", s)
|
||||
}
|
||||
}
|
||||
for _, s := range notInternal {
|
||||
if IsInternalBaseURL(s) {
|
||||
t.Errorf("IsInternalBaseURL(%q) = true; want false", s)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestCheckBaseURLResolvable(t *testing.T) {
|
||||
orig := lookupIP
|
||||
defer func() { lookupIP = orig }()
|
||||
|
||||
t.Run("hostname resolving to internal is rejected", func(t *testing.T) {
|
||||
lookupIP = func(_ context.Context, host string) ([]net.IP, error) {
|
||||
return []net.IP{net.ParseIP("10.0.0.7")}, nil // points at the mesh
|
||||
}
|
||||
if err := CheckBaseURLResolvable(context.Background(), "https://evil.example.com"); err == nil {
|
||||
t.Fatal("expected rejection of a host resolving to an internal address")
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("hostname resolving to public is allowed", func(t *testing.T) {
|
||||
lookupIP = func(_ context.Context, host string) ([]net.IP, error) {
|
||||
return []net.IP{net.ParseIP("203.0.113.50")}, nil
|
||||
}
|
||||
if err := CheckBaseURLResolvable(context.Background(), "https://push.example.com"); err != nil {
|
||||
t.Fatalf("public-resolving host should pass: %v", err)
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("any internal IP among results is rejected", func(t *testing.T) {
|
||||
lookupIP = func(_ context.Context, host string) ([]net.IP, error) {
|
||||
return []net.IP{net.ParseIP("203.0.113.50"), net.ParseIP("127.0.0.1")}, nil
|
||||
}
|
||||
if err := CheckBaseURLResolvable(context.Background(), "https://mixed.example.com"); err == nil {
|
||||
t.Fatal("a host resolving to ANY internal address must be rejected")
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("resolution failure is allowed (fail open)", func(t *testing.T) {
|
||||
lookupIP = func(_ context.Context, host string) ([]net.IP, error) {
|
||||
return nil, errors.New("nxdomain")
|
||||
}
|
||||
if err := CheckBaseURLResolvable(context.Background(), "https://unresolvable.example.com"); err != nil {
|
||||
t.Fatalf("an unresolvable host should fail open (be allowed); got %v", err)
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("literal internal IP rejected without DNS", func(t *testing.T) {
|
||||
lookupIP = func(_ context.Context, host string) ([]net.IP, error) {
|
||||
t.Fatal("DNS must not be consulted for a literal IP host")
|
||||
return nil, nil
|
||||
}
|
||||
if err := CheckBaseURLResolvable(context.Background(), "http://169.254.169.254"); err == nil {
|
||||
t.Fatal("literal metadata IP must be rejected")
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("empty is allowed", func(t *testing.T) {
|
||||
if err := CheckBaseURLResolvable(context.Background(), ""); err != nil {
|
||||
t.Fatalf("empty base_url should pass: %v", err)
|
||||
}
|
||||
})
|
||||
}
|
||||
@ -2,6 +2,7 @@ package serverless
|
||||
|
||||
import (
|
||||
"context"
|
||||
cryptorand "crypto/rand"
|
||||
"errors"
|
||||
"fmt"
|
||||
"time"
|
||||
@ -318,6 +319,15 @@ func (e *Engine) Execute(ctx context.Context, fn *Function, input []byte, invCtx
|
||||
// gates invocation frequency, not per-invocation host-call volume).
|
||||
execCtx = WithPublishCounter(execCtx)
|
||||
|
||||
// Raw-HTTP-response mode (bugboard #835). Only RawHTTPResponse functions
|
||||
// get a collector attached — set_http_response is a validated no-op for
|
||||
// every other function (no collector → host call returns an error). The
|
||||
// collector rides execCtx so concurrent invocations never cross-write,
|
||||
// matching the publish-counter / log-buffer per-call model.
|
||||
if fn.RawHTTPResponse {
|
||||
execCtx = WithRawHTTPCollector(execCtx)
|
||||
}
|
||||
|
||||
// Get compiled module (from cache or compile)
|
||||
module, err := e.getOrCompileModule(execCtx, fn.WASMCID)
|
||||
if err != nil {
|
||||
@ -346,6 +356,14 @@ func (e *Engine) Execute(ctx context.Context, fn *Function, input []byte, invCtx
|
||||
return nil, &ExecutionError{FunctionName: fn.Name, RequestID: invCtx.RequestID, Cause: err}
|
||||
}
|
||||
|
||||
// Surface any verbatim HTTP response the function set (bugboard #835)
|
||||
// onto invCtx so the Invoker → HTTP handler can replay it. Only
|
||||
// RawHTTPResponse functions have a collector attached; TakeRawHTTPResponse
|
||||
// returns (_, false) otherwise.
|
||||
if res, ok := TakeRawHTTPResponse(execCtx); ok {
|
||||
invCtx.RawHTTP = &res
|
||||
}
|
||||
|
||||
e.logInvocation(ctx, fn, invCtx, logBuf, startTime, len(output), InvocationStatusSuccess, nil)
|
||||
e.logSlowInvocation(invCtx, startTime, ratelimitDoneAt, moduleLoadedAt, executeDoneAt, "success", nil)
|
||||
return output, nil
|
||||
@ -547,7 +565,13 @@ func (e *Engine) InstantiatePersistent(ctx context.Context, fn *Function, invCtx
|
||||
// into real clocks via the documented wazero hook — same effect as
|
||||
// the runtime would get on a normal Go process.
|
||||
WithSysWalltime().
|
||||
WithSysNanotime()
|
||||
WithSysNanotime().
|
||||
// Bugboard #120 — same class as #27. Without WithRandSource, wazero's
|
||||
// default RNG is deterministic (zero seed), so TinyGo crypto/rand.Read
|
||||
// returns identical bytes on every fresh instance — constant codes /
|
||||
// nonces / tokens. Wire in the host CSPRNG. Same fix at
|
||||
// execution/executor.go for the stateless path.
|
||||
WithRandSource(cryptorand.Reader)
|
||||
|
||||
instance, err := e.runtime.InstantiateModule(ctx, compiled, moduleConfig)
|
||||
if err != nil {
|
||||
@ -742,6 +766,7 @@ func (e *Engine) registerHostModule(ctx context.Context) error {
|
||||
NewFunctionBuilder().WithFunc(e.hCacheIncrBy).Export("cache_incr_by").
|
||||
NewFunctionBuilder().WithFunc(e.hHTTPFetch).Export("http_fetch").
|
||||
NewFunctionBuilder().WithFunc(e.hAnyoneFetch).Export("anyone_fetch").
|
||||
NewFunctionBuilder().WithFunc(e.hSetHTTPResponse).Export("set_http_response").
|
||||
NewFunctionBuilder().WithFunc(e.hPubSubPublish).Export("pubsub_publish").
|
||||
NewFunctionBuilder().WithFunc(e.hPubSubPublishBatch).Export("pubsub_publish_batch").
|
||||
NewFunctionBuilder().WithFunc(e.hPushSend).Export("push_send").
|
||||
@ -751,6 +776,8 @@ func (e *Engine) registerHostModule(ctx context.Context) error {
|
||||
NewFunctionBuilder().WithFunc(e.hWSPubSubUnbridge).Export("ws_pubsub_unbridge").
|
||||
NewFunctionBuilder().WithFunc(e.hWSSend).Export("ws_send").
|
||||
NewFunctionBuilder().WithFunc(e.hWSBroadcast).Export("ws_broadcast").
|
||||
NewFunctionBuilder().WithFunc(e.hEphemeralStateSet).Export("ephemeral_state_set").
|
||||
NewFunctionBuilder().WithFunc(e.hEphemeralStateClear).Export("ephemeral_state_clear").
|
||||
NewFunctionBuilder().WithFunc(e.hFunctionInvoke).Export("function_invoke").
|
||||
NewFunctionBuilder().WithFunc(e.hFunctionInvokeAsync).Export("function_invoke_async").
|
||||
NewFunctionBuilder().WithFunc(e.hLogInfo).Export("log_info").
|
||||
@ -948,6 +975,40 @@ func (e *Engine) hHTTPFetch(ctx context.Context, mod api.Module, methodPtr, meth
|
||||
return e.executor.WriteToGuest(ctx, mod, resp)
|
||||
}
|
||||
|
||||
// hSetHTTPResponse is the WASM-callable wrapper for SetHTTPResponse —
|
||||
// bugboard #835 raw-HTTP-response mode.
|
||||
//
|
||||
// ABI: set_http_response(status i32, headersJSONPtr, headersJSONLen,
|
||||
// bodyPtr, bodyLen uint32) -> uint32. headersJSON (when non-empty) is a JSON
|
||||
// object of string→string. Returns 1 on success, 0 on failure (function not
|
||||
// deployed with raw_http_response, bad status, oversized headers/body, or a
|
||||
// guest-memory read error).
|
||||
func (e *Engine) hSetHTTPResponse(ctx context.Context, mod api.Module,
|
||||
status, headersPtr, headersLen, bodyPtr, bodyLen uint32) uint32 {
|
||||
var headers map[string]string
|
||||
if headersLen > 0 {
|
||||
if err := e.executor.UnmarshalJSONFromGuest(mod, headersPtr, headersLen, &headers); err != nil {
|
||||
e.logger.Warn("set_http_response: failed to unmarshal headers", zap.Error(err))
|
||||
return 0
|
||||
}
|
||||
}
|
||||
|
||||
var body []byte
|
||||
if bodyLen > 0 {
|
||||
b, ok := e.executor.ReadFromGuest(mod, bodyPtr, bodyLen)
|
||||
if !ok {
|
||||
return 0
|
||||
}
|
||||
body = b
|
||||
}
|
||||
|
||||
if err := e.hostServices.SetHTTPResponse(ctx, int(status), headers, body); err != nil {
|
||||
e.logger.Warn("host function set_http_response failed", zap.Error(err))
|
||||
return 0
|
||||
}
|
||||
return 1
|
||||
}
|
||||
|
||||
// hAnyoneFetch is the WASM-callable wrapper for AnyoneFetch — feat-11.
|
||||
// Identical ABI to hHTTPFetch (method, url, headers JSON, body), routes
|
||||
// through the Anyone SOCKS5 proxy. Returns packed (ptr<<32 | len) to the
|
||||
@ -1291,6 +1352,67 @@ func (e *Engine) hWSBroadcast(ctx context.Context, mod api.Module,
|
||||
return 1
|
||||
}
|
||||
|
||||
// hEphemeralStateSet is the WASM-callable wrapper for EphemeralStateSet —
|
||||
// bugboard #710 WS-subscribe-tracked ephemeral state.
|
||||
//
|
||||
// ABI: ephemeral_state_set(topicPtr, topicLen, keyPtr, keyLen, payloadPtr,
|
||||
// payloadLen uint32, ttlMs int64) -> uint32. Returns 1 on success, 0 on
|
||||
// failure (no WS client in context, empty topic/key, oversized payload,
|
||||
// per-client key cap, or a guest-memory read error).
|
||||
func (e *Engine) hEphemeralStateSet(ctx context.Context, mod api.Module,
|
||||
topicPtr, topicLen, keyPtr, keyLen, payloadPtr, payloadLen uint32, ttlMs int64) uint32 {
|
||||
topic, ok := e.executor.ReadFromGuest(mod, topicPtr, topicLen)
|
||||
if !ok {
|
||||
return 0
|
||||
}
|
||||
key, ok := e.executor.ReadFromGuest(mod, keyPtr, keyLen)
|
||||
if !ok {
|
||||
return 0
|
||||
}
|
||||
var payload []byte
|
||||
if payloadLen > 0 {
|
||||
p, ok := e.executor.ReadFromGuest(mod, payloadPtr, payloadLen)
|
||||
if !ok {
|
||||
return 0
|
||||
}
|
||||
payload = p
|
||||
}
|
||||
if err := e.hostServices.EphemeralStateSet(ctx, string(topic), string(key), payload, ttlMs); err != nil {
|
||||
e.logger.Warn("host function ephemeral_state_set failed",
|
||||
zap.String("topic", string(topic)),
|
||||
zap.String("key", string(key)),
|
||||
zap.Error(err))
|
||||
return 0
|
||||
}
|
||||
return 1
|
||||
}
|
||||
|
||||
// hEphemeralStateClear is the WASM-callable wrapper for EphemeralStateClear.
|
||||
//
|
||||
// ABI: ephemeral_state_clear(topicPtr, topicLen, keyPtr, keyLen uint32) ->
|
||||
// uint32. Returns 1 on success (including idempotent clears of a missing key),
|
||||
// 0 on failure (no WS client in context, empty topic/key, or a guest-memory
|
||||
// read error).
|
||||
func (e *Engine) hEphemeralStateClear(ctx context.Context, mod api.Module,
|
||||
topicPtr, topicLen, keyPtr, keyLen uint32) uint32 {
|
||||
topic, ok := e.executor.ReadFromGuest(mod, topicPtr, topicLen)
|
||||
if !ok {
|
||||
return 0
|
||||
}
|
||||
key, ok := e.executor.ReadFromGuest(mod, keyPtr, keyLen)
|
||||
if !ok {
|
||||
return 0
|
||||
}
|
||||
if err := e.hostServices.EphemeralStateClear(ctx, string(topic), string(key)); err != nil {
|
||||
e.logger.Warn("host function ephemeral_state_clear failed",
|
||||
zap.String("topic", string(topic)),
|
||||
zap.String("key", string(key)),
|
||||
zap.Error(err))
|
||||
return 0
|
||||
}
|
||||
return 1
|
||||
}
|
||||
|
||||
// hPushSend is the WASM-callable wrapper for PushSend.
|
||||
// Inputs:
|
||||
// userIDPtr/userIDLen — UTF-8 user ID to push to (within the function's
|
||||
|
||||
52
core/pkg/serverless/ephemeral_disconnect_test.go
Normal file
52
core/pkg/serverless/ephemeral_disconnect_test.go
Normal file
@ -0,0 +1,52 @@
|
||||
package serverless
|
||||
|
||||
import (
|
||||
"context"
|
||||
"testing"
|
||||
|
||||
"go.uber.org/zap"
|
||||
)
|
||||
|
||||
// fakeWSConn is a stub WebSocketConn for exercising the WSManager lifecycle:
// every method succeeds immediately without performing any I/O.
type fakeWSConn struct{}

// WriteMessage discards the message and reports success.
func (c fakeWSConn) WriteMessage(messageType int, data []byte) error {
	return nil
}

// ReadMessage never yields data: it returns a zero message type, a nil
// payload, and a nil error.
func (c fakeWSConn) ReadMessage() (messageType int, data []byte, err error) {
	return 0, nil, nil
}

// Close reports success without releasing anything.
func (c fakeWSConn) Close() error {
	return nil
}
|
||||
|
||||
// TestWSManager_DisconnectHookClearsEphemeralState verifies the wiring that
|
||||
// makes Feature #710's auto-clear work: a disconnect hook registered on the
|
||||
// WSManager fires on Unregister, clearing the disconnecting client's ephemeral
|
||||
// state. Both the stateless and persistent WS handlers call Unregister, so
|
||||
// this single hook covers both paths.
|
||||
func TestWSManager_DisconnectHookClearsEphemeralState(t *testing.T) {
|
||||
logger := zap.NewNop()
|
||||
wsm := NewWSManager(logger)
|
||||
pub := &capturePublisher{}
|
||||
store := NewEphemeralStore(pub.publish)
|
||||
|
||||
// Wire the hook exactly as NewHostFunctions does.
|
||||
wsm.AddDisconnectHook(func(clientID string) {
|
||||
store.ClearClient(context.Background(), clientID)
|
||||
})
|
||||
|
||||
clientID := "client-A"
|
||||
wsm.Register(clientID, fakeWSConn{})
|
||||
|
||||
if err := store.Set(context.Background(), "ns1", clientID, "t", "k", []byte("p"), 0); err != nil {
|
||||
t.Fatalf("Set: %v", err)
|
||||
}
|
||||
if store.keyCountForTest() != 1 {
|
||||
t.Fatalf("expected 1 key before disconnect, got %d", store.keyCountForTest())
|
||||
}
|
||||
|
||||
// Disconnect → hook fires → state cleared + synthetic clear published.
|
||||
wsm.Unregister(clientID)
|
||||
|
||||
if store.keyCountForTest() != 0 {
|
||||
t.Errorf("disconnect hook did not clear ephemeral state, count=%d", store.keyCountForTest())
|
||||
}
|
||||
if pub.countKind(EphemeralEventClear) != 1 {
|
||||
t.Errorf("expected 1 synthetic clear on disconnect, got %d", pub.countKind(EphemeralEventClear))
|
||||
}
|
||||
}
|
||||
352
core/pkg/serverless/ephemeral_state.go
Normal file
352
core/pkg/serverless/ephemeral_state.go
Normal file
@ -0,0 +1,352 @@
|
||||
package serverless
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"sync"
|
||||
"time"
|
||||
)
|
||||
|
||||
// WS-subscribe-tracked ephemeral state primitive (bugboard #710).
|
||||
//
|
||||
// A serverless function can publish short-lived per-subscriber state (typing
|
||||
// indicators, "online" flags, cursor positions, …) keyed by (topic, key) and
|
||||
// have the gateway AUTO-CLEAR that state the moment the owning WebSocket
|
||||
// client disconnects — publishing a synthetic clear event so every subscriber
|
||||
// sees the state vanish with zero cron lag. State also expires on a TTL as a
|
||||
// backstop.
|
||||
//
|
||||
// Ownership model: each set is tagged with the CURRENT invocation's WS client
|
||||
// ID (the same source GetWSClientID reads). On disconnect the store iterates
|
||||
// that client's owned (topic,key) entries, publishes a clear event for each,
|
||||
// and drops them. A client's disconnect never touches another client's state.
|
||||
|
||||
// Limits and timing parameters for the ephemeral state store.
const (
	// ephemeralMaxKeysPerClient is the maximum number of distinct (topic,key)
	// entries one WS client may own at a time. It bounds per-client memory
	// and the number of synthetic clears fanned out on disconnect.
	ephemeralMaxKeysPerClient = 256

	// ephemeralMaxPayloadBytes is the size limit for a single ephemeral
	// payload: generous for presence/typing/cursor metadata while bounding
	// gateway memory.
	ephemeralMaxPayloadBytes = 16 * 1024 // 16 KiB

	// ephemeralMaxTTL is the upper bound on a requested TTL. Ephemeral state
	// is meant to be short-lived; the disconnect hook is the primary cleanup
	// path and the TTL is only a backstop, so a long TTL is never useful.
	ephemeralMaxTTL = 30 * time.Minute

	// ephemeralDefaultTTL is used when a caller passes ttlMs <= 0.
	ephemeralDefaultTTL = time.Minute

	// ephemeralSweepInterval is the period of the backstop sweeper's scan
	// for expired entries. The disconnect hook handles the common case; the
	// sweeper only catches entries whose owner is still connected but whose
	// TTL elapsed.
	ephemeralSweepInterval = 10 * time.Second
)
|
||||
|
||||
// EphemeralEventKind tells subscribers which kind of synthetic event was
// published on a topic.
type EphemeralEventKind string

// The event kinds carried in EphemeralEvent.Kind.
const (
	EphemeralEventSet   = EphemeralEventKind("set")
	EphemeralEventClear = EphemeralEventKind("clear")
)

// EphemeralEvent is the JSON wire shape published on the topic when ephemeral
// state is set, cleared, or auto-cleared on disconnect/expiry. Subscribers
// key off Kind + Key to update their local view.
type EphemeralEvent struct {
	// Type is the "__ephemeral" discriminator; always "state".
	Type string `json:"__ephemeral"`

	// Kind is either "set" or "clear".
	Kind EphemeralEventKind `json:"kind"`

	// Key is the app-chosen key.
	Key string `json:"key"`

	// ClientID identifies the owning WS client.
	ClientID string `json:"client_id"`

	// Payload is the opaque app-chosen blob (may be JSON, protobuf, or
	// arbitrary bytes), present only for "set". encoding/json base64-encodes
	// a []byte on the wire, so subscribers base64-decode "payload" to recover
	// the original bytes — mirroring how pubsub_publish_batch carries data.
	Payload []byte `json:"payload,omitempty"`

	// Reason is set on clears only: explicit|disconnect|expired.
	Reason string `json:"reason,omitempty"`
}
|
||||
|
||||
type (
	// ephemeralPublisher publishes data on a (namespace, topic). It is a
	// plain function type so the store can publish synthetic clears without
	// depending on the concrete pubsub adapter type, and so tests can capture
	// published events. Namespace handling matches the host pubsub path: the
	// adapter namespaces internally, so this publisher is handed the caller's
	// topic verbatim alongside the namespace.
	ephemeralPublisher func(ctx context.Context, namespace, topic string, data []byte) error

	// ephemeralEntry is one stored value together with its expiry and the
	// metadata needed to publish a clear event for it.
	ephemeralEntry struct {
		namespace string
		topic     string
		key       string
		clientID  string
		payload   []byte
		expiresAt time.Time
	}

	// ephemeralStateKey identifies a stored value across namespaces/topics.
	// It is comparable and used as a map key.
	ephemeralStateKey struct {
		namespace string
		topic     string
		key       string
	}
)

// EphemeralStore holds WS-subscribe-tracked ephemeral state with auto-clear on
// disconnect (bugboard #710). Safe for concurrent use: mu guards both maps.
type EphemeralStore struct {
	// publish delivers set/clear events; nil disables publishing.
	publish ephemeralPublisher

	// mu protects values and owned.
	mu sync.Mutex

	// values holds every live entry, keyed by (ns, topic, key).
	values map[ephemeralStateKey]*ephemeralEntry

	// owned indexes, per clientID, the state keys that client owns so a
	// disconnect can find them without scanning values.
	owned map[string]map[ephemeralStateKey]struct{}

	// Sweeper lifecycle: stopCh is closed (at most once, via stopOnce) to
	// stop the background sweeper.
	stopOnce sync.Once
	stopCh   chan struct{}

	// now is the clock; tests replace it to control expiry.
	now func() time.Time
}

// NewEphemeralStore builds an empty store that publishes through the given
// publisher. The publisher may be nil (set/clear then skip publishing) —
// useful in tests, but in production the host wires the pubsub adapter so
// subscribers see events. The store uses time.Now as its clock and does not
// start the sweeper.
func NewEphemeralStore(publish ephemeralPublisher) *EphemeralStore {
	store := new(EphemeralStore)
	store.publish = publish
	store.values = map[ephemeralStateKey]*ephemeralEntry{}
	store.owned = map[string]map[ephemeralStateKey]struct{}{}
	store.stopCh = make(chan struct{})
	store.now = time.Now
	return store
}
|
||||
|
||||
// Set records an ephemeral value owned by clientID and publishes a "set" event
|
||||
// on the topic so subscribers observe it. Returns an error on validation
|
||||
// failure (empty client/topic/key, oversized payload, per-client cap reached).
|
||||
func (s *EphemeralStore) Set(ctx context.Context, namespace, clientID, topic, key string, payload []byte, ttlMs int64) error {
|
||||
if clientID == "" {
|
||||
return fmt.Errorf("ephemeral_state_set: requires a WebSocket client (no ws_client_id in invocation context)")
|
||||
}
|
||||
if topic == "" || key == "" {
|
||||
return fmt.Errorf("ephemeral_state_set: topic and key are required")
|
||||
}
|
||||
if len(payload) > ephemeralMaxPayloadBytes {
|
||||
return fmt.Errorf("ephemeral_state_set: payload too large (%d > %d bytes)", len(payload), ephemeralMaxPayloadBytes)
|
||||
}
|
||||
|
||||
ttl := time.Duration(ttlMs) * time.Millisecond
|
||||
if ttl <= 0 {
|
||||
ttl = ephemeralDefaultTTL
|
||||
}
|
||||
if ttl > ephemeralMaxTTL {
|
||||
ttl = ephemeralMaxTTL
|
||||
}
|
||||
|
||||
sk := ephemeralStateKey{namespace: namespace, topic: topic, key: key}
|
||||
payloadCopy := make([]byte, len(payload))
|
||||
copy(payloadCopy, payload)
|
||||
|
||||
s.mu.Lock()
|
||||
ownedSet := s.owned[clientID]
|
||||
// Enforce the per-client cap only for NEW keys this client doesn't already
|
||||
// own — overwriting an existing key must always be allowed.
|
||||
if _, alreadyOwned := s.values[sk]; !alreadyOwned || s.values[sk].clientID != clientID {
|
||||
if len(ownedSet) >= ephemeralMaxKeysPerClient {
|
||||
s.mu.Unlock()
|
||||
return fmt.Errorf("ephemeral_state_set: client %s exceeded max %d ephemeral keys", clientID, ephemeralMaxKeysPerClient)
|
||||
}
|
||||
}
|
||||
|
||||
// If a different client owned this exact (ns,topic,key), transfer ownership
|
||||
// — drop it from the previous owner's set so its disconnect won't clear
|
||||
// state it no longer owns.
|
||||
if prev, ok := s.values[sk]; ok && prev.clientID != clientID {
|
||||
if prevSet := s.owned[prev.clientID]; prevSet != nil {
|
||||
delete(prevSet, sk)
|
||||
if len(prevSet) == 0 {
|
||||
delete(s.owned, prev.clientID)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
s.values[sk] = &ephemeralEntry{
|
||||
namespace: namespace,
|
||||
topic: topic,
|
||||
key: key,
|
||||
clientID: clientID,
|
||||
payload: payloadCopy,
|
||||
expiresAt: s.now().Add(ttl),
|
||||
}
|
||||
if ownedSet == nil {
|
||||
ownedSet = make(map[ephemeralStateKey]struct{})
|
||||
s.owned[clientID] = ownedSet
|
||||
}
|
||||
ownedSet[sk] = struct{}{}
|
||||
s.mu.Unlock()
|
||||
|
||||
evt := EphemeralEvent{
|
||||
Type: "state",
|
||||
Kind: EphemeralEventSet,
|
||||
Key: key,
|
||||
ClientID: clientID,
|
||||
Payload: payloadCopy,
|
||||
}
|
||||
return s.publishEvent(ctx, namespace, topic, evt)
|
||||
}
|
||||
|
||||
// Clear removes an ephemeral value the client owns and publishes a "clear"
|
||||
// event with reason "explicit". Clearing a key the client does not own (or a
|
||||
// missing key) is a no-op that still returns nil — clears are idempotent.
|
||||
func (s *EphemeralStore) Clear(ctx context.Context, namespace, clientID, topic, key string) error {
|
||||
if clientID == "" {
|
||||
return fmt.Errorf("ephemeral_state_clear: requires a WebSocket client (no ws_client_id in invocation context)")
|
||||
}
|
||||
if topic == "" || key == "" {
|
||||
return fmt.Errorf("ephemeral_state_clear: topic and key are required")
|
||||
}
|
||||
|
||||
sk := ephemeralStateKey{namespace: namespace, topic: topic, key: key}
|
||||
|
||||
s.mu.Lock()
|
||||
entry, ok := s.values[sk]
|
||||
if !ok || entry.clientID != clientID {
|
||||
// Not present, or owned by someone else — idempotent no-op.
|
||||
s.mu.Unlock()
|
||||
return nil
|
||||
}
|
||||
s.removeLocked(sk, entry)
|
||||
s.mu.Unlock()
|
||||
|
||||
return s.publishEvent(ctx, namespace, topic, EphemeralEvent{
|
||||
Type: "state",
|
||||
Kind: EphemeralEventClear,
|
||||
Key: key,
|
||||
ClientID: clientID,
|
||||
Reason: "explicit",
|
||||
})
|
||||
}
|
||||
|
||||
// ClearClient removes every entry owned by clientID and publishes a clear
|
||||
// event for each (reason "disconnect"). Called from the WS disconnect hook —
|
||||
// the primary, zero-lag cleanup path. Safe to call for an unknown client.
|
||||
func (s *EphemeralStore) ClearClient(ctx context.Context, clientID string) {
|
||||
s.clearClientWithReason(ctx, clientID, "disconnect")
|
||||
}
|
||||
|
||||
func (s *EphemeralStore) clearClientWithReason(ctx context.Context, clientID, reason string) {
|
||||
s.mu.Lock()
|
||||
ownedSet := s.owned[clientID]
|
||||
if len(ownedSet) == 0 {
|
||||
delete(s.owned, clientID)
|
||||
s.mu.Unlock()
|
||||
return
|
||||
}
|
||||
// Snapshot entries to publish after releasing the lock.
|
||||
toClear := make([]*ephemeralEntry, 0, len(ownedSet))
|
||||
for sk := range ownedSet {
|
||||
if entry, ok := s.values[sk]; ok {
|
||||
toClear = append(toClear, entry)
|
||||
delete(s.values, sk)
|
||||
}
|
||||
}
|
||||
delete(s.owned, clientID)
|
||||
s.mu.Unlock()
|
||||
|
||||
for _, entry := range toClear {
|
||||
_ = s.publishEvent(ctx, entry.namespace, entry.topic, EphemeralEvent{
|
||||
Type: "state",
|
||||
Kind: EphemeralEventClear,
|
||||
Key: entry.key,
|
||||
ClientID: clientID,
|
||||
Reason: reason,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// removeLocked drops one entry from both maps. Caller holds s.mu.
|
||||
func (s *EphemeralStore) removeLocked(sk ephemeralStateKey, entry *ephemeralEntry) {
|
||||
delete(s.values, sk)
|
||||
if set := s.owned[entry.clientID]; set != nil {
|
||||
delete(set, sk)
|
||||
if len(set) == 0 {
|
||||
delete(s.owned, entry.clientID)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// publishEvent marshals and publishes a synthetic event. No-op (nil) when no
|
||||
// publisher is wired.
|
||||
func (s *EphemeralStore) publishEvent(ctx context.Context, namespace, topic string, evt EphemeralEvent) error {
|
||||
if s.publish == nil {
|
||||
return nil
|
||||
}
|
||||
data, err := json.Marshal(evt)
|
||||
if err != nil {
|
||||
return fmt.Errorf("ephemeral state: marshal event: %w", err)
|
||||
}
|
||||
if err := s.publish(ctx, namespace, topic, data); err != nil {
|
||||
return fmt.Errorf("ephemeral state: publish %s event: %w", evt.Kind, err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// StartSweeper launches the TTL backstop sweeper. Idempotent guards aren't
|
||||
// provided — call exactly once. Stop with StopSweeper.
|
||||
func (s *EphemeralStore) StartSweeper() {
|
||||
go func() {
|
||||
ticker := time.NewTicker(ephemeralSweepInterval)
|
||||
defer ticker.Stop()
|
||||
for {
|
||||
select {
|
||||
case <-s.stopCh:
|
||||
return
|
||||
case <-ticker.C:
|
||||
s.sweepExpired(context.Background())
|
||||
}
|
||||
}
|
||||
}()
|
||||
}
|
||||
|
||||
// StopSweeper stops the backstop sweeper. Safe to call multiple times.
|
||||
func (s *EphemeralStore) StopSweeper() {
|
||||
s.stopOnce.Do(func() { close(s.stopCh) })
|
||||
}
|
||||
|
||||
// sweepExpired removes and publishes clears for every entry whose TTL elapsed.
|
||||
func (s *EphemeralStore) sweepExpired(ctx context.Context) {
|
||||
now := s.now()
|
||||
|
||||
s.mu.Lock()
|
||||
var expired []*ephemeralEntry
|
||||
for sk, entry := range s.values {
|
||||
if now.After(entry.expiresAt) {
|
||||
expired = append(expired, entry)
|
||||
s.removeLocked(sk, entry)
|
||||
}
|
||||
}
|
||||
s.mu.Unlock()
|
||||
|
||||
for _, entry := range expired {
|
||||
_ = s.publishEvent(ctx, entry.namespace, entry.topic, EphemeralEvent{
|
||||
Type: "state",
|
||||
Kind: EphemeralEventClear,
|
||||
Key: entry.key,
|
||||
ClientID: entry.clientID,
|
||||
Reason: "expired",
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// keyCountForTest returns the number of stored values (test-only accessor).
|
||||
func (s *EphemeralStore) keyCountForTest() int {
|
||||
s.mu.Lock()
|
||||
defer s.mu.Unlock()
|
||||
return len(s.values)
|
||||
}
|
||||
295
core/pkg/serverless/ephemeral_state_test.go
Normal file
295
core/pkg/serverless/ephemeral_state_test.go
Normal file
@ -0,0 +1,295 @@
|
||||
package serverless
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"sync"
|
||||
"testing"
|
||||
"time"
|
||||
)
|
||||
|
||||
// capturePublisher records every published event for assertions.
|
||||
type capturePublisher struct {
|
||||
mu sync.Mutex
|
||||
events []capturedEvent
|
||||
}
|
||||
|
||||
type capturedEvent struct {
|
||||
namespace string
|
||||
topic string
|
||||
event EphemeralEvent
|
||||
}
|
||||
|
||||
func (c *capturePublisher) publish(_ context.Context, namespace, topic string, data []byte) error {
|
||||
var evt EphemeralEvent
|
||||
if err := json.Unmarshal(data, &evt); err != nil {
|
||||
return err
|
||||
}
|
||||
c.mu.Lock()
|
||||
c.events = append(c.events, capturedEvent{namespace: namespace, topic: topic, event: evt})
|
||||
c.mu.Unlock()
|
||||
return nil
|
||||
}
|
||||
|
||||
func (c *capturePublisher) snapshot() []capturedEvent {
|
||||
c.mu.Lock()
|
||||
defer c.mu.Unlock()
|
||||
out := make([]capturedEvent, len(c.events))
|
||||
copy(out, c.events)
|
||||
return out
|
||||
}
|
||||
|
||||
func (c *capturePublisher) countKind(kind EphemeralEventKind) int {
|
||||
c.mu.Lock()
|
||||
defer c.mu.Unlock()
|
||||
n := 0
|
||||
for _, e := range c.events {
|
||||
if e.event.Kind == kind {
|
||||
n++
|
||||
}
|
||||
}
|
||||
return n
|
||||
}
|
||||
|
||||
func newTestStore(pub ephemeralPublisher) *EphemeralStore {
|
||||
s := NewEphemeralStore(pub)
|
||||
return s
|
||||
}
|
||||
|
||||
func TestEphemeralStore_SetThenClear(t *testing.T) {
|
||||
pub := &capturePublisher{}
|
||||
s := newTestStore(pub.publish)
|
||||
ctx := context.Background()
|
||||
|
||||
if err := s.Set(ctx, "ns1", "client-A", "typing:room1", "k1", []byte(`{"typing":true}`), 0); err != nil {
|
||||
t.Fatalf("Set: %v", err)
|
||||
}
|
||||
if s.keyCountForTest() != 1 {
|
||||
t.Fatalf("expected 1 stored key, got %d", s.keyCountForTest())
|
||||
}
|
||||
|
||||
if err := s.Clear(ctx, "ns1", "client-A", "typing:room1", "k1"); err != nil {
|
||||
t.Fatalf("Clear: %v", err)
|
||||
}
|
||||
if s.keyCountForTest() != 0 {
|
||||
t.Errorf("expected 0 stored keys after clear, got %d", s.keyCountForTest())
|
||||
}
|
||||
|
||||
if got := pub.countKind(EphemeralEventSet); got != 1 {
|
||||
t.Errorf("set events = %d, want 1", got)
|
||||
}
|
||||
if got := pub.countKind(EphemeralEventClear); got != 1 {
|
||||
t.Errorf("clear events = %d, want 1", got)
|
||||
}
|
||||
// The set event must carry the payload verbatim.
|
||||
evts := pub.snapshot()
|
||||
if string(evts[0].event.Payload) != `{"typing":true}` {
|
||||
t.Errorf("set payload = %q, want the original JSON", evts[0].event.Payload)
|
||||
}
|
||||
if evts[1].event.Reason != "explicit" {
|
||||
t.Errorf("clear reason = %q, want explicit", evts[1].event.Reason)
|
||||
}
|
||||
}
|
||||
|
||||
func TestEphemeralStore_SetThenDisconnect(t *testing.T) {
|
||||
pub := &capturePublisher{}
|
||||
s := newTestStore(pub.publish)
|
||||
ctx := context.Background()
|
||||
|
||||
if err := s.Set(ctx, "ns1", "client-A", "topicX", "kA", []byte("p1"), 0); err != nil {
|
||||
t.Fatalf("Set kA: %v", err)
|
||||
}
|
||||
if err := s.Set(ctx, "ns1", "client-A", "topicY", "kB", []byte("p2"), 0); err != nil {
|
||||
t.Fatalf("Set kB: %v", err)
|
||||
}
|
||||
|
||||
s.ClearClient(ctx, "client-A")
|
||||
|
||||
if s.keyCountForTest() != 0 {
|
||||
t.Errorf("expected all state dropped on disconnect, got %d", s.keyCountForTest())
|
||||
}
|
||||
// One synthetic clear per owned key, all reason=disconnect.
|
||||
if got := pub.countKind(EphemeralEventClear); got != 2 {
|
||||
t.Errorf("disconnect clear events = %d, want 2", got)
|
||||
}
|
||||
for _, e := range pub.snapshot() {
|
||||
if e.event.Kind == EphemeralEventClear && e.event.Reason != "disconnect" {
|
||||
t.Errorf("clear reason = %q, want disconnect", e.event.Reason)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestEphemeralStore_TTLExpiry(t *testing.T) {
|
||||
pub := &capturePublisher{}
|
||||
s := newTestStore(pub.publish)
|
||||
ctx := context.Background()
|
||||
|
||||
// Freeze the clock so we control expiry deterministically.
|
||||
base := time.Now()
|
||||
s.now = func() time.Time { return base }
|
||||
|
||||
if err := s.Set(ctx, "ns1", "client-A", "topicX", "kA", []byte("p"), 1000); err != nil {
|
||||
t.Fatalf("Set: %v", err)
|
||||
}
|
||||
|
||||
// Before expiry: sweep is a no-op.
|
||||
s.sweepExpired(ctx)
|
||||
if s.keyCountForTest() != 1 {
|
||||
t.Fatalf("entry expired too early, count=%d", s.keyCountForTest())
|
||||
}
|
||||
|
||||
// Advance past the 1s TTL and sweep.
|
||||
s.now = func() time.Time { return base.Add(2 * time.Second) }
|
||||
s.sweepExpired(ctx)
|
||||
if s.keyCountForTest() != 0 {
|
||||
t.Errorf("entry not swept after TTL, count=%d", s.keyCountForTest())
|
||||
}
|
||||
|
||||
// A clear event with reason=expired must have been published.
|
||||
foundExpired := false
|
||||
for _, e := range pub.snapshot() {
|
||||
if e.event.Kind == EphemeralEventClear && e.event.Reason == "expired" {
|
||||
foundExpired = true
|
||||
}
|
||||
}
|
||||
if !foundExpired {
|
||||
t.Error("expected a clear event with reason=expired")
|
||||
}
|
||||
}
|
||||
|
||||
func TestEphemeralStore_TTLClampedToMax(t *testing.T) {
|
||||
pub := &capturePublisher{}
|
||||
s := newTestStore(pub.publish)
|
||||
base := time.Now()
|
||||
s.now = func() time.Time { return base }
|
||||
|
||||
// Request a TTL far beyond the max; it must be clamped.
|
||||
huge := (ephemeralMaxTTL + time.Hour).Milliseconds()
|
||||
if err := s.Set(context.Background(), "ns1", "c", "t", "k", []byte("p"), huge); err != nil {
|
||||
t.Fatalf("Set: %v", err)
|
||||
}
|
||||
s.mu.Lock()
|
||||
entry := s.values[ephemeralStateKey{namespace: "ns1", topic: "t", key: "k"}]
|
||||
s.mu.Unlock()
|
||||
if entry == nil {
|
||||
t.Fatal("entry missing")
|
||||
}
|
||||
maxExpiry := base.Add(ephemeralMaxTTL)
|
||||
if entry.expiresAt.After(maxExpiry) {
|
||||
t.Errorf("TTL not clamped: expiresAt %v after max %v", entry.expiresAt, maxExpiry)
|
||||
}
|
||||
}
|
||||
|
||||
func TestEphemeralStore_PerClientCapEnforced(t *testing.T) {
|
||||
pub := &capturePublisher{}
|
||||
s := newTestStore(pub.publish)
|
||||
ctx := context.Background()
|
||||
|
||||
for i := 0; i < ephemeralMaxKeysPerClient; i++ {
|
||||
if err := s.Set(ctx, "ns1", "client-A", "t", fmt.Sprintf("k%d", i), []byte("p"), 0); err != nil {
|
||||
t.Fatalf("Set #%d: %v", i, err)
|
||||
}
|
||||
}
|
||||
// The next NEW key must be rejected.
|
||||
err := s.Set(ctx, "ns1", "client-A", "t", "overflow", []byte("p"), 0)
|
||||
if err == nil {
|
||||
t.Fatal("expected per-client cap error")
|
||||
}
|
||||
if s.keyCountForTest() != ephemeralMaxKeysPerClient {
|
||||
t.Errorf("stored keys = %d, want %d (overflow must not be stored)", s.keyCountForTest(), ephemeralMaxKeysPerClient)
|
||||
}
|
||||
|
||||
// Overwriting an EXISTING key must still succeed even at the cap.
|
||||
if err := s.Set(ctx, "ns1", "client-A", "t", "k0", []byte("updated"), 0); err != nil {
|
||||
t.Errorf("overwrite at cap rejected: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestEphemeralStore_ClientIsolation(t *testing.T) {
|
||||
pub := &capturePublisher{}
|
||||
s := newTestStore(pub.publish)
|
||||
ctx := context.Background()
|
||||
|
||||
if err := s.Set(ctx, "ns1", "client-A", "t", "kA", []byte("a"), 0); err != nil {
|
||||
t.Fatalf("Set A: %v", err)
|
||||
}
|
||||
if err := s.Set(ctx, "ns1", "client-B", "t", "kB", []byte("b"), 0); err != nil {
|
||||
t.Fatalf("Set B: %v", err)
|
||||
}
|
||||
|
||||
// Disconnecting A must NOT touch B's state.
|
||||
s.ClearClient(ctx, "client-A")
|
||||
if s.keyCountForTest() != 1 {
|
||||
t.Fatalf("expected B's single key to survive A's disconnect, got %d", s.keyCountForTest())
|
||||
}
|
||||
s.mu.Lock()
|
||||
_, bSurvives := s.values[ephemeralStateKey{namespace: "ns1", topic: "t", key: "kB"}]
|
||||
s.mu.Unlock()
|
||||
if !bSurvives {
|
||||
t.Error("client-B's state was wrongly cleared by client-A's disconnect")
|
||||
}
|
||||
|
||||
// A also cannot clear B's key (not the owner): idempotent no-op.
|
||||
if err := s.Clear(ctx, "ns1", "client-A", "t", "kB"); err != nil {
|
||||
t.Fatalf("cross-client Clear should be a no-op, got err: %v", err)
|
||||
}
|
||||
if s.keyCountForTest() != 1 {
|
||||
t.Error("client-A managed to clear client-B's key")
|
||||
}
|
||||
}
|
||||
|
||||
func TestEphemeralStore_SetValidation(t *testing.T) {
|
||||
s := newTestStore(nil)
|
||||
ctx := context.Background()
|
||||
|
||||
if err := s.Set(ctx, "ns1", "", "t", "k", nil, 0); err == nil {
|
||||
t.Error("expected error for empty client ID")
|
||||
}
|
||||
if err := s.Set(ctx, "ns1", "c", "", "k", nil, 0); err == nil {
|
||||
t.Error("expected error for empty topic")
|
||||
}
|
||||
if err := s.Set(ctx, "ns1", "c", "t", "", nil, 0); err == nil {
|
||||
t.Error("expected error for empty key")
|
||||
}
|
||||
big := make([]byte, ephemeralMaxPayloadBytes+1)
|
||||
if err := s.Set(ctx, "ns1", "c", "t", "k", big, 0); err == nil {
|
||||
t.Error("expected error for oversized payload")
|
||||
}
|
||||
}
|
||||
|
||||
func TestEphemeralStore_ClearClientUnknownIsNoOp(t *testing.T) {
|
||||
pub := &capturePublisher{}
|
||||
s := newTestStore(pub.publish)
|
||||
// No panic, no events for an unknown client.
|
||||
s.ClearClient(context.Background(), "nobody")
|
||||
if len(pub.snapshot()) != 0 {
|
||||
t.Error("ClearClient on unknown client should publish nothing")
|
||||
}
|
||||
}
|
||||
|
||||
func TestEphemeralStore_OwnershipTransfer(t *testing.T) {
|
||||
pub := &capturePublisher{}
|
||||
s := newTestStore(pub.publish)
|
||||
ctx := context.Background()
|
||||
|
||||
// client-A sets, then client-B overwrites the SAME (topic,key).
|
||||
if err := s.Set(ctx, "ns1", "client-A", "t", "shared", []byte("a"), 0); err != nil {
|
||||
t.Fatalf("Set A: %v", err)
|
||||
}
|
||||
if err := s.Set(ctx, "ns1", "client-B", "t", "shared", []byte("b"), 0); err != nil {
|
||||
t.Fatalf("Set B: %v", err)
|
||||
}
|
||||
|
||||
// A's disconnect must NOT clear the key now owned by B.
|
||||
s.ClearClient(ctx, "client-A")
|
||||
if s.keyCountForTest() != 1 {
|
||||
t.Errorf("ownership transfer failed: key dropped on prior owner's disconnect, count=%d", s.keyCountForTest())
|
||||
}
|
||||
|
||||
// B's disconnect clears it.
|
||||
s.ClearClient(ctx, "client-B")
|
||||
if s.keyCountForTest() != 0 {
|
||||
t.Errorf("new owner's disconnect did not clear, count=%d", s.keyCountForTest())
|
||||
}
|
||||
}
|
||||
@ -3,6 +3,7 @@ package execution
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
cryptorand "crypto/rand"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
|
||||
@ -80,7 +81,15 @@ func (e *Executor) ExecuteModule(ctx context.Context, compiled wazero.CompiledMo
|
||||
// invocation that uses time.Now() (receipts, audit rows, cursor cmp).
|
||||
// Same fix applied at engine.go for the persistent-WS path.
|
||||
WithSysWalltime().
|
||||
WithSysNanotime()
|
||||
WithSysNanotime().
|
||||
// Bugboard #120 — same class as #27. Without WithRandSource, wazero
|
||||
// uses a deterministic zero-seed RNG, so TinyGo's crypto/rand.Read
|
||||
// returns IDENTICAL bytes on every fresh instance (and every
|
||||
// invocation is a fresh instance). That makes any unguessable ID /
|
||||
// code / nonce / token constant. Wire in the host CSPRNG so
|
||||
// crypto/rand (and auto-seeded math/rand) work. Same fix at
|
||||
// engine.go for the persistent-WS path.
|
||||
WithRandSource(cryptorand.Reader)
|
||||
|
||||
// Acquire concurrency slot
|
||||
if e.sem != nil {
|
||||
|
||||
181
core/pkg/serverless/execution/randsource_test.go
Normal file
181
core/pkg/serverless/execution/randsource_test.go
Normal file
@ -0,0 +1,181 @@
|
||||
package execution
|
||||
|
||||
import (
|
||||
"context"
|
||||
cryptorand "crypto/rand"
|
||||
"encoding/binary"
|
||||
"testing"
|
||||
|
||||
"github.com/tetratelabs/wazero"
|
||||
"github.com/tetratelabs/wazero/imports/wasi_snapshot_preview1"
|
||||
)
|
||||
|
||||
// Bugboard #120 — wazero defaults to a DETERMINISTIC (zero-seed) RNG source.
|
||||
// TinyGo wasm's crypto/rand.Read calls WASI random_get, so without
|
||||
// .WithRandSource(crypto/rand.Reader) every fresh instance gets the IDENTICAL
|
||||
// "random" byte sequence. Each serverless invocation is a fresh instance, so
|
||||
// any unguessable code / nonce / token a function generates is constant (the
|
||||
// observed "8LRJ2S on every rotate" symptom).
|
||||
//
|
||||
// The fix is .WithRandSource(cryptorand.Reader) on BOTH wazero moduleConfig
|
||||
// builders — executor.go (stateless) and engine.go (persistent WS). This test
|
||||
// pins the executor's config path: instantiate the SAME config twice and assert
|
||||
// the two instances produce DIFFERENT random bytes.
|
||||
//
|
||||
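// Note: the positive test builds its own copy of that config, so it verifies
// that .WithRandSource(cryptorand.Reader) yields distinct bytes per instance;
// it cannot by itself detect the call being dropped from executor.go or
// engine.go, so keep the mirrored config in sync with them. The negative
// control documents why the fix is necessary.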
|
||||
|
||||
// randProbeWasm is a hand-assembled WASM binary. It imports
// wasi_snapshot_preview1.random_get and exports a _start function that calls
// it with buf=0 and buf_len=8, so 8 random bytes land in memory[0:8]; the
// returned errno is dropped. Equivalent text form:
//
//	(module
//	  (type $random_get (func (param i32 i32) (result i32)))
//	  (type $start (func))
//	  (import "wasi_snapshot_preview1" "random_get"
//	    (func $random_get (type 0)))
//	  (memory (export "memory") 1)
//	  (func $_start (type 1)
//	    i32.const 0    ;; buf = 0
//	    i32.const 8    ;; buf_len = 8
//	    call $random_get
//	    drop)
//	  (export "_start" (func $_start)))
//
// Each section below is laid out as: section id, body size, body.
var randProbeWasm = []byte{
	// Preamble: "\0asm" magic, then version 1.
	0x00, 0x61, 0x73, 0x6d,
	0x01, 0x00, 0x00, 0x00,

	// Section 1 (type), 10-byte body: two function types.
	0x01, 0x0a, 0x02,
	0x60, 0x02, 0x7f, 0x7f, 0x01, 0x7f, // type 0: (i32, i32) -> i32
	0x60, 0x00, 0x00, // type 1: () -> ()

	// Section 2 (import), 37-byte (0x25) body: one function import.
	0x02, 0x25, 0x01,
	0x16, // module name length = 22
	// "wasi_" "snapshot_" "preview1"
	0x77, 0x61, 0x73, 0x69, 0x5f,
	0x73, 0x6e, 0x61, 0x70, 0x73, 0x68, 0x6f, 0x74, 0x5f,
	0x70, 0x72, 0x65, 0x76, 0x69, 0x65, 0x77, 0x31,
	0x0a, // field name length = 10
	// "random_get"
	0x72, 0x61, 0x6e, 0x64, 0x6f, 0x6d, 0x5f, 0x67, 0x65, 0x74,
	0x00, 0x00, // import kind = func, type index 0

	// Section 3 (function), 2-byte body: one function using type 1 (_start).
	0x03, 0x02, 0x01, 0x01,

	// Section 5 (memory), 3-byte body: one memory, flags=0 (no max), min 1 page.
	0x05, 0x03, 0x01, 0x00, 0x01,

	// Section 7 (export), 19-byte (0x13) body: two exports.
	0x07, 0x13, 0x02,
	0x06, 0x6d, 0x65, 0x6d, 0x6f, 0x72, 0x79, 0x02, 0x00, // "memory": kind=memory, index 0
	0x06, 0x5f, 0x73, 0x74, 0x61, 0x72, 0x74, 0x00, 0x01, // "_start": kind=func, index 1 (after the import)

	// Section 10 (code), 11-byte (0x0b) body: one function body.
	0x0a, 0x0b, 0x01,
	0x09,       // body size = 9
	0x00,       // no local groups
	0x41, 0x00, // i32.const 0 (buf)
	0x41, 0x08, // i32.const 8 (buf_len)
	0x10, 0x00, // call func 0 (the imported random_get)
	0x1a,       // drop the errno result
	0x0b,       // end
}
|
||||
|
||||
// readProbeRandom instantiates randProbeWasm once with the given moduleConfig
|
||||
// transform and returns the 8 random bytes written to memory[0:8].
|
||||
func readProbeRandom(t *testing.T, runtime wazero.Runtime, compiled wazero.CompiledModule, cfg wazero.ModuleConfig) uint64 {
|
||||
t.Helper()
|
||||
ctx := context.Background()
|
||||
mod, err := runtime.InstantiateModule(ctx, compiled, cfg)
|
||||
if err != nil {
|
||||
t.Fatalf("instantiate probe module: %v", err)
|
||||
}
|
||||
defer mod.Close(ctx)
|
||||
raw, ok := mod.Memory().Read(0, 8)
|
||||
if !ok {
|
||||
t.Fatal("could not read 8 bytes from probe memory at offset 0")
|
||||
}
|
||||
return binary.LittleEndian.Uint64(raw)
|
||||
}
|
||||
|
||||
func TestModuleConfig_randSourceIsRealNotDeterministic(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
runtime := wazero.NewRuntime(ctx)
|
||||
defer runtime.Close(ctx)
|
||||
|
||||
if _, err := wasi_snapshot_preview1.Instantiate(ctx, runtime); err != nil {
|
||||
t.Fatalf("instantiate WASI: %v", err)
|
||||
}
|
||||
compiled, err := runtime.CompileModule(ctx, randProbeWasm)
|
||||
if err != nil {
|
||||
t.Fatalf("compile probe wasm: %v (hex assembly likely off; recompute section sizes)", err)
|
||||
}
|
||||
defer compiled.Close(ctx)
|
||||
|
||||
// Mirror the executor.go moduleConfig — anonymous instance, real RNG. Two
|
||||
// separate instantiations of the SAME config must produce different bytes.
|
||||
newCfg := func() wazero.ModuleConfig {
|
||||
return wazero.NewModuleConfig().
|
||||
WithName("").
|
||||
WithArgs("probe").
|
||||
WithSysWalltime().
|
||||
WithSysNanotime().
|
||||
WithRandSource(cryptorand.Reader)
|
||||
}
|
||||
|
||||
a := readProbeRandom(t, runtime, compiled, newCfg())
|
||||
b := readProbeRandom(t, runtime, compiled, newCfg())
|
||||
if a == b {
|
||||
t.Errorf("BUG #120 REGRESSION: two fresh instances produced IDENTICAL random "+
|
||||
"bytes (%#016x) — crypto/rand is deterministic. Did the "+
|
||||
".WithRandSource(cryptorand.Reader) call get dropped from moduleConfig "+
|
||||
"in executor.go or engine.go?", a)
|
||||
}
|
||||
}
|
||||
|
||||
func TestModuleConfig_randWithoutFix_demoDeterministic(t *testing.T) {
|
||||
// Negative control: WITHOUT .WithRandSource(), confirm wazero's default RNG
|
||||
// is deterministic (identical bytes across fresh instances). This pins the
|
||||
// *cause*. If wazero ever defaults to a real entropy source, this test
|
||||
// fails — making the change visible instead of silently invalidating the
|
||||
// fix's necessity.
|
||||
ctx := context.Background()
|
||||
runtime := wazero.NewRuntime(ctx)
|
||||
defer runtime.Close(ctx)
|
||||
|
||||
if _, err := wasi_snapshot_preview1.Instantiate(ctx, runtime); err != nil {
|
||||
t.Fatalf("instantiate WASI: %v", err)
|
||||
}
|
||||
compiled, err := runtime.CompileModule(ctx, randProbeWasm)
|
||||
if err != nil {
|
||||
t.Fatalf("compile probe wasm: %v", err)
|
||||
}
|
||||
defer compiled.Close(ctx)
|
||||
|
||||
newDefault := func() wazero.ModuleConfig {
|
||||
return wazero.NewModuleConfig().WithName("").WithArgs("probe")
|
||||
}
|
||||
a := readProbeRandom(t, runtime, compiled, newDefault())
|
||||
b := readProbeRandom(t, runtime, compiled, newDefault())
|
||||
if a != b {
|
||||
t.Skipf("wazero default RandSource now differs across instances (%#016x vs %#016x) — "+
|
||||
"if real-by-default upstream, the bug-#120 fix may be redundant; review", a, b)
|
||||
}
|
||||
// Determinism confirmed → fix is meaningful.
|
||||
}
|
||||
@ -134,6 +134,18 @@ func (m *mockHostServices) WSPubSubUnbridge(ctx context.Context, clientID, topic
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *mockHostServices) SetHTTPResponse(ctx context.Context, status int, headers map[string]string, body []byte) error {
|
||||
return SetRawHTTPResponse(ctx, status, headers, body)
|
||||
}
|
||||
|
||||
func (m *mockHostServices) EphemeralStateSet(ctx context.Context, topic, key string, payload []byte, ttlMs int64) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *mockHostServices) EphemeralStateClear(ctx context.Context, topic, key string) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *mockHostServices) WSSend(ctx context.Context, clientID string, data []byte) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
@ -1,6 +1,7 @@
|
||||
package hostfunctions
|
||||
|
||||
import (
|
||||
"context"
|
||||
"net/http"
|
||||
"time"
|
||||
|
||||
@ -57,7 +58,7 @@ func NewHostFunctions(
|
||||
anyoneHTTPClient.Timeout = httpTimeout
|
||||
}
|
||||
|
||||
return &HostFunctions{
|
||||
hf := &HostFunctions{
|
||||
db: db,
|
||||
cacheClient: cacheClient,
|
||||
storage: storage,
|
||||
@ -77,4 +78,28 @@ func NewHostFunctions(
|
||||
logs: make([]serverless.LogEntry, 0),
|
||||
asyncInvokeSem: make(chan struct{}, asyncInvokeMaxInFlight),
|
||||
}
|
||||
|
||||
// Ephemeral-state store (bugboard #710). Publishes synthetic set/clear
|
||||
// events through the same pubsub adapter the pubsub_publish host fn uses,
|
||||
// and registers a WS disconnect hook so a client's owned state auto-clears
|
||||
// the instant its WebSocket drops — zero cron lag. Only wired when a
|
||||
// concrete WSManager is present (the disconnect hook + sweeper need it);
|
||||
// otherwise ephemeral_state_set returns an error.
|
||||
if wsm, ok := wsManager.(*serverless.WSManager); ok && wsm != nil {
|
||||
var publish func(ctx context.Context, namespace, topic string, data []byte) error
|
||||
if pubsubAdapter != nil {
|
||||
publish = func(ctx context.Context, _ string, topic string, data []byte) error {
|
||||
// The adapter namespaces internally (same as PubSubPublish), so
|
||||
// the namespace arg is informational only here.
|
||||
return pubsubAdapter.Publish(ctx, topic, data)
|
||||
}
|
||||
}
|
||||
hf.ephemeralStore = serverless.NewEphemeralStore(publish)
|
||||
wsm.AddDisconnectHook(func(clientID string) {
|
||||
hf.ephemeralStore.ClearClient(context.Background(), clientID)
|
||||
})
|
||||
hf.ephemeralStore.StartSweeper()
|
||||
}
|
||||
|
||||
return hf
|
||||
}
|
||||
|
||||
@ -17,6 +17,18 @@ func (h *HostFunctions) HTTPFetch(ctx context.Context, method, url string, heade
|
||||
return h.doFetch(ctx, "http_fetch", h.httpClient, method, url, headers, body)
|
||||
}
|
||||
|
||||
// SetHTTPResponse records a verbatim HTTP response for a RawHTTPResponse
|
||||
// function (bugboard #835). It delegates to the per-invocation collector
|
||||
// attached on ctx by the engine; the HTTP invoke handler replays the result
|
||||
// byte-for-byte. Validation (raw mode enabled, status range, header/body caps)
|
||||
// lives in serverless.SetRawHTTPResponse.
|
||||
func (h *HostFunctions) SetHTTPResponse(ctx context.Context, status int, headers map[string]string, body []byte) error {
|
||||
if err := serverless.SetRawHTTPResponse(ctx, status, headers, body); err != nil {
|
||||
return &serverless.HostFunctionError{Function: "set_http_response", Cause: err}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// AnyoneFetch makes an outbound HTTP request routed through the Anyone
|
||||
// (ANyONe protocol) SOCKS5 proxy, so the third-party endpoint sees an
|
||||
// Anyone exit IP instead of the gateway IP and the gateway can't
|
||||
|
||||
@ -186,6 +186,40 @@ func dedupBatchByTopic(msgs []pubsub.TopicMessage) []pubsub.TopicMessage {
|
||||
return out
|
||||
}
|
||||
|
||||
// EphemeralStateSet records WS-subscribe-tracked ephemeral state for the
|
||||
// current invocation's WS client and publishes a "set" event (bugboard #710).
|
||||
// The owning client ID and namespace are derived from the invocation context —
|
||||
// the function cannot spoof them. Auto-clears on the client's WS disconnect.
|
||||
func (h *HostFunctions) EphemeralStateSet(ctx context.Context, topic, key string, payload []byte, ttlMs int64) error {
|
||||
if h.ephemeralStore == nil {
|
||||
return &serverless.HostFunctionError{Function: "ephemeral_state_set", Cause: fmt.Errorf("ephemeral state not available on this gateway")}
|
||||
}
|
||||
cur := h.currentInvocationContext(ctx)
|
||||
if cur == nil {
|
||||
return &serverless.HostFunctionError{Function: "ephemeral_state_set", Cause: fmt.Errorf("no invocation context")}
|
||||
}
|
||||
if err := h.ephemeralStore.Set(ctx, cur.Namespace, cur.WSClientID, topic, key, payload, ttlMs); err != nil {
|
||||
return &serverless.HostFunctionError{Function: "ephemeral_state_set", Cause: err}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// EphemeralStateClear removes ephemeral state the current WS client owns and
|
||||
// publishes a "clear" event (bugboard #710). Idempotent.
|
||||
func (h *HostFunctions) EphemeralStateClear(ctx context.Context, topic, key string) error {
|
||||
if h.ephemeralStore == nil {
|
||||
return &serverless.HostFunctionError{Function: "ephemeral_state_clear", Cause: fmt.Errorf("ephemeral state not available on this gateway")}
|
||||
}
|
||||
cur := h.currentInvocationContext(ctx)
|
||||
if cur == nil {
|
||||
return &serverless.HostFunctionError{Function: "ephemeral_state_clear", Cause: fmt.Errorf("no invocation context")}
|
||||
}
|
||||
if err := h.ephemeralStore.Clear(ctx, cur.Namespace, cur.WSClientID, topic, key); err != nil {
|
||||
return &serverless.HostFunctionError{Function: "ephemeral_state_clear", Cause: err}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// WSSend sends data to a specific WebSocket client.
|
||||
func (h *HostFunctions) WSSend(ctx context.Context, clientID string, data []byte) error {
|
||||
if h.wsManager == nil {
|
||||
|
||||
@ -14,6 +14,9 @@ import (
|
||||
"go.uber.org/zap"
|
||||
)
|
||||
|
||||
// secretsKeyBytes is the required length of the AES-256 encryption key.
|
||||
const secretsKeyBytes = 32
|
||||
|
||||
// DBSecretsManager implements SecretsManager using the database.
|
||||
type DBSecretsManager struct {
|
||||
db rqlite.Client
|
||||
@ -25,21 +28,34 @@ type DBSecretsManager struct {
|
||||
var _ serverless.SecretsManager = (*DBSecretsManager)(nil)
|
||||
|
||||
// NewDBSecretsManager creates a secrets manager backed by the database.
|
||||
func NewDBSecretsManager(db rqlite.Client, encryptionKeyHex string, logger *zap.Logger) (*DBSecretsManager, error) {
|
||||
//
|
||||
// encryptionKeyHex must be a 32-byte AES-256 key, hex-encoded (64 chars).
|
||||
//
|
||||
// When encryptionKeyHex is empty the behaviour depends on allowEphemeral:
|
||||
// - allowEphemeral=false (production): returns an error. A misconfigured
|
||||
// node must fail loudly rather than silently generate a per-process
|
||||
// ephemeral key. With an ephemeral key, secrets encrypted by one
|
||||
// process cannot be decrypted by another (or after a restart), which
|
||||
// makes get_secret return garbage/errors (bugboard #837).
|
||||
// - allowEphemeral=true (tests/dev): generates a random per-process key
|
||||
// and logs a warning. Secrets will not persist across restarts.
|
||||
func NewDBSecretsManager(db rqlite.Client, encryptionKeyHex string, allowEphemeral bool, logger *zap.Logger) (*DBSecretsManager, error) {
|
||||
var key []byte
|
||||
if encryptionKeyHex != "" {
|
||||
var err error
|
||||
key, err = hex.DecodeString(encryptionKeyHex)
|
||||
if err != nil || len(key) != 32 {
|
||||
return nil, fmt.Errorf("invalid encryption key: must be 32 bytes hex-encoded")
|
||||
if err != nil || len(key) != secretsKeyBytes {
|
||||
return nil, fmt.Errorf("invalid secrets encryption key: must be %d bytes hex-encoded (%d hex chars)", secretsKeyBytes, secretsKeyBytes*2)
|
||||
}
|
||||
} else {
|
||||
// Generate a random key if none provided
|
||||
key = make([]byte, 32)
|
||||
} else if allowEphemeral {
|
||||
// Generate a random per-process key (dev/test only).
|
||||
key = make([]byte, secretsKeyBytes)
|
||||
if _, err := rand.Read(key); err != nil {
|
||||
return nil, fmt.Errorf("failed to generate encryption key: %w", err)
|
||||
return nil, fmt.Errorf("failed to generate ephemeral secrets encryption key: %w", err)
|
||||
}
|
||||
logger.Warn("Generated random secrets encryption key - secrets will not persist across restarts")
|
||||
logger.Warn("Generated random ephemeral secrets encryption key - secrets will NOT persist across restarts (dev/test only)")
|
||||
} else {
|
||||
return nil, fmt.Errorf("secrets encryption key is required: set secrets_encryption_key (see %s/secrets/secrets-encryption-key); without it secrets cannot be decrypted across processes or restarts (bugboard #837)", "~/.orama")
|
||||
}
|
||||
|
||||
return &DBSecretsManager{
|
||||
|
||||
199
core/pkg/serverless/hostfunctions/secrets_test.go
Normal file
199
core/pkg/serverless/hostfunctions/secrets_test.go
Normal file
@ -0,0 +1,199 @@
|
||||
package hostfunctions
|
||||
|
||||
import (
|
||||
"context"
|
||||
"database/sql"
|
||||
"errors"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"github.com/DeBrosOfficial/network/pkg/rqlite"
|
||||
"github.com/DeBrosOfficial/network/pkg/serverless"
|
||||
"go.uber.org/zap"
|
||||
)
|
||||
|
||||
// fakeSecretsDB is an in-memory rqlite.Client stub that implements only the
|
||||
// Exec/Query paths used by DBSecretsManager (INSERT...ON CONFLICT upsert and
|
||||
// SELECT by namespace+name). Storing the encrypted blob in a map lets us
|
||||
// round-trip a Set through a Get — the core of the bugboard #837 regression.
|
||||
type fakeSecretsDB struct {
|
||||
rqlite.Client
|
||||
store map[string][]byte // key: namespace\x00name -> encrypted_value
|
||||
}
|
||||
|
||||
func newFakeSecretsDB() *fakeSecretsDB {
|
||||
return &fakeSecretsDB{store: map[string][]byte{}}
|
||||
}
|
||||
|
||||
// storeKey builds the fake store's map key: namespace and name joined by a
// single NUL byte.
func storeKey(namespace, name string) string {
	return strings.Join([]string{namespace, name}, "\x00")
}
|
||||
|
||||
// Exec handles the upsert. args order matches secrets.go Set():
|
||||
// (id, namespace, name, encrypted_value, created_at, updated_at).
|
||||
func (f *fakeSecretsDB) Exec(ctx context.Context, query string, args ...any) (sql.Result, error) {
|
||||
if strings.Contains(query, "INSERT INTO function_secrets") {
|
||||
namespace, _ := args[1].(string)
|
||||
name, _ := args[2].(string)
|
||||
enc, _ := args[3].([]byte)
|
||||
cp := make([]byte, len(enc))
|
||||
copy(cp, enc)
|
||||
f.store[storeKey(namespace, name)] = cp
|
||||
return fakeResult{rows: 1}, nil
|
||||
}
|
||||
return fakeResult{}, nil
|
||||
}
|
||||
|
||||
// Query handles the SELECT encrypted_value ... WHERE namespace=? AND name=?.
|
||||
func (f *fakeSecretsDB) Query(ctx context.Context, dest any, query string, args ...any) error {
|
||||
if !strings.Contains(query, "SELECT encrypted_value") {
|
||||
return errors.New("unexpected query")
|
||||
}
|
||||
namespace, _ := args[0].(string)
|
||||
name, _ := args[1].(string)
|
||||
rows, ok := dest.(*[]struct {
|
||||
EncryptedValue []byte `db:"encrypted_value"`
|
||||
})
|
||||
if !ok {
|
||||
return errors.New("unexpected dest type")
|
||||
}
|
||||
if enc, found := f.store[storeKey(namespace, name)]; found {
|
||||
*rows = append(*rows, struct {
|
||||
EncryptedValue []byte `db:"encrypted_value"`
|
||||
}{EncryptedValue: enc})
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// fakeResult is a minimal result value for the fake DB's Exec: it carries
// only the affected-row count.
type fakeResult struct {
	rows int64
}

// LastInsertId always reports 0 with no error.
func (res fakeResult) LastInsertId() (int64, error) {
	return 0, nil
}

// RowsAffected reports the stored row count with no error.
func (res fakeResult) RowsAffected() (int64, error) {
	return res.rows, nil
}
|
||||
|
||||
// validKey is a 32-byte AES-256 key, hex-encoded (64 chars).
|
||||
const validKey = "0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef"
|
||||
|
||||
// otherKey is a different valid 32-byte key.
|
||||
const otherKey = "fedcba9876543210fedcba9876543210fedcba9876543210fedcba9876543210"
|
||||
|
||||
// TestDBSecretsManager_SetGetRoundTrip_sameKey proves the fix: a secret
|
||||
// encrypted with a fixed key is decryptable by a SEPARATE manager constructed
|
||||
// with the SAME key (simulating another process / a restart).
|
||||
func TestDBSecretsManager_SetGetRoundTrip_sameKey(t *testing.T) {
|
||||
db := newFakeSecretsDB()
|
||||
logger := zap.NewNop()
|
||||
ctx := context.Background()
|
||||
|
||||
writer, err := NewDBSecretsManager(db, validKey, false, logger)
|
||||
if err != nil {
|
||||
t.Fatalf("NewDBSecretsManager (writer) failed: %v", err)
|
||||
}
|
||||
if err := writer.Set(ctx, "ns1", "API_TOKEN", "s3cr3t-value"); err != nil {
|
||||
t.Fatalf("Set failed: %v", err)
|
||||
}
|
||||
|
||||
// A fresh manager with the SAME key (different process / post-restart).
|
||||
reader, err := NewDBSecretsManager(db, validKey, false, logger)
|
||||
if err != nil {
|
||||
t.Fatalf("NewDBSecretsManager (reader) failed: %v", err)
|
||||
}
|
||||
got, err := reader.Get(ctx, "ns1", "API_TOKEN")
|
||||
if err != nil {
|
||||
t.Fatalf("Get failed: %v", err)
|
||||
}
|
||||
if got != "s3cr3t-value" {
|
||||
t.Errorf("Get returned %q, want %q", got, "s3cr3t-value")
|
||||
}
|
||||
}
|
||||
|
||||
// TestDBSecretsManager_GetWithDifferentKey_fails proves the bug it guards
|
||||
// against: a manager with a DIFFERENT key cannot decrypt — exactly what
|
||||
// happened when each process generated its own ephemeral key (bugboard #837).
|
||||
func TestDBSecretsManager_GetWithDifferentKey_fails(t *testing.T) {
|
||||
db := newFakeSecretsDB()
|
||||
logger := zap.NewNop()
|
||||
ctx := context.Background()
|
||||
|
||||
writer, err := NewDBSecretsManager(db, validKey, false, logger)
|
||||
if err != nil {
|
||||
t.Fatalf("NewDBSecretsManager (writer) failed: %v", err)
|
||||
}
|
||||
if err := writer.Set(ctx, "ns1", "API_TOKEN", "s3cr3t-value"); err != nil {
|
||||
t.Fatalf("Set failed: %v", err)
|
||||
}
|
||||
|
||||
reader, err := NewDBSecretsManager(db, otherKey, false, logger)
|
||||
if err != nil {
|
||||
t.Fatalf("NewDBSecretsManager (reader) failed: %v", err)
|
||||
}
|
||||
if _, err := reader.Get(ctx, "ns1", "API_TOKEN"); err == nil {
|
||||
t.Fatal("expected decryption to fail with a different key, got nil error")
|
||||
}
|
||||
}
|
||||
|
||||
// TestDBSecretsManager_emptyKey_isLoud verifies the production constructor
|
||||
// refuses to start with an empty key (allowEphemeral=false) instead of
|
||||
// silently generating an undecryptable ephemeral key.
|
||||
func TestDBSecretsManager_emptyKey_isLoud(t *testing.T) {
|
||||
db := newFakeSecretsDB()
|
||||
_, err := NewDBSecretsManager(db, "", false, zap.NewNop())
|
||||
if err == nil {
|
||||
t.Fatal("expected error for empty key with allowEphemeral=false, got nil")
|
||||
}
|
||||
if !strings.Contains(err.Error(), "secrets encryption key is required") {
|
||||
t.Errorf("unexpected error message: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// TestDBSecretsManager_emptyKey_ephemeralAllowed verifies tests/dev can still
|
||||
// opt into a per-process ephemeral key.
|
||||
func TestDBSecretsManager_emptyKey_ephemeralAllowed(t *testing.T) {
|
||||
db := newFakeSecretsDB()
|
||||
mgr, err := NewDBSecretsManager(db, "", true, zap.NewNop())
|
||||
if err != nil {
|
||||
t.Fatalf("expected ephemeral key to be allowed, got error: %v", err)
|
||||
}
|
||||
// Ephemeral key still round-trips within the same process.
|
||||
ctx := context.Background()
|
||||
if err := mgr.Set(ctx, "ns1", "K", "v"); err != nil {
|
||||
t.Fatalf("Set failed: %v", err)
|
||||
}
|
||||
got, err := mgr.Get(ctx, "ns1", "K")
|
||||
if err != nil {
|
||||
t.Fatalf("Get failed: %v", err)
|
||||
}
|
||||
if got != "v" {
|
||||
t.Errorf("Get returned %q, want %q", got, "v")
|
||||
}
|
||||
}
|
||||
|
||||
// TestDBSecretsManager_invalidKey_rejected covers malformed keys (wrong
|
||||
// length, non-hex) at the boundary.
|
||||
func TestDBSecretsManager_invalidKey_rejected(t *testing.T) {
|
||||
db := newFakeSecretsDB()
|
||||
cases := map[string]string{
|
||||
"too short": "abcd",
|
||||
"odd hex": "abc",
|
||||
"not hex": strings.Repeat("zz", 32),
|
||||
"wrong bytes": "0123456789abcdef", // 8 bytes, not 32
|
||||
}
|
||||
for name, key := range cases {
|
||||
t.Run(name, func(t *testing.T) {
|
||||
if _, err := NewDBSecretsManager(db, key, false, zap.NewNop()); err == nil {
|
||||
t.Fatalf("expected error for invalid key %q, got nil", key)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// TestDBSecretsManager_Get_notFound verifies the not-found sentinel survives.
|
||||
func TestDBSecretsManager_Get_notFound(t *testing.T) {
|
||||
db := newFakeSecretsDB()
|
||||
mgr, err := NewDBSecretsManager(db, validKey, false, zap.NewNop())
|
||||
if err != nil {
|
||||
t.Fatalf("NewDBSecretsManager failed: %v", err)
|
||||
}
|
||||
if _, err := mgr.Get(context.Background(), "ns1", "missing"); !errors.Is(err, serverless.ErrSecretNotFound) {
|
||||
t.Errorf("expected ErrSecretNotFound, got %v", err)
|
||||
}
|
||||
}
|
||||
@ -97,6 +97,13 @@ type HostFunctions struct {
|
||||
triggerDispatcher *triggers.PubSubDispatcher
|
||||
triggerDispatcherLock sync.RWMutex
|
||||
|
||||
// ephemeralStore backs ephemeral_state_set / ephemeral_state_clear
|
||||
// (bugboard #710). Constructed in NewHostFunctions when a WS manager is
|
||||
// present; nil otherwise (host fns then return an error). The store
|
||||
// registers a disconnect hook on the WS manager so a client's owned state
|
||||
// auto-clears the instant its WebSocket disconnects.
|
||||
ephemeralStore *serverless.EphemeralStore
|
||||
|
||||
// Current invocation context (set per-execution)
|
||||
invCtx *serverless.InvocationContext
|
||||
invCtxLock sync.RWMutex
|
||||
|
||||
@ -75,6 +75,13 @@ type InvokeResponse struct {
|
||||
Error string `json:"error,omitempty"`
|
||||
DurationMS int64 `json:"duration_ms"`
|
||||
Retries int `json:"retries,omitempty"`
|
||||
|
||||
// RawHTTP carries a verbatim HTTP response set by a RawHTTPResponse
|
||||
// function via set_http_response (bugboard #835). nil for normal
|
||||
// functions and for raw functions that never called set_http_response —
|
||||
// the HTTP handler falls back to the standard JSON/Ack path in that case.
|
||||
// Not serialized; consumed directly by the HTTP invoke handler.
|
||||
RawHTTP *RawHTTPResult `json:"-"`
|
||||
}
|
||||
|
||||
// Invoke executes a function with automatic retry logic.
|
||||
@ -169,6 +176,8 @@ func (i *Invoker) Invoke(ctx context.Context, req *InvokeRequest) (*InvokeRespon
|
||||
}
|
||||
|
||||
response.Status = InvocationStatusSuccess
|
||||
// Surface any verbatim HTTP response the function set (bugboard #835).
|
||||
response.RawHTTP = invCtx.RawHTTP
|
||||
return response, nil
|
||||
}
|
||||
|
||||
|
||||
@ -247,6 +247,18 @@ func (m *MockHostServices) WSPubSubUnbridge(ctx context.Context, clientID, topic
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *MockHostServices) SetHTTPResponse(ctx context.Context, status int, headers map[string]string, body []byte) error {
|
||||
return SetRawHTTPResponse(ctx, status, headers, body)
|
||||
}
|
||||
|
||||
func (m *MockHostServices) EphemeralStateSet(ctx context.Context, topic, key string, payload []byte, ttlMs int64) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *MockHostServices) EphemeralStateClear(ctx context.Context, topic, key string) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *MockHostServices) WSSend(ctx context.Context, clientID string, data []byte) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
142
core/pkg/serverless/raw_http.go
Normal file
142
core/pkg/serverless/raw_http.go
Normal file
@ -0,0 +1,142 @@
|
||||
package serverless
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"sync"
|
||||
)
|
||||
|
||||
// Raw-HTTP-response mode (bugboard #835).
|
||||
//
|
||||
// A function deployed with RawHTTPResponse=true can emit a verbatim HTTP
|
||||
// response (status + headers + body) instead of the JSON/Ack-wrapped output
|
||||
// the stateless invoke handler normally produces. This lets a namespace app
|
||||
// proxy an upstream RPC (Helius / Alchemy) transparently — the function reads
|
||||
// the request, calls the upstream, and replays the upstream's status, headers,
|
||||
// and body byte-for-byte back to its own caller.
|
||||
//
|
||||
// The primitive provided here is ONLY the response carrier + the host-call
|
||||
// validation. Per-user-JWT quota gating (which the ticket mentions) is the
|
||||
// APP's responsibility: the function can call oh.GetCallerJwtSubject() and
|
||||
// decide whether to serve. The gateway does not implement quota here.
|
||||
|
||||
// Limits enforced on a raw HTTP response before it is recorded.
const (
	// rawHTTPMinStatus and rawHTTPMaxStatus are the inclusive bounds of an
	// accepted HTTP status code.
	rawHTTPMinStatus = 100
	rawHTTPMaxStatus = 599

	// rawHTTPMaxHeaders is the largest number of response headers a function
	// may set in one call. It is generous for a proxy use-case while still
	// bounding the per-invocation allocation a hostile function could force.
	rawHTTPMaxHeaders = 64

	// rawHTTPMaxBodyBytes is the largest response body, in bytes, a function
	// may set (8 MiB). It is sized for large JSON-RPC payloads without
	// letting a function buffer an unbounded body in gateway memory.
	rawHTTPMaxBodyBytes = 8 * 1024 * 1024
)
|
||||
|
||||
// RawHTTPResult holds a verbatim HTTP response recorded through
// set_http_response: a status code, optional headers and a body. The zero
// value (Set == false) means no response was recorded.
type RawHTTPResult struct {
	// Status is the HTTP status code.
	Status int
	// Headers are the response headers; nil when none were supplied.
	Headers map[string]string
	// Body is the response body.
	Body []byte
	// Set is true once a response has been recorded.
	Set bool
}
|
||||
|
||||
// rawHTTPCollector is the mutable per-invocation sink the set_http_response
|
||||
// host function writes to. It rides the invocation's context (same per-call
|
||||
// propagation model as the publish counter and log buffer) so concurrent
|
||||
// invocations never cross-write each other's response.
|
||||
type rawHTTPCollector struct {
|
||||
mu sync.Mutex
|
||||
result RawHTTPResult
|
||||
}
|
||||
|
||||
// rawHTTPKey is the private context-value key type under which the raw-HTTP
// collector is stored. Being unexported, it cannot collide with keys from
// other packages.
type rawHTTPKey struct{}
|
||||
|
||||
// WithRawHTTPCollector returns a derived ctx carrying a FRESH per-invocation
|
||||
// raw-HTTP response collector. The engine attaches this before executing a
|
||||
// RawHTTPResponse function so the set_http_response host call has somewhere to
|
||||
// write; for non-raw functions the collector is absent and the host call is a
|
||||
// validated no-op.
|
||||
func WithRawHTTPCollector(ctx context.Context) context.Context {
|
||||
return context.WithValue(ctx, rawHTTPKey{}, &rawHTTPCollector{})
|
||||
}
|
||||
|
||||
// rawHTTPCollectorFromCtx extracts the collector attached via
|
||||
// WithRawHTTPCollector, or nil if none is present (non-raw function, or an
|
||||
// untracked code path).
|
||||
func rawHTTPCollectorFromCtx(ctx context.Context) *rawHTTPCollector {
|
||||
if ctx == nil {
|
||||
return nil
|
||||
}
|
||||
c, _ := ctx.Value(rawHTTPKey{}).(*rawHTTPCollector)
|
||||
return c
|
||||
}
|
||||
|
||||
// SetRawHTTPResponse records a verbatim HTTP response on the invocation's
|
||||
// collector. Returns an error if no collector is attached (the function was
|
||||
// not deployed with RawHTTPResponse), or if the status / header count / body
|
||||
// size fail validation. Headers may be nil. The body is copied so the caller
|
||||
// (which reads it out of guest WASM memory) may reuse its buffer.
|
||||
func SetRawHTTPResponse(ctx context.Context, status int, headers map[string]string, body []byte) error {
|
||||
c := rawHTTPCollectorFromCtx(ctx)
|
||||
if c == nil {
|
||||
return fmt.Errorf("set_http_response: function is not deployed with raw_http_response enabled")
|
||||
}
|
||||
if status < rawHTTPMinStatus || status > rawHTTPMaxStatus {
|
||||
return fmt.Errorf("set_http_response: status %d out of range [%d,%d]", status, rawHTTPMinStatus, rawHTTPMaxStatus)
|
||||
}
|
||||
if len(headers) > rawHTTPMaxHeaders {
|
||||
return fmt.Errorf("set_http_response: too many headers (%d > %d)", len(headers), rawHTTPMaxHeaders)
|
||||
}
|
||||
if len(body) > rawHTTPMaxBodyBytes {
|
||||
return fmt.Errorf("set_http_response: body too large (%d bytes > %d)", len(body), rawHTTPMaxBodyBytes)
|
||||
}
|
||||
|
||||
bodyCopy := make([]byte, len(body))
|
||||
copy(bodyCopy, body)
|
||||
|
||||
var hdrCopy map[string]string
|
||||
if len(headers) > 0 {
|
||||
hdrCopy = make(map[string]string, len(headers))
|
||||
for k, v := range headers {
|
||||
hdrCopy[k] = v
|
||||
}
|
||||
}
|
||||
|
||||
c.mu.Lock()
|
||||
c.result = RawHTTPResult{
|
||||
Status: status,
|
||||
Headers: hdrCopy,
|
||||
Body: bodyCopy,
|
||||
Set: true,
|
||||
}
|
||||
c.mu.Unlock()
|
||||
return nil
|
||||
}
|
||||
|
||||
// TakeRawHTTPResponse returns the raw HTTP response recorded on the ctx's
|
||||
// collector and whether one was set. Returns (zero, false) when no collector
|
||||
// is attached or the function never called set_http_response. The engine calls
|
||||
// this after Execute to surface the response on the InvokeResponse.
|
||||
func TakeRawHTTPResponse(ctx context.Context) (RawHTTPResult, bool) {
|
||||
c := rawHTTPCollectorFromCtx(ctx)
|
||||
if c == nil {
|
||||
return RawHTTPResult{}, false
|
||||
}
|
||||
c.mu.Lock()
|
||||
res := c.result
|
||||
c.mu.Unlock()
|
||||
if !res.Set {
|
||||
return RawHTTPResult{}, false
|
||||
}
|
||||
return res, true
|
||||
}
|
||||
129
core/pkg/serverless/raw_http_test.go
Normal file
129
core/pkg/serverless/raw_http_test.go
Normal file
@ -0,0 +1,129 @@
|
||||
package serverless
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestSetRawHTTPResponse_happyPath(t *testing.T) {
|
||||
ctx := WithRawHTTPCollector(context.Background())
|
||||
|
||||
headers := map[string]string{"Content-Type": "application/json"}
|
||||
body := []byte(`{"jsonrpc":"2.0","result":42}`)
|
||||
if err := SetRawHTTPResponse(ctx, 200, headers, body); err != nil {
|
||||
t.Fatalf("SetRawHTTPResponse: unexpected error: %v", err)
|
||||
}
|
||||
|
||||
res, ok := TakeRawHTTPResponse(ctx)
|
||||
if !ok {
|
||||
t.Fatal("TakeRawHTTPResponse: expected a response to be set")
|
||||
}
|
||||
if res.Status != 200 {
|
||||
t.Errorf("status = %d, want 200", res.Status)
|
||||
}
|
||||
if res.Headers["Content-Type"] != "application/json" {
|
||||
t.Errorf("Content-Type header = %q, want application/json", res.Headers["Content-Type"])
|
||||
}
|
||||
if !bytes.Equal(res.Body, body) {
|
||||
t.Errorf("body = %q, want %q", res.Body, body)
|
||||
}
|
||||
}
|
||||
|
||||
func TestSetRawHTTPResponse_copiesBodyAndHeaders(t *testing.T) {
|
||||
ctx := WithRawHTTPCollector(context.Background())
|
||||
|
||||
headers := map[string]string{"X-Test": "v1"}
|
||||
body := []byte("original")
|
||||
if err := SetRawHTTPResponse(ctx, 200, headers, body); err != nil {
|
||||
t.Fatalf("SetRawHTTPResponse: %v", err)
|
||||
}
|
||||
|
||||
// Mutate caller-owned buffers AFTER the call — the stored copy must not change.
|
||||
body[0] = 'X'
|
||||
headers["X-Test"] = "mutated"
|
||||
|
||||
res, _ := TakeRawHTTPResponse(ctx)
|
||||
if string(res.Body) != "original" {
|
||||
t.Errorf("body was not copied: got %q", res.Body)
|
||||
}
|
||||
if res.Headers["X-Test"] != "v1" {
|
||||
t.Errorf("headers were not copied: got %q", res.Headers["X-Test"])
|
||||
}
|
||||
}
|
||||
|
||||
func TestSetRawHTTPResponse_noCollector(t *testing.T) {
|
||||
// No collector attached → the function is not in raw mode; must error.
|
||||
err := SetRawHTTPResponse(context.Background(), 200, nil, []byte("x"))
|
||||
if err == nil {
|
||||
t.Fatal("expected error when no collector is attached")
|
||||
}
|
||||
if !strings.Contains(err.Error(), "raw_http_response") {
|
||||
t.Errorf("error = %q, want it to mention raw_http_response", err.Error())
|
||||
}
|
||||
}
|
||||
|
||||
func TestSetRawHTTPResponse_rejectsBadStatus(t *testing.T) {
|
||||
for _, status := range []int{0, 99, 600, 1000, -1} {
|
||||
ctx := WithRawHTTPCollector(context.Background())
|
||||
if err := SetRawHTTPResponse(ctx, status, nil, nil); err == nil {
|
||||
t.Errorf("status %d: expected validation error, got nil", status)
|
||||
}
|
||||
if _, ok := TakeRawHTTPResponse(ctx); ok {
|
||||
t.Errorf("status %d: response should not be set after a rejected status", status)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestSetRawHTTPResponse_rejectsTooManyHeaders(t *testing.T) {
|
||||
ctx := WithRawHTTPCollector(context.Background())
|
||||
headers := make(map[string]string, rawHTTPMaxHeaders+1)
|
||||
for i := 0; i <= rawHTTPMaxHeaders; i++ {
|
||||
headers["h"+string(rune('a'+i%26))+string(rune('0'+i/26))] = "v"
|
||||
}
|
||||
if len(headers) <= rawHTTPMaxHeaders {
|
||||
t.Fatalf("test setup: expected > %d headers, got %d", rawHTTPMaxHeaders, len(headers))
|
||||
}
|
||||
if err := SetRawHTTPResponse(ctx, 200, headers, nil); err == nil {
|
||||
t.Fatal("expected error for too many headers")
|
||||
}
|
||||
}
|
||||
|
||||
func TestSetRawHTTPResponse_rejectsOversizedBody(t *testing.T) {
|
||||
ctx := WithRawHTTPCollector(context.Background())
|
||||
body := make([]byte, rawHTTPMaxBodyBytes+1)
|
||||
if err := SetRawHTTPResponse(ctx, 200, nil, body); err == nil {
|
||||
t.Fatal("expected error for oversized body")
|
||||
}
|
||||
}
|
||||
|
||||
func TestTakeRawHTTPResponse_notSet(t *testing.T) {
|
||||
// Collector attached but set_http_response never called → (zero, false).
|
||||
ctx := WithRawHTTPCollector(context.Background())
|
||||
if _, ok := TakeRawHTTPResponse(ctx); ok {
|
||||
t.Fatal("expected ok=false when no response was set")
|
||||
}
|
||||
|
||||
// No collector at all → also (zero, false).
|
||||
if _, ok := TakeRawHTTPResponse(context.Background()); ok {
|
||||
t.Fatal("expected ok=false with no collector")
|
||||
}
|
||||
}
|
||||
|
||||
func TestSetRawHTTPResponse_lastWriteWins(t *testing.T) {
|
||||
ctx := WithRawHTTPCollector(context.Background())
|
||||
if err := SetRawHTTPResponse(ctx, 200, nil, []byte("first")); err != nil {
|
||||
t.Fatalf("first SetRawHTTPResponse: %v", err)
|
||||
}
|
||||
if err := SetRawHTTPResponse(ctx, 503, map[string]string{"Retry-After": "5"}, []byte("second")); err != nil {
|
||||
t.Fatalf("second SetRawHTTPResponse: %v", err)
|
||||
}
|
||||
res, ok := TakeRawHTTPResponse(ctx)
|
||||
if !ok {
|
||||
t.Fatal("expected response to be set")
|
||||
}
|
||||
if res.Status != 503 || string(res.Body) != "second" || res.Headers["Retry-After"] != "5" {
|
||||
t.Errorf("last-write-wins failed: got status=%d body=%q headers=%v", res.Status, res.Body, res.Headers)
|
||||
}
|
||||
}
|
||||
@ -107,8 +107,9 @@ func (r *Registry) Register(ctx context.Context, fn *FunctionDefinition, wasmByt
|
||||
memory_limit_mb, timeout_seconds, is_public,
|
||||
retry_count, retry_delay_seconds, dlq_topic,
|
||||
status, created_at, updated_at, created_by,
|
||||
ws_persistent, ws_idle_timeout_sec, ws_max_frame_bytes, ws_max_inflight_per_conn
|
||||
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
||||
ws_persistent, ws_idle_timeout_sec, ws_max_frame_bytes, ws_max_inflight_per_conn,
|
||||
raw_http_response
|
||||
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
||||
`
|
||||
_, err = r.db.Exec(ctx, query,
|
||||
id, fn.Name, fn.Namespace, version, wasmCID,
|
||||
@ -116,6 +117,7 @@ func (r *Registry) Register(ctx context.Context, fn *FunctionDefinition, wasmByt
|
||||
fn.RetryCount, retryDelay, fn.DLQTopic,
|
||||
string(FunctionStatusActive), now, now, fn.Namespace,
|
||||
fn.WSPersistent, fn.WSIdleTimeoutSec, fn.WSMaxFrameBytes, fn.WSMaxInflightPerConn,
|
||||
fn.RawHTTPResponse,
|
||||
)
|
||||
if err != nil {
|
||||
return nil, &DeployError{FunctionName: fn.Name, Cause: fmt.Errorf("failed to register function: %w", err)}
|
||||
@ -154,7 +156,8 @@ func (r *Registry) Get(ctx context.Context, namespace, name string, version int)
|
||||
memory_limit_mb, timeout_seconds, is_public,
|
||||
retry_count, retry_delay_seconds, dlq_topic,
|
||||
status, created_at, updated_at, created_by,
|
||||
ws_persistent, ws_idle_timeout_sec, ws_max_frame_bytes, ws_max_inflight_per_conn
|
||||
ws_persistent, ws_idle_timeout_sec, ws_max_frame_bytes, ws_max_inflight_per_conn,
|
||||
raw_http_response
|
||||
FROM functions
|
||||
WHERE namespace = ? AND name = ? AND status = ?
|
||||
ORDER BY version DESC
|
||||
@ -167,7 +170,8 @@ func (r *Registry) Get(ctx context.Context, namespace, name string, version int)
|
||||
memory_limit_mb, timeout_seconds, is_public,
|
||||
retry_count, retry_delay_seconds, dlq_topic,
|
||||
status, created_at, updated_at, created_by,
|
||||
ws_persistent, ws_idle_timeout_sec, ws_max_frame_bytes, ws_max_inflight_per_conn
|
||||
ws_persistent, ws_idle_timeout_sec, ws_max_frame_bytes, ws_max_inflight_per_conn,
|
||||
raw_http_response
|
||||
FROM functions
|
||||
WHERE namespace = ? AND name = ? AND version = ?
|
||||
`
|
||||
@ -197,7 +201,8 @@ func (r *Registry) List(ctx context.Context, namespace string) ([]*Function, err
|
||||
f.memory_limit_mb, f.timeout_seconds, f.is_public,
|
||||
f.retry_count, f.retry_delay_seconds, f.dlq_topic,
|
||||
f.status, f.created_at, f.updated_at, f.created_by,
|
||||
f.ws_persistent, f.ws_idle_timeout_sec, f.ws_max_frame_bytes, f.ws_max_inflight_per_conn
|
||||
f.ws_persistent, f.ws_idle_timeout_sec, f.ws_max_frame_bytes, f.ws_max_inflight_per_conn,
|
||||
f.raw_http_response
|
||||
FROM functions f
|
||||
INNER JOIN (
|
||||
SELECT namespace, name, MAX(version) as max_version
|
||||
@ -668,6 +673,11 @@ func (r *Registry) rowToFunction(row *functionRow) *Function {
|
||||
WSIdleTimeoutSec: row.WSIdleTimeoutSec,
|
||||
WSMaxFrameBytes: row.WSMaxFrameBytes,
|
||||
WSMaxInflightPerConn: row.WSMaxInflightPerConn,
|
||||
|
||||
// Raw-HTTP-response mode (bugboard #835). Without reading this back
|
||||
// the invoke handler's `if fn.RawHTTPResponse` engine branch never
|
||||
// fires and set_http_response is a no-op for every function.
|
||||
RawHTTPResponse: row.RawHTTPResponse,
|
||||
}
|
||||
}
|
||||
|
||||
@ -716,6 +726,11 @@ type functionRow struct {
|
||||
WSIdleTimeoutSec int `db:"ws_idle_timeout_sec"`
|
||||
WSMaxFrameBytes int `db:"ws_max_frame_bytes"`
|
||||
WSMaxInflightPerConn int `db:"ws_max_inflight_per_conn"`
|
||||
|
||||
// Raw-HTTP-response mode (bugboard #835). Backed by migration
|
||||
// 029_raw_http_response.sql; defaults to false so existing functions
|
||||
// keep the JSON/Ack-wrapped behavior.
|
||||
RawHTTPResponse bool `db:"raw_http_response"`
|
||||
}
|
||||
|
||||
type envVarRow struct {
|
||||
|
||||
@ -57,8 +57,9 @@ func (s *FunctionStore) Save(ctx context.Context, fn *FunctionDefinition, wasmCI
|
||||
memory_limit_mb, timeout_seconds, is_public,
|
||||
retry_count, retry_delay_seconds, dlq_topic,
|
||||
status, created_at, updated_at, created_by,
|
||||
ws_persistent, ws_idle_timeout_sec, ws_max_frame_bytes, ws_max_inflight_per_conn
|
||||
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
||||
ws_persistent, ws_idle_timeout_sec, ws_max_frame_bytes, ws_max_inflight_per_conn,
|
||||
raw_http_response
|
||||
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
||||
`
|
||||
_, err := s.db.Exec(ctx, query,
|
||||
id, fn.Name, fn.Namespace, version, wasmCID,
|
||||
@ -66,6 +67,7 @@ func (s *FunctionStore) Save(ctx context.Context, fn *FunctionDefinition, wasmCI
|
||||
fn.RetryCount, retryDelay, fn.DLQTopic,
|
||||
string(FunctionStatusActive), now, now, fn.Namespace,
|
||||
fn.WSPersistent, fn.WSIdleTimeoutSec, fn.WSMaxFrameBytes, fn.WSMaxInflightPerConn,
|
||||
fn.RawHTTPResponse,
|
||||
)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to save function: %w", err)
|
||||
@ -101,6 +103,7 @@ func (s *FunctionStore) Save(ctx context.Context, fn *FunctionDefinition, wasmCI
|
||||
WSIdleTimeoutSec: fn.WSIdleTimeoutSec,
|
||||
WSMaxFrameBytes: fn.WSMaxFrameBytes,
|
||||
WSMaxInflightPerConn: fn.WSMaxInflightPerConn,
|
||||
RawHTTPResponse: fn.RawHTTPResponse,
|
||||
}, nil
|
||||
}
|
||||
|
||||
@ -114,7 +117,7 @@ func (s *FunctionStore) Get(ctx context.Context, namespace, name string, version
|
||||
|
||||
if version == 0 {
|
||||
query = `
|
||||
SELECT id, name, namespace, version, wasm_cid, source_cid, ws_persistent, ws_idle_timeout_sec, ws_max_frame_bytes, ws_max_inflight_per_conn,
|
||||
SELECT id, name, namespace, version, wasm_cid, source_cid, ws_persistent, ws_idle_timeout_sec, ws_max_frame_bytes, ws_max_inflight_per_conn, raw_http_response,
|
||||
memory_limit_mb, timeout_seconds, is_public,
|
||||
retry_count, retry_delay_seconds, dlq_topic,
|
||||
status, created_at, updated_at, created_by
|
||||
@ -126,7 +129,7 @@ func (s *FunctionStore) Get(ctx context.Context, namespace, name string, version
|
||||
args = []interface{}{namespace, name, string(FunctionStatusActive)}
|
||||
} else {
|
||||
query = `
|
||||
SELECT id, name, namespace, version, wasm_cid, source_cid, ws_persistent, ws_idle_timeout_sec, ws_max_frame_bytes, ws_max_inflight_per_conn,
|
||||
SELECT id, name, namespace, version, wasm_cid, source_cid, ws_persistent, ws_idle_timeout_sec, ws_max_frame_bytes, ws_max_inflight_per_conn, raw_http_response,
|
||||
memory_limit_mb, timeout_seconds, is_public,
|
||||
retry_count, retry_delay_seconds, dlq_topic,
|
||||
status, created_at, updated_at, created_by
|
||||
@ -154,7 +157,7 @@ func (s *FunctionStore) Get(ctx context.Context, namespace, name string, version
|
||||
// GetByID retrieves a function by its ID.
|
||||
func (s *FunctionStore) GetByID(ctx context.Context, id string) (*Function, error) {
|
||||
query := `
|
||||
SELECT id, name, namespace, version, wasm_cid, source_cid, ws_persistent, ws_idle_timeout_sec, ws_max_frame_bytes, ws_max_inflight_per_conn,
|
||||
SELECT id, name, namespace, version, wasm_cid, source_cid, ws_persistent, ws_idle_timeout_sec, ws_max_frame_bytes, ws_max_inflight_per_conn, raw_http_response,
|
||||
memory_limit_mb, timeout_seconds, is_public,
|
||||
retry_count, retry_delay_seconds, dlq_topic,
|
||||
status, created_at, updated_at, created_by
|
||||
@ -180,7 +183,7 @@ func (s *FunctionStore) GetByNameInternal(ctx context.Context, namespace, name s
|
||||
name = strings.TrimSpace(name)
|
||||
|
||||
query := `
|
||||
SELECT id, name, namespace, version, wasm_cid, source_cid, ws_persistent, ws_idle_timeout_sec, ws_max_frame_bytes, ws_max_inflight_per_conn,
|
||||
SELECT id, name, namespace, version, wasm_cid, source_cid, ws_persistent, ws_idle_timeout_sec, ws_max_frame_bytes, ws_max_inflight_per_conn, raw_http_response,
|
||||
memory_limit_mb, timeout_seconds, is_public,
|
||||
retry_count, retry_delay_seconds, dlq_topic,
|
||||
status, created_at, updated_at, created_by
|
||||
@ -207,6 +210,7 @@ func (s *FunctionStore) List(ctx context.Context, namespace string) ([]*Function
|
||||
query := `
|
||||
SELECT f.id, f.name, f.namespace, f.version, f.wasm_cid, f.source_cid,
|
||||
f.ws_persistent, f.ws_idle_timeout_sec, f.ws_max_frame_bytes, f.ws_max_inflight_per_conn,
|
||||
f.raw_http_response,
|
||||
f.memory_limit_mb, f.timeout_seconds, f.is_public,
|
||||
f.retry_count, f.retry_delay_seconds, f.dlq_topic,
|
||||
f.status, f.created_at, f.updated_at, f.created_by
|
||||
@ -238,7 +242,7 @@ func (s *FunctionStore) List(ctx context.Context, namespace string) ([]*Function
|
||||
// ListVersions returns all versions of a function.
|
||||
func (s *FunctionStore) ListVersions(ctx context.Context, namespace, name string) ([]*Function, error) {
|
||||
query := `
|
||||
SELECT id, name, namespace, version, wasm_cid, source_cid, ws_persistent, ws_idle_timeout_sec, ws_max_frame_bytes, ws_max_inflight_per_conn,
|
||||
SELECT id, name, namespace, version, wasm_cid, source_cid, ws_persistent, ws_idle_timeout_sec, ws_max_frame_bytes, ws_max_inflight_per_conn, raw_http_response,
|
||||
memory_limit_mb, timeout_seconds, is_public,
|
||||
retry_count, retry_delay_seconds, dlq_topic,
|
||||
status, created_at, updated_at, created_by
|
||||
@ -399,5 +403,6 @@ func rowToFunction(row *functionRow) *Function {
|
||||
WSIdleTimeoutSec: row.WSIdleTimeoutSec,
|
||||
WSMaxFrameBytes: row.WSMaxFrameBytes,
|
||||
WSMaxInflightPerConn: row.WSMaxInflightPerConn,
|
||||
RawHTTPResponse: row.RawHTTPResponse,
|
||||
}
|
||||
}
|
||||
|
||||
@ -38,6 +38,9 @@ type FunctionDefinition struct {
|
||||
WSIdleTimeoutSec int
|
||||
WSMaxFrameBytes int
|
||||
WSMaxInflightPerConn int
|
||||
|
||||
// RawHTTPResponse enables raw-HTTP-response mode (bugboard #835).
|
||||
RawHTTPResponse bool
|
||||
}
|
||||
|
||||
// Function represents a deployed serverless function.
|
||||
@ -64,6 +67,9 @@ type Function struct {
|
||||
WSIdleTimeoutSec int
|
||||
WSMaxFrameBytes int
|
||||
WSMaxInflightPerConn int
|
||||
|
||||
// RawHTTPResponse enables raw-HTTP-response mode (bugboard #835).
|
||||
RawHTTPResponse bool
|
||||
}
|
||||
|
||||
// LogEntry represents a log message emitted from inside a WASM function
|
||||
@ -180,6 +186,7 @@ type functionRow struct {
|
||||
WSIdleTimeoutSec int
|
||||
WSMaxFrameBytes int
|
||||
WSMaxInflightPerConn int
|
||||
RawHTTPResponse bool
|
||||
}
|
||||
|
||||
type envVarRow struct {
|
||||
|
||||
34
core/pkg/serverless/registry_raw_http_test.go
Normal file
34
core/pkg/serverless/registry_raw_http_test.go
Normal file
@ -0,0 +1,34 @@
|
||||
package serverless
|
||||
|
||||
import (
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
// TestRegistryRowMapping_IncludesRawHTTPResponse guards the raw-HTTP-response
|
||||
// column (bugboard #835): rowToFunction must copy raw_http_response off the DB
|
||||
// row, otherwise the engine's `if fn.RawHTTPResponse` branch never attaches a
|
||||
// collector and set_http_response is a permanent no-op for every function.
|
||||
func TestRegistryRowMapping_IncludesRawHTTPResponse(t *testing.T) {
|
||||
row := functionRow{RawHTTPResponse: true}
|
||||
r := &Registry{}
|
||||
fn := r.rowToFunction(&row)
|
||||
if !fn.RawHTTPResponse {
|
||||
t.Error("rowToFunction did not propagate RawHTTPResponse — raw-HTTP functions would silently fall back to JSON/Ack output (bugboard #835)")
|
||||
}
|
||||
}
|
||||
|
||||
// TestRegistry_QueriesRawHTTPResponseColumn is the SQL-text drift guard: the
|
||||
// raw_http_response column must appear in the INSERT plus every READ-path
|
||||
// SELECT, mirroring the ws_* column guard. Counted ≥5 (one INSERT + the
|
||||
// Get/GetByID/List/ListVersions/getByNameInternal SELECTs).
|
||||
func TestRegistry_QueriesRawHTTPResponseColumn(t *testing.T) {
|
||||
source, err := readRegistrySource()
|
||||
if err != nil {
|
||||
t.Skipf("cannot read registry.go for SQL inspection: %v", err)
|
||||
}
|
||||
count := strings.Count(source, "raw_http_response")
|
||||
if count < 5 {
|
||||
t.Errorf("column raw_http_response appears in registry.go only %d times; expected ≥5 (INSERT + each SELECT path). A READ path probably dropped it and raw-HTTP functions will silently fall back to JSON output.", count)
|
||||
}
|
||||
}
|
||||
@ -0,0 +1,159 @@
|
||||
package triggers
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"testing"
|
||||
|
||||
olriclib "github.com/olric-data/olric"
|
||||
"github.com/olric-data/olric/stats"
|
||||
"go.uber.org/zap"
|
||||
"go.uber.org/zap/zapcore"
|
||||
"go.uber.org/zap/zaptest/observer"
|
||||
)
|
||||
|
||||
// failingOlricClient is a minimal olric.Client whose NewDMap always errors,
|
||||
// simulating an Olric backend that is configured but unavailable — the
|
||||
// degraded path bugboard #555 must surface (fail-open + rate-limited WARN).
|
||||
type failingOlricClient struct{}
|
||||
|
||||
func (failingOlricClient) NewDMap(string, ...olriclib.DMapOption) (olriclib.DMap, error) {
|
||||
return nil, fmt.Errorf("olric unavailable (test)")
|
||||
}
|
||||
func (failingOlricClient) NewPubSub(...olriclib.PubSubOption) (*olriclib.PubSub, error) {
|
||||
return nil, fmt.Errorf("not implemented")
|
||||
}
|
||||
func (failingOlricClient) Stats(context.Context, string, ...olriclib.StatsOption) (stats.Stats, error) {
|
||||
return stats.Stats{}, fmt.Errorf("not implemented")
|
||||
}
|
||||
func (failingOlricClient) Ping(context.Context, string, string) (string, error) {
|
||||
return "", fmt.Errorf("not implemented")
|
||||
}
|
||||
func (failingOlricClient) RoutingTable(context.Context) (olriclib.RoutingTable, error) {
|
||||
return nil, fmt.Errorf("not implemented")
|
||||
}
|
||||
func (failingOlricClient) Members(context.Context) ([]olriclib.Member, error) {
|
||||
return nil, fmt.Errorf("not implemented")
|
||||
}
|
||||
func (failingOlricClient) RefreshMetadata(context.Context) error { return nil }
|
||||
func (failingOlricClient) Close(context.Context) error { return nil }
|
||||
|
||||
var _ olriclib.Client = failingOlricClient{}
|
||||
|
||||
// Bugboard #555 — duplicate push from the dispatcher firing twice.
|
||||
//
|
||||
// These exercise Dispatch's local-dedup short-circuit and the
|
||||
// degraded-dedup WARN. They use a nil-db store: getMatches would panic on
|
||||
// the nil rqlite.Client, so "did we reach getMatches?" is observable as
|
||||
// "did Dispatch panic?". The local dedup runs BEFORE getMatches, so a
|
||||
// deduped call must return cleanly without touching the store.
|
||||
|
||||
func TestDispatch_localDedupSkipsSecondInvokeSameNode(t *testing.T) {
|
||||
logger := zap.NewNop()
|
||||
store := NewPubSubTriggerStore(nil, logger) // nil db: getMatches panics if reached
|
||||
d := NewPubSubDispatcher(store, nil, nil, nil, logger)
|
||||
|
||||
ns, topic, data := "anchat", "messages:new", []byte(`{"messageId":"m1"}`)
|
||||
|
||||
// First publish: NOT deduped → reaches getMatches → nil-db panic. We
|
||||
// recover and confirm we got past the dedup gate.
|
||||
reachedStore := false
|
||||
func() {
|
||||
defer func() {
|
||||
if recover() != nil {
|
||||
reachedStore = true
|
||||
}
|
||||
}()
|
||||
d.Dispatch(context.Background(), ns, topic, data, 0)
|
||||
}()
|
||||
if !reachedStore {
|
||||
t.Fatal("first publish must pass the dedup gate and reach the store lookup")
|
||||
}
|
||||
|
||||
// Second IDENTICAL publish within the TTL: MUST be deduped locally and
|
||||
// return BEFORE getMatches — so no panic this time.
|
||||
dedupedClean := true
|
||||
func() {
|
||||
defer func() {
|
||||
if recover() != nil {
|
||||
dedupedClean = false
|
||||
}
|
||||
}()
|
||||
d.Dispatch(context.Background(), ns, topic, data, 0)
|
||||
}()
|
||||
if !dedupedClean {
|
||||
t.Error("BUG #555 REGRESSION: identical second publish on the same node " +
|
||||
"must be deduped locally and NOT re-dispatch")
|
||||
}
|
||||
}
|
||||
|
||||
func TestDispatch_distinctPayloadsBothDispatch(t *testing.T) {
|
||||
logger := zap.NewNop()
|
||||
store := NewPubSubTriggerStore(nil, logger)
|
||||
d := NewPubSubDispatcher(store, nil, nil, nil, logger)
|
||||
|
||||
ns, topic := "anchat", "messages:new"
|
||||
|
||||
for _, data := range [][]byte{[]byte(`{"messageId":"a"}`), []byte(`{"messageId":"b"}`)} {
|
||||
reachedStore := false
|
||||
func() {
|
||||
defer func() {
|
||||
if recover() != nil {
|
||||
reachedStore = true
|
||||
}
|
||||
}()
|
||||
d.Dispatch(context.Background(), ns, topic, data, 0)
|
||||
}()
|
||||
if !reachedStore {
|
||||
t.Errorf("distinct payload %q must NOT be deduped — it must reach dispatch", data)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestClaimDispatch_degradedWarnWhenOlricDown(t *testing.T) {
|
||||
// Olric "configured but failing" path: a non-nil client whose NewDMap
|
||||
// errors. claimDispatch must STILL fire (fail-open) AND emit a WARN so
|
||||
// operators can see cross-node dedup is degraded.
|
||||
core, observed := observer.New(zapcore.WarnLevel)
|
||||
d := &PubSubDispatcher{
|
||||
logger: zap.New(core),
|
||||
olricClient: failingOlricClient{},
|
||||
}
|
||||
|
||||
if !d.claimDispatch(context.Background(), "ns", "messages:new", []byte("x")) {
|
||||
t.Fatal("claimDispatch must fail-open (true) when Olric is degraded — never drop the wake")
|
||||
}
|
||||
if observed.FilterMessageSnippet("dedup degraded").Len() == 0 {
|
||||
t.Error("degraded Olric path must emit a WARN naming the degradation, not stay silent")
|
||||
}
|
||||
}
|
||||
|
||||
func TestClaimDispatch_degradedWarnRateLimited(t *testing.T) {
|
||||
// A sustained outage must NOT flood the log: only one WARN per interval.
|
||||
core, observed := observer.New(zapcore.WarnLevel)
|
||||
d := &PubSubDispatcher{
|
||||
logger: zap.New(core),
|
||||
olricClient: failingOlricClient{},
|
||||
}
|
||||
|
||||
for i := 0; i < 5; i++ {
|
||||
d.claimDispatch(context.Background(), "ns", "messages:new", []byte("x"))
|
||||
}
|
||||
if got := observed.FilterMessageSnippet("dedup degraded").Len(); got != 1 {
|
||||
t.Errorf("degraded WARN must be rate-limited to 1 per interval; got %d", got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestClaimDispatch_nilOlricStaysQuiet(t *testing.T) {
|
||||
// nil Olric is a NORMAL single-node / cache-disabled config, not a
|
||||
// degraded multi-node cluster. It must fire but NOT warn (avoid noise).
|
||||
core, observed := observer.New(zapcore.WarnLevel)
|
||||
d := &PubSubDispatcher{logger: zap.New(core)} // olricClient nil
|
||||
|
||||
if !d.claimDispatch(context.Background(), "ns", "messages:new", []byte("x")) {
|
||||
t.Fatal("nil Olric must fail-open (true)")
|
||||
}
|
||||
if observed.Len() != 0 {
|
||||
t.Errorf("nil Olric is a normal config and must NOT emit a degraded WARN; got %d logs", observed.Len())
|
||||
}
|
||||
}
|
||||
@ -134,8 +134,24 @@ type PubSubDispatcher struct {
|
||||
// stopCh signals the periodic Refresh goroutine to exit.
|
||||
stopCh chan struct{}
|
||||
stopOnce sync.Once
|
||||
|
||||
// localDedup guards against a SINGLE node invoking the same publish
|
||||
// twice (e.g. gossipsub self-delivery), independent of Olric health.
|
||||
// Bugboard #555. Always non-nil after NewPubSubDispatcher.
|
||||
localDedup *localDedupCache
|
||||
|
||||
// degradedDedupWarn rate-limits the "Olric dedup degraded" WARN so a
|
||||
// misconfigured cluster doesn't flood the log on every publish.
|
||||
// Bugboard #555.
|
||||
degradedDedupMu sync.Mutex
|
||||
degradedDedupLastWarn time.Time
|
||||
}
|
||||
|
||||
// degradedDedupWarnInterval is the minimum gap between two "cross-node dedup
// degraded" WARN log lines (bugboard #555). One warning per interval is enough
// to alert operators without flooding the log under high publish volume.
const degradedDedupWarnInterval = time.Minute
|
||||
|
||||
// NewPubSubDispatcher creates a new PubSub trigger dispatcher.
|
||||
//
|
||||
// The `ps` argument may be nil (e.g. in tests, or namespaces with pubsub
|
||||
@ -158,6 +174,7 @@ func NewPubSubDispatcher(
|
||||
logger: logger,
|
||||
subscribedKeys: make(map[string]bool),
|
||||
stopCh: make(chan struct{}),
|
||||
localDedup: newLocalDedupCache(),
|
||||
}
|
||||
}
|
||||
|
||||
@ -337,6 +354,20 @@ func (d *PubSubDispatcher) Dispatch(ctx context.Context, namespace, topic string
|
||||
return
|
||||
}
|
||||
|
||||
// Local once-per-publish dedup (bugboard #555). gossipsub can deliver
|
||||
// the SAME publish to this node's subscribe handler more than once
|
||||
// (self-delivery / fan-out), and the cross-node Olric claim below is a
|
||||
// no-op when Olric is down. This in-process guard ensures a SINGLE node
|
||||
// never invokes the same (namespace, topic, payload) twice, regardless
|
||||
// of Olric health.
|
||||
dedupKey := dispatchDedupKey(namespace, topic, data)
|
||||
if !d.localDedup.claim(dedupKey) {
|
||||
d.logger.Debug("PubSub dispatch deduped (local duplicate on this node)",
|
||||
zap.String("namespace", namespace),
|
||||
zap.String("topic", topic))
|
||||
return
|
||||
}
|
||||
|
||||
// Cluster-wide once-per-publish dedup (bugboard #30). gossipsub
|
||||
// delivers a publish to every subscribed gateway node; only the node
|
||||
// that wins the Olric claim for this (namespace, topic, payload)
|
||||
@ -580,7 +611,7 @@ func (d *PubSubDispatcher) claimDispatch(ctx context.Context, namespace, topic s
|
||||
}
|
||||
dm, err := d.olricClient.NewDMap(dispatchDedupDMap)
|
||||
if err != nil {
|
||||
d.logger.Debug("dispatch dedup: NewDMap failed, firing (fail-open)", zap.Error(err))
|
||||
d.warnDedupDegraded("NewDMap failed", namespace, topic, err)
|
||||
return true
|
||||
}
|
||||
key := dispatchDedupKey(namespace, topic, data)
|
||||
@ -594,11 +625,39 @@ func (d *PubSubDispatcher) claimDispatch(ctx context.Context, namespace, topic s
|
||||
// Any other (transient) error: fail-open and fire rather than risk a
|
||||
// dropped wake. Worst case is a duplicate, which is what #30 already
|
||||
// had — never worse.
|
||||
d.logger.Debug("dispatch dedup: claim errored, firing (fail-open)",
|
||||
zap.String("topic", topic), zap.Error(err))
|
||||
d.warnDedupDegraded("claim Put errored", namespace, topic, err)
|
||||
return true
|
||||
}
|
||||
|
||||
// warnDedupDegraded emits a rate-limited WARN announcing that cross-node
|
||||
// dispatch dedup is degraded (Olric unavailable), so the cluster has fallen
|
||||
// back to firing on every node that receives the publish. The local cache
|
||||
// still prevents same-node duplicates, but cross-node duplicate pushes are
|
||||
// possible until Olric recovers — operators need visibility, not silence
|
||||
// (bugboard #555). Rate-limited so a sustained outage doesn't flood logs.
|
||||
func (d *PubSubDispatcher) warnDedupDegraded(reason, namespace, topic string, err error) {
|
||||
d.degradedDedupMu.Lock()
|
||||
now := time.Now()
|
||||
shouldWarn := now.Sub(d.degradedDedupLastWarn) >= degradedDedupWarnInterval
|
||||
if shouldWarn {
|
||||
d.degradedDedupLastWarn = now
|
||||
}
|
||||
d.degradedDedupMu.Unlock()
|
||||
|
||||
if !shouldWarn {
|
||||
return
|
||||
}
|
||||
d.logger.Warn("PubSub dispatch dedup degraded: Olric unavailable, "+
|
||||
"falling back to fire-on-every-node — cross-node duplicate pushes "+
|
||||
"possible until the shared store recovers",
|
||||
zap.String("reason", reason),
|
||||
zap.String("namespace", namespace),
|
||||
zap.String("topic", topic),
|
||||
zap.Duration("warn_interval", degradedDedupWarnInterval),
|
||||
zap.Error(err),
|
||||
)
|
||||
}
|
||||
|
||||
// InvalidateCache is now a no-op — the dispatcher no longer caches lookups.
|
||||
// Kept on the type so callers who used it still compile.
|
||||
func (d *PubSubDispatcher) InvalidateCache(ctx context.Context, namespace, topic string) {}
|
||||
|
||||
108
core/pkg/serverless/triggers/local_dedup.go
Normal file
108
core/pkg/serverless/triggers/local_dedup.go
Normal file
@ -0,0 +1,108 @@
|
||||
package triggers
|
||||
|
||||
import (
|
||||
"sync"
|
||||
"time"
|
||||
)
|
||||
|
||||
// Bugboard #555 — messages:new trigger fires twice (duplicate push).
|
||||
//
|
||||
// Two distinct bugs produced duplicate dispatches:
|
||||
//
|
||||
// 1. Cross-node fail-open: claimDispatch (dispatcher.go) coordinates
|
||||
// once-per-publish dispatch via Olric, but FAILS OPEN when Olric is
|
||||
// unavailable/misconfigured. On a multi-node cluster every node that
|
||||
// receives the gossip publish then fires the handler → N duplicate
|
||||
// invocations (AnChat: exactly 2 on a 2-reachable-node cluster).
|
||||
//
|
||||
// 2. Single-node self-delivery: even on one node, gossipsub can deliver a
|
||||
// locally-originated publish back to the same node's subscribe handler,
|
||||
// and the only guard was the cross-node Olric claim — which is a no-op
|
||||
// when Olric is down.
|
||||
//
|
||||
// localDedupCache fixes (2) and bounds the blast radius of (1): a single
|
||||
// node never invokes the SAME publish twice, regardless of Olric health.
|
||||
// It is a small bounded map with per-entry TTL, keyed by the SAME string
|
||||
// dispatchDedupKey produces — (namespace, topic, sha256(payload)[:16]).
|
||||
//
|
||||
// IDENTICAL-PAYLOAD CAVEAT: the key folds the payload hash, NOT a stable
|
||||
// message id (gossipsub's message-ID isn't plumbed through the subscribe
|
||||
// handler, and parsing an app-specific id would couple the dispatcher to a
|
||||
// tenant's JSON schema). So two byte-identical publishes within the TTL
|
||||
// window collapse to one local invocation. Real payloads carry a unique id
|
||||
// (messageId/seq), so this is not a practical concern; it is the same
|
||||
// trade-off documented on dispatchDedupKey.
|
||||
// Tunables for the per-node dedup cache.
const (
	// localDedupTTL is how long this node remembers a
	// (namespace, topic, payload) claim. It has to be long enough to absorb
	// gossipsub self-delivery and fan-out jitter, yet short enough that a
	// publish legitimately repeated seconds later still fires. Kept in
	// lockstep with dispatchDedupTTL.
	localDedupTTL = 30 * time.Second

	// localDedupMaxEntries is the upper bound on remembered keys, so a
	// high-throughput namespace cannot grow the cache without limit. On
	// reaching the bound the cache first drops expired entries; if it is
	// still full the claim is let through unrecorded (fail-open — a rare
	// duplicate is far better than dropping a wake).
	localDedupMaxEntries = 4096
)

// localDedupCache remembers, for one node, which dispatch keys were claimed
// recently. It is a size-bounded map of key to expiry time guarded by a
// mutex, so it may be used from multiple goroutines.
type localDedupCache struct {
	mu      sync.Mutex
	entries map[string]time.Time // key -> expiry
	ttl     time.Duration        // lifetime given to each new entry
	maxSize int                  // entry count at which claim sweeps / fails open
	now     func() time.Time     // injectable clock for tests
}

// newLocalDedupCache returns an empty cache configured with localDedupTTL,
// localDedupMaxEntries and the real wall clock.
func newLocalDedupCache() *localDedupCache {
	c := new(localDedupCache)
	c.entries = map[string]time.Time{}
	c.ttl = localDedupTTL
	c.maxSize = localDedupMaxEntries
	c.now = time.Now
	return c
}

// claim reports whether THIS node may dispatch the publish identified by key
// right now, recording the key when it can.
//
// The result is true the first time a key is seen inside a TTL window (the
// caller should dispatch) and false for later calls inside that window (the
// caller should skip: it is a local duplicate). Once an entry's expiry time
// is reached the key is treated as unseen again.
//
// Fail-open: when the cache is at capacity and sweeping expired entries does
// not free a slot, claim returns true WITHOUT recording the key, preferring a
// possible duplicate over a dropped wake.
func (c *localDedupCache) claim(key string) bool {
	c.mu.Lock()
	defer c.mu.Unlock()

	ts := c.now()
	expiry, seen := c.entries[key]
	if seen && ts.Before(expiry) {
		// Still inside the window → local duplicate → skip.
		return false
	}

	// The key is new or its previous entry has lapsed. Expired entries are
	// only swept once the cache has reached its cap, which keeps the common
	// path free of a full-map scan.
	if len(c.entries) >= c.maxSize {
		c.sweepExpiredLocked(ts)
		if len(c.entries) >= c.maxSize {
			// Every slot is held by a live entry — let the dispatch
			// through rather than drop it.
			return true
		}
	}

	c.entries[key] = ts.Add(c.ttl)
	return true
}

// sweepExpiredLocked deletes every entry whose expiry is at or before now.
// The caller must already hold c.mu.
func (c *localDedupCache) sweepExpiredLocked(now time.Time) {
	for key, expiry := range c.entries {
		if expiry.After(now) {
			continue // still live
		}
		delete(c.entries, key)
	}
}
|
||||
140
core/pkg/serverless/triggers/local_dedup_test.go
Normal file
140
core/pkg/serverless/triggers/local_dedup_test.go
Normal file
@ -0,0 +1,140 @@
|
||||
package triggers
|
||||
|
||||
import (
|
||||
"sync"
|
||||
"sync/atomic"
|
||||
"testing"
|
||||
"time"
|
||||
)
|
||||
|
||||
// Bugboard #555 — a SINGLE node must never invoke the same publish twice,
|
||||
// independent of Olric health. These tests pin the local dedup cache's
|
||||
// claim/expiry/eviction behavior.
|
||||
|
||||
func TestLocalDedupCache_sameKeyClaimedOncePerWindow(t *testing.T) {
|
||||
c := newLocalDedupCache()
|
||||
key := dispatchDedupKey("ns", "messages:new", []byte(`{"id":1}`))
|
||||
|
||||
if !c.claim(key) {
|
||||
t.Fatal("first claim of an unseen key must fire (return true)")
|
||||
}
|
||||
if c.claim(key) {
|
||||
t.Error("second claim within the TTL must be deduped (return false)")
|
||||
}
|
||||
}
|
||||
|
||||
func TestLocalDedupCache_distinctKeysBothFire(t *testing.T) {
|
||||
c := newLocalDedupCache()
|
||||
a := dispatchDedupKey("ns", "messages:new", []byte("A"))
|
||||
b := dispatchDedupKey("ns", "messages:new", []byte("B"))
|
||||
|
||||
if !c.claim(a) {
|
||||
t.Error("distinct payload A must fire")
|
||||
}
|
||||
if !c.claim(b) {
|
||||
t.Error("distinct payload B must fire (different payload → different key)")
|
||||
}
|
||||
}
|
||||
|
||||
func TestLocalDedupCache_expiredEntryFiresAgain(t *testing.T) {
|
||||
// Drive a controllable clock so we don't sleep in the test.
|
||||
cur := time.Unix(1_000_000, 0)
|
||||
c := newLocalDedupCache()
|
||||
c.now = func() time.Time { return cur }
|
||||
|
||||
key := dispatchDedupKey("ns", "messages:new", []byte("x"))
|
||||
if !c.claim(key) {
|
||||
t.Fatal("first claim must fire")
|
||||
}
|
||||
if c.claim(key) {
|
||||
t.Fatal("immediate re-claim must be deduped")
|
||||
}
|
||||
|
||||
// Advance past the TTL: the entry has expired, so the same key must
|
||||
// fire again (a legitimately-repeated publish seconds apart).
|
||||
cur = cur.Add(localDedupTTL + time.Second)
|
||||
if !c.claim(key) {
|
||||
t.Error("after TTL expiry the same key must fire again")
|
||||
}
|
||||
}
|
||||
|
||||
func TestLocalDedupCache_evictsExpiredOnPressure(t *testing.T) {
|
||||
cur := time.Unix(2_000_000, 0)
|
||||
c := &localDedupCache{
|
||||
entries: make(map[string]time.Time),
|
||||
ttl: localDedupTTL,
|
||||
maxSize: 4, // tiny cap to exercise the sweep path deterministically
|
||||
now: func() time.Time { return cur },
|
||||
}
|
||||
|
||||
// Fill to capacity with soon-to-expire entries.
|
||||
for i := 0; i < c.maxSize; i++ {
|
||||
key := dispatchDedupKey("ns", "t", []byte{byte(i)})
|
||||
if !c.claim(key) {
|
||||
t.Fatalf("fill claim %d must fire", i)
|
||||
}
|
||||
}
|
||||
if len(c.entries) != c.maxSize {
|
||||
t.Fatalf("expected cache full at %d, got %d", c.maxSize, len(c.entries))
|
||||
}
|
||||
|
||||
// Advance past TTL so every existing entry is expired, then claim a new
|
||||
// key: the sweep must reclaim space and the new key must be recorded.
|
||||
cur = cur.Add(localDedupTTL + time.Second)
|
||||
newKey := dispatchDedupKey("ns", "t", []byte("fresh"))
|
||||
if !c.claim(newKey) {
|
||||
t.Fatal("new key under pressure must fire")
|
||||
}
|
||||
if _, ok := c.entries[newKey]; !ok {
|
||||
t.Error("new key must be recorded after expired entries were swept")
|
||||
}
|
||||
if len(c.entries) > c.maxSize {
|
||||
t.Errorf("cache must not exceed maxSize after sweep; got %d", len(c.entries))
|
||||
}
|
||||
}
|
||||
|
||||
func TestLocalDedupCache_concurrentClaimsExactlyOneWins(t *testing.T) {
|
||||
// Race condition guard: when many goroutines race to claim the SAME key
|
||||
// (gossipsub delivering one publish across handler goroutines), exactly
|
||||
// one must win. Run under -race to catch unsynchronized map access.
|
||||
c := newLocalDedupCache()
|
||||
key := dispatchDedupKey("ns", "messages:new", []byte(`{"id":"race"}`))
|
||||
|
||||
const goroutines = 64
|
||||
var wins int64
|
||||
var wg sync.WaitGroup
|
||||
wg.Add(goroutines)
|
||||
for i := 0; i < goroutines; i++ {
|
||||
go func() {
|
||||
defer wg.Done()
|
||||
if c.claim(key) {
|
||||
atomic.AddInt64(&wins, 1)
|
||||
}
|
||||
}()
|
||||
}
|
||||
wg.Wait()
|
||||
|
||||
if wins != 1 {
|
||||
t.Errorf("exactly one concurrent claim of the same key must win; got %d", wins)
|
||||
}
|
||||
}
|
||||
|
||||
func TestLocalDedupCache_failsOpenWhenFullOfLiveEntries(t *testing.T) {
|
||||
cur := time.Unix(3_000_000, 0)
|
||||
c := &localDedupCache{
|
||||
entries: make(map[string]time.Time),
|
||||
ttl: localDedupTTL,
|
||||
maxSize: 2,
|
||||
now: func() time.Time { return cur },
|
||||
}
|
||||
|
||||
// Fill with two still-live entries.
|
||||
c.claim(dispatchDedupKey("ns", "t", []byte("a")))
|
||||
c.claim(dispatchDedupKey("ns", "t", []byte("b")))
|
||||
|
||||
// A new key when the cache is full of LIVE entries must fail-open
|
||||
// (fire) rather than drop a legitimate wake.
|
||||
if !c.claim(dispatchDedupKey("ns", "t", []byte("c"))) {
|
||||
t.Error("claim must fail-open (true) when the cache is full of live entries")
|
||||
}
|
||||
}
|
||||
@ -237,6 +237,11 @@ type FunctionDefinition struct {
|
||||
WSIdleTimeoutSec int `json:"ws_idle_timeout_sec,omitempty"` // 0 = no idle timeout
|
||||
WSMaxFrameBytes int `json:"ws_max_frame_bytes,omitempty"` // 0 = use default 256 KB
|
||||
WSMaxInflightPerConn int `json:"ws_max_inflight_per_conn,omitempty"` // 0 = use default 64
|
||||
|
||||
// RawHTTPResponse enables raw-HTTP-response mode (bugboard #835): the
|
||||
// function may call set_http_response to emit a verbatim status/headers/
|
||||
// body instead of the JSON/Ack-wrapped output. See pkg/serverless/raw_http.go.
|
||||
RawHTTPResponse bool `json:"raw_http_response,omitempty"`
|
||||
}
|
||||
|
||||
// DBTriggerConfig defines a database trigger configuration.
|
||||
@ -270,6 +275,11 @@ type Function struct {
|
||||
WSIdleTimeoutSec int `json:"ws_idle_timeout_sec,omitempty"`
|
||||
WSMaxFrameBytes int `json:"ws_max_frame_bytes,omitempty"`
|
||||
WSMaxInflightPerConn int `json:"ws_max_inflight_per_conn,omitempty"`
|
||||
|
||||
// RawHTTPResponse — bugboard #835. When true, the function may emit a
|
||||
// verbatim HTTP response via set_http_response instead of the
|
||||
// JSON/Ack-wrapped output. See pkg/serverless/raw_http.go.
|
||||
RawHTTPResponse bool `json:"raw_http_response,omitempty"`
|
||||
}
|
||||
|
||||
// InvocationContext provides context for a function invocation.
|
||||
@ -308,6 +318,14 @@ type InvocationContext struct {
|
||||
// could create by publishing topics that match its own wildcard
|
||||
// trigger (bugboard #93 follow-up).
|
||||
TriggerDepth int `json:"trigger_depth,omitempty"`
|
||||
|
||||
// RawHTTP carries a verbatim HTTP response set by a RawHTTPResponse
|
||||
// function (bugboard #835). The engine populates this from the
|
||||
// per-invocation collector after Execute returns; the Invoker surfaces
|
||||
// it on InvokeResponse so the HTTP handler can replay it. nil/unset for
|
||||
// normal functions and functions that didn't call set_http_response.
|
||||
// Not serialized — internal plumbing only.
|
||||
RawHTTP *RawHTTPResult `json:"-"`
|
||||
}
|
||||
|
||||
// InvocationResult represents the result of a function invocation.
|
||||
@ -555,6 +573,28 @@ type HostServices interface {
|
||||
// in OnClose unless they want to dynamically unsubscribe.
|
||||
WSPubSubUnbridge(ctx context.Context, clientID, topic string) error
|
||||
|
||||
// SetHTTPResponse records a verbatim HTTP response (status, headers, body)
|
||||
// for a RawHTTPResponse function (bugboard #835). The HTTP invoke handler
|
||||
// replays it byte-for-byte instead of the JSON/Ack-wrapped output, so a
|
||||
// function can transparently proxy an upstream RPC. Returns an error when
|
||||
// the function is NOT deployed with raw_http_response, or when the status /
|
||||
// header count / body size fail validation. headers may be nil.
|
||||
SetHTTPResponse(ctx context.Context, status int, headers map[string]string, body []byte) error
|
||||
|
||||
// EphemeralStateSet records WS-subscribe-tracked ephemeral state owned by
|
||||
// the current invocation's WS client (bugboard #710) and publishes a "set"
|
||||
// event on the topic so subscribers observe it. The state auto-clears (with
|
||||
// a synthetic "clear" event) when the owning WS client disconnects, and
|
||||
// also expires after ttlMs (clamped to a max; <=0 uses a default). Returns
|
||||
// an error when there is no WS client in context, on empty topic/key, on an
|
||||
// oversized payload, or when the client's per-connection key cap is hit.
|
||||
EphemeralStateSet(ctx context.Context, topic, key string, payload []byte, ttlMs int64) error
|
||||
|
||||
// EphemeralStateClear removes ephemeral state the current WS client owns
|
||||
// and publishes a "clear" event. Idempotent: clearing a missing or
|
||||
// non-owned key is a no-op. Errors only on no-WS-client / empty topic-key.
|
||||
EphemeralStateClear(ctx context.Context, topic, key string) error
|
||||
|
||||
// WebSocket operations (only valid in WS context)
|
||||
WSSend(ctx context.Context, clientID string, data []byte) error
|
||||
WSBroadcast(ctx context.Context, topic string, data []byte) error
|
||||
|
||||
@ -23,6 +23,14 @@ type WSManager struct {
|
||||
subscriptions map[string]map[string]struct{}
|
||||
subscriptionsMu sync.RWMutex
|
||||
|
||||
// disconnectHooks run (synchronously) on Unregister for each client,
|
||||
// AFTER the connection + subscriptions are torn down. Used by the
|
||||
// ephemeral-state store (bugboard #710) to auto-clear a client's owned
|
||||
// state on disconnect. Both the stateless and persistent WS handlers
|
||||
// call Unregister, so a single hook covers both paths.
|
||||
disconnectHooks []func(clientID string)
|
||||
disconnectHooksMu sync.RWMutex
|
||||
|
||||
logger *zap.Logger
|
||||
}
|
||||
|
||||
@ -102,6 +110,20 @@ func (m *WSManager) Register(clientID string, conn WebSocketConn) {
|
||||
)
|
||||
}
|
||||
|
||||
// AddDisconnectHook registers a callback fired (synchronously) for every
|
||||
// client passed to Unregister, after its connection + subscriptions are torn
|
||||
// down. Used to auto-clear WS-subscribe-tracked ephemeral state on disconnect
|
||||
// (bugboard #710). Hooks must be cheap and non-blocking — they run inline on
|
||||
// the WS read loop's teardown path. Register once at gateway init.
|
||||
func (m *WSManager) AddDisconnectHook(hook func(clientID string)) {
|
||||
if hook == nil {
|
||||
return
|
||||
}
|
||||
m.disconnectHooksMu.Lock()
|
||||
m.disconnectHooks = append(m.disconnectHooks, hook)
|
||||
m.disconnectHooksMu.Unlock()
|
||||
}
|
||||
|
||||
// Unregister removes a WebSocket connection and its subscriptions.
|
||||
func (m *WSManager) Unregister(clientID string) {
|
||||
m.connectionsMu.Lock()
|
||||
@ -130,6 +152,14 @@ func (m *WSManager) Unregister(clientID string) {
|
||||
// Close the connection
|
||||
_ = conn.conn.Close()
|
||||
|
||||
// Fire disconnect hooks (ephemeral-state auto-clear, bugboard #710).
|
||||
m.disconnectHooksMu.RLock()
|
||||
hooks := m.disconnectHooks
|
||||
m.disconnectHooksMu.RUnlock()
|
||||
for _, hook := range hooks {
|
||||
hook(clientID)
|
||||
}
|
||||
|
||||
m.logger.Debug("Unregistered WebSocket connection",
|
||||
zap.String("client_id", clientID),
|
||||
zap.Int("remaining_connections", m.GetConnectionCount()),
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user