feat(serverless): add raw http response mode and secrets encryption

- Add `raw_http_response` configuration to functions to allow verbatim HTTP responses
- Implement cluster-wide secrets encryption key generation and distribution for serverless functions
- Update documentation with UnifiedPush support for ntfy on Android/GrapheneOS
This commit is contained in:
anonpenguin23 2026-06-09 13:01:02 +03:00
parent aa04ab5f50
commit f41242538e
54 changed files with 3310 additions and 53 deletions

View File

@ -214,6 +214,43 @@ your client computes locally from `(namespace, userId, topic_secret)`.
For `ntfy` with `topic_mode=path`, the token is `ns/<namespace>/<userId>`. For `ntfy` with `topic_mode=path`, the token is `ns/<namespace>/<userId>`.
### UnifiedPush (Android / GrapheneOS, no Google Play Services)
ntfy is a [UnifiedPush](https://unifiedpush.org) distributor, so Android
devices — including de-Googled **GrapheneOS** — can receive push **without
Firebase / Google Play Services**. The flow:
1. The device runs a UnifiedPush **distributor** (the ntfy Android app, or an
embedded distributor library) pointed at your push host
(`https://push.<your-zone>`).
2. The app registers with the distributor and is handed an **endpoint URL**,
e.g. `https://push.<your-zone>/upXXXXXXXX`.
3. Register that endpoint as a push device, passing the **full endpoint URL** as the `token`:
```http
POST /v1/push/devices
{
"device_id": "<unique per-device ID>",
"provider": "ntfy",
"token": "https://push.<your-zone>/upXXXXXXXX",
"platform": "android"
}
```
The gateway POSTs to the endpoint **verbatim** (per the UnifiedPush spec), so
you don't have to deconstruct it. As a safety measure the endpoint's
scheme+host **must match your configured ntfy push host** — a device token can
only ever publish to your own push server, never an arbitrary host.
You may instead register just the bare **topic** (the endpoint's last path
segment) as the token — both forms work; use whichever your UnifiedPush library
makes convenient.
**GrapheneOS notes:** works under both "No Google Play" and "Sandboxed Google
Play" profiles. The distributor holds the persistent connection (not your app),
so battery impact is the distributor's; high-priority messages
(`priority: "high"`) wake the app from Doze.
--- ---
## Step 6 — Send pushes ## Step 6 — Send pushes

View File

@ -0,0 +1,15 @@
-- =============================================================================
-- 029_raw_http_response.sql
--
-- Raw-HTTP-response serverless function mode — bugboard #835.
--
-- When raw_http_response is true, the function may call the set_http_response
-- host function to emit a verbatim HTTP response (status + headers + body)
-- instead of the JSON/Ack-wrapped output. This lets a namespace app proxy an
-- upstream RPC (Helius / Alchemy) transparently. See pkg/serverless/raw_http.go.
--
-- Default false → backward compatible: existing functions keep returning the
-- JSON/Ack-wrapped output unchanged.
--
-- NOT NULL keeps the flag strictly two-valued: an explicit NULL can never be
-- stored, so readers never have to handle a third "unknown" state. Existing
-- rows are backfilled with the FALSE default by ADD COLUMN.
-- =============================================================================
ALTER TABLE functions ADD COLUMN raw_http_response BOOLEAN NOT NULL DEFAULT FALSE;

View File

@ -32,6 +32,11 @@ type FunctionConfig struct {
WSIdleTimeoutSec int `yaml:"ws_idle_timeout_sec"` WSIdleTimeoutSec int `yaml:"ws_idle_timeout_sec"`
WSMaxFrameBytes int `yaml:"ws_max_frame_bytes"` WSMaxFrameBytes int `yaml:"ws_max_frame_bytes"`
WSMaxInflightPerConn int `yaml:"ws_max_inflight_per_conn"` WSMaxInflightPerConn int `yaml:"ws_max_inflight_per_conn"`
// RawHTTPResponse enables raw-HTTP-response mode (bugboard #835) — the
// function may call set_http_response to emit a verbatim HTTP response
// (status/headers/body) instead of the JSON/Ack-wrapped output.
RawHTTPResponse bool `yaml:"raw_http_response"`
} }
// RetryConfig holds retry settings. // RetryConfig holds retry settings.
@ -226,6 +231,9 @@ func uploadWASMFunction(wasmPath string, cfg *FunctionConfig) (map[string]interf
if cfg.WSMaxInflightPerConn > 0 { if cfg.WSMaxInflightPerConn > 0 {
metaObj["ws_max_inflight_per_conn"] = cfg.WSMaxInflightPerConn metaObj["ws_max_inflight_per_conn"] = cfg.WSMaxInflightPerConn
} }
if cfg.RawHTTPResponse {
metaObj["raw_http_response"] = true
}
if len(metaObj) > 0 { if len(metaObj) > 0 {
metadata, _ := json.Marshal(metaObj) metadata, _ := json.Marshal(metaObj)
writer.WriteField("metadata", string(metadata)) writer.WriteField("metadata", string(metadata))

View File

@ -0,0 +1,53 @@
package functions
import (
"os"
"path/filepath"
"testing"
)
// writeFunctionYAML creates a fresh temporary directory, stores body in a
// function.yaml file inside it, and returns the directory path. The calling
// test is aborted if the file cannot be written.
func writeFunctionYAML(t *testing.T, body string) string {
	t.Helper()

	tmpDir := t.TempDir()
	yamlPath := filepath.Join(tmpDir, "function.yaml")
	err := os.WriteFile(yamlPath, []byte(body), 0o600)
	if err != nil {
		t.Fatalf("write function.yaml: %v", err)
	}
	return tmpDir
}
func TestLoadConfig_RawHTTPResponse_true(t *testing.T) {
dir := writeFunctionYAML(t, "name: rpc-proxy\nraw_http_response: true\n")
cfg, err := LoadConfig(dir)
if err != nil {
t.Fatalf("LoadConfig: %v", err)
}
if !cfg.RawHTTPResponse {
t.Error("RawHTTPResponse = false, want true")
}
}
func TestLoadConfig_RawHTTPResponse_defaultsFalse(t *testing.T) {
dir := writeFunctionYAML(t, "name: plain-fn\n")
cfg, err := LoadConfig(dir)
if err != nil {
t.Fatalf("LoadConfig: %v", err)
}
if cfg.RawHTTPResponse {
t.Error("RawHTTPResponse = true, want false (omitted in yaml)")
}
}
func TestLoadConfig_RawHTTPResponse_explicitFalse(t *testing.T) {
dir := writeFunctionYAML(t, "name: plain-fn\nraw_http_response: false\n")
cfg, err := LoadConfig(dir)
if err != nil {
t.Fatalf("LoadConfig: %v", err)
}
if cfg.RawHTTPResponse {
t.Error("RawHTTPResponse = true, want false")
}
}

View File

@ -477,6 +477,14 @@ func (o *Orchestrator) saveSecretsFromJoinResponse(resp *joinhandlers.JoinRespon
} }
} }
// Write serverless secrets encryption key (bugboard #837) — identical on
// every node so namespace function secrets decrypt cluster-wide.
if resp.SecretsEncryptionKey != "" {
if err := os.WriteFile(filepath.Join(secretsDir, "secrets-encryption-key"), []byte(resp.SecretsEncryptionKey), 0600); err != nil {
return fmt.Errorf("failed to write secrets-encryption-key: %w", err)
}
}
// Write IPFS Cluster trusted peer IDs // Write IPFS Cluster trusted peer IDs
if len(resp.IPFSClusterPeerIDs) > 0 { if len(resp.IPFSClusterPeerIDs) > 0 {
content := strings.Join(resp.IPFSClusterPeerIDs, "\n") + "\n" content := strings.Join(resp.IPFSClusterPeerIDs, "\n") + "\n"

View File

@ -200,6 +200,18 @@ func (cg *ConfigGenerator) GenerateNodeConfig(peerAddresses []string, vpsIP stri
data.Environment = cg.Environment data.Environment = cg.Environment
data.OperatorWallet = cg.OperatorWallet data.OperatorWallet = cg.OperatorWallet
// Serverless function secrets encryption key (bugboard #837). Read the
// persisted key (generated in Phase3 / received via join) so it is
// rendered into node.yaml under http_gateway. If the file is missing the
// key is left empty and omitted from the rendered config — get_secret then
// stays disabled until the operator provisions the key. We deliberately do
// NOT generate here: generation/distribution is owned by SecretGenerator
// and the join flow so every node in a cluster shares one key.
secretsKeyPath := filepath.Join(cg.oramaDir, "secrets", "secrets-encryption-key")
if keyBytes, err := os.ReadFile(secretsKeyPath); err == nil {
data.SecretsEncryptionKey = strings.TrimSpace(string(keyBytes))
}
return templates.RenderNodeConfig(data) return templates.RenderNodeConfig(data)
} }
@ -471,6 +483,55 @@ func (sg *SecretGenerator) EnsureAPIKeyHMACSecret() (string, error) {
return secret, nil return secret, nil
} }
// EnsureSecretsEncryptionKey gets or generates the AES-256 key used to
// encrypt serverless function secrets at rest (the function_secrets table).
// The key is a 32-byte random value stored as 64 hex characters in
// <oramaDir>/secrets/secrets-encryption-key.
//
// It MUST be identical on every namespace-gateway node in a cluster and
// stable across restarts — otherwise secrets encrypted by one process can't
// be decrypted by another (bugboard #837). Like api-key-hmac-secret, joining
// nodes receive this value through the join flow rather than generating their
// own; this method only generates on the genesis node (or returns the
// existing key if a joining node already wrote it to disk).
//
// An on-disk key is reused only when it decodes as exactly 32 bytes of hex.
// Anything else (missing file, wrong length, non-hex characters) is replaced
// with a freshly generated key.
//
// Returns the hex-encoded key, or an error if the secrets directory cannot be
// prepared, random bytes cannot be read, or the key file cannot be written or
// have its permissions set.
func (sg *SecretGenerator) EnsureSecretsEncryptionKey() (string, error) {
	secretPath := filepath.Join(sg.oramaDir, "secrets", "secrets-encryption-key")
	secretDir := filepath.Dir(secretPath)

	if err := os.MkdirAll(secretDir, 0700); err != nil {
		return "", fmt.Errorf("failed to create secrets directory: %w", err)
	}
	// MkdirAll leaves an existing directory's mode untouched, so tighten it
	// explicitly.
	if err := os.Chmod(secretDir, 0700); err != nil {
		return "", fmt.Errorf("failed to set secrets directory permissions: %w", err)
	}

	// Try to read an existing key. Checking the length alone is not enough:
	// a 64-character file that is not valid hex must not be handed out as a
	// key, so decode it and require exactly 32 bytes.
	if data, err := os.ReadFile(secretPath); err == nil {
		key := strings.TrimSpace(string(data))
		if raw, decodeErr := hex.DecodeString(key); decodeErr == nil && len(raw) == 32 {
			if err := ensureSecretFilePermissions(secretPath); err != nil {
				return "", err
			}
			return key, nil
		}
	}

	// Generate new key (32 bytes = 64 hex chars)
	keyBytes := make([]byte, 32)
	if _, err := rand.Read(keyBytes); err != nil {
		return "", fmt.Errorf("failed to generate secrets encryption key: %w", err)
	}
	key := hex.EncodeToString(keyBytes)

	if err := os.WriteFile(secretPath, []byte(key), 0600); err != nil {
		return "", fmt.Errorf("failed to save secrets encryption key: %w", err)
	}
	// WriteFile only applies the mode when it creates the file; enforce it
	// for the overwrite case as well.
	if err := ensureSecretFilePermissions(secretPath); err != nil {
		return "", err
	}

	return key, nil
}
func ensureSecretFilePermissions(secretPath string) error { func ensureSecretFilePermissions(secretPath string) error {
if err := os.Chmod(secretPath, 0600); err != nil { if err := os.Chmod(secretPath, 0600); err != nil {
return fmt.Errorf("failed to set permissions on %s: %w", secretPath, err) return fmt.Errorf("failed to set permissions on %s: %w", secretPath, err)

View File

@ -593,6 +593,12 @@ func (ps *ProductionSetup) Phase3GenerateSecrets() error {
} }
ps.logf(" ✓ API key HMAC secret ensured") ps.logf(" ✓ API key HMAC secret ensured")
// Serverless function secrets encryption key (bugboard #837)
if _, err := ps.secretGenerator.EnsureSecretsEncryptionKey(); err != nil {
return fmt.Errorf("failed to ensure secrets encryption key: %w", err)
}
ps.logf(" ✓ Secrets encryption key ensured")
// Node identity (unified architecture) // Node identity (unified architecture)
peerID, err := ps.secretGenerator.EnsureNodeIdentity() peerID, err := ps.secretGenerator.EnsureNodeIdentity()
if err != nil { if err != nil {

View File

@ -0,0 +1,80 @@
package production
import (
"encoding/hex"
"os"
"path/filepath"
"strings"
"testing"
)
// TestEnsureSecretsEncryptionKey_generatesAndPersists starts from an empty
// oramaDir and checks that the returned key is 64 hex characters decoding to
// 32 bytes, and that the same value was written to the key file.
func TestEnsureSecretsEncryptionKey_generatesAndPersists(t *testing.T) {
	oramaDir := t.TempDir()
	sg := NewSecretGenerator(oramaDir)

	key, err := sg.EnsureSecretsEncryptionKey()
	if err != nil {
		t.Fatalf("EnsureSecretsEncryptionKey failed: %v", err)
	}
	if n := len(key); n != 64 {
		t.Fatalf("expected 64 hex chars, got %d (%q)", n, key)
	}

	decoded, decodeErr := hex.DecodeString(key)
	if decodeErr != nil || len(decoded) != 32 {
		t.Fatalf("key is not 32 bytes hex: err=%v len=%d", decodeErr, len(decoded))
	}

	// The key must also be on disk at the well-known location.
	keyFile := filepath.Join(oramaDir, "secrets", "secrets-encryption-key")
	onDisk, readErr := os.ReadFile(keyFile)
	if readErr != nil {
		t.Fatalf("reading persisted key failed: %v", readErr)
	}
	if persisted := strings.TrimSpace(string(onDisk)); persisted != key {
		t.Errorf("persisted key %q != returned key %q", persisted, key)
	}
}
// TestEnsureSecretsEncryptionKey_idempotent calls EnsureSecretsEncryptionKey
// twice against the same directory and requires the same key both times —
// the stability property that bugboard #837 depends on.
func TestEnsureSecretsEncryptionKey_idempotent(t *testing.T) {
	sg := NewSecretGenerator(t.TempDir())

	var keys [2]string
	labels := [2]string{"first", "second"}
	for i := range keys {
		k, err := sg.EnsureSecretsEncryptionKey()
		if err != nil {
			t.Fatalf("%s call failed: %v", labels[i], err)
		}
		keys[i] = k
	}

	if keys[0] != keys[1] {
		t.Errorf("key changed between calls: %q != %q", keys[0], keys[1])
	}
}
// TestEnsureSecretsEncryptionKey_regeneratesInvalid seeds the key file with a
// value of the wrong length and checks that EnsureSecretsEncryptionKey
// returns a 64-character key instead of the corrupt content.
func TestEnsureSecretsEncryptionKey_regeneratesInvalid(t *testing.T) {
	root := t.TempDir()

	keyDir := filepath.Join(root, "secrets")
	if mkErr := os.MkdirAll(keyDir, 0700); mkErr != nil {
		t.Fatalf("mkdir failed: %v", mkErr)
	}

	corrupt := []byte("too-short")
	keyFile := filepath.Join(keyDir, "secrets-encryption-key")
	if wrErr := os.WriteFile(keyFile, corrupt, 0600); wrErr != nil {
		t.Fatalf("write failed: %v", wrErr)
	}

	sg := NewSecretGenerator(root)
	key, err := sg.EnsureSecretsEncryptionKey()
	if err != nil {
		t.Fatalf("EnsureSecretsEncryptionKey failed: %v", err)
	}
	if n := len(key); n != 64 {
		t.Errorf("expected regenerated 64-char key, got %d (%q)", n, key)
	}
}

View File

@ -88,6 +88,12 @@ http_gateway:
ipfs_cluster_api_url: "http://localhost:{{.ClusterAPIPort}}" ipfs_cluster_api_url: "http://localhost:{{.ClusterAPIPort}}"
ipfs_api_url: "http://localhost:{{.IPFSAPIPort}}" ipfs_api_url: "http://localhost:{{.IPFSAPIPort}}"
ipfs_timeout: "60s" ipfs_timeout: "60s"
{{- if .SecretsEncryptionKey}}
# Serverless function secrets encryption key (AES-256, hex). Must be
# identical on every namespace-gateway node and stable across restarts
# (bugboard #837). Sourced from ~/.orama/secrets/secrets-encryption-key.
secrets_encryption_key: "{{.SecretsEncryptionKey}}"
{{- end}}
# Routes for internal service reverse proxy (kept for backwards compatibility but not used by full gateway) # Routes for internal service reverse proxy (kept for backwards compatibility but not used by full gateway)
routes: {} routes: {}

View File

@ -46,6 +46,15 @@ type NodeConfigData struct {
SSHUser string // SSH user for remote management SSHUser string // SSH user for remote management
Environment string // Environment name (devnet, testnet, etc.) Environment string // Environment name (devnet, testnet, etc.)
OperatorWallet string // Operator wallet address OperatorWallet string // Operator wallet address
// SecretsEncryptionKey is the AES-256 key (hex, 64 chars) used to encrypt
// serverless function secrets at rest. Rendered under http_gateway in
// node.yaml. Sourced from ~/.orama/secrets/secrets-encryption-key — must
// be identical across all namespace-gateway nodes in a cluster and stable
// across restarts (bugboard #837). Empty → key omitted from the rendered
// config (the gateway then reads the secret file directly / get_secret
// stays disabled until the key is configured).
SecretsEncryptionKey string
} }
// GatewayConfigData holds parameters for gateway.yaml rendering // GatewayConfigData holds parameters for gateway.yaml rendering

View File

@ -41,6 +41,32 @@ func TestRenderNodeConfig(t *testing.T) {
} }
} }
func TestRenderNodeConfig_secretsEncryptionKey(t *testing.T) {
const key = "0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef"
// Happy path: key present → rendered under http_gateway.
withKey, err := RenderNodeConfig(NodeConfigData{
NodeID: "node1",
SecretsEncryptionKey: key,
})
if err != nil {
t.Fatalf("RenderNodeConfig failed: %v", err)
}
want := "secrets_encryption_key: \"" + key + "\""
if !strings.Contains(withKey, want) {
t.Errorf("rendered node config missing secrets key line %q\n---\n%s", want, withKey)
}
// Edge case: empty key → line omitted entirely (no empty value rendered).
withoutKey, err := RenderNodeConfig(NodeConfigData{NodeID: "node1"})
if err != nil {
t.Fatalf("RenderNodeConfig failed: %v", err)
}
if strings.Contains(withoutKey, "secrets_encryption_key") {
t.Errorf("empty key should omit secrets_encryption_key line, got:\n%s", withoutKey)
}
}
func TestRenderGatewayConfig(t *testing.T) { func TestRenderGatewayConfig(t *testing.T) {
bootstrapMultiaddr := "/ip4/127.0.0.1/tcp/4001/p2p/Qm1234567890" bootstrapMultiaddr := "/ip4/127.0.0.1/tcp/4001/p2p/Qm1234567890"
data := GatewayConfigData{ data := GatewayConfigData{

View File

@ -51,6 +51,14 @@ type Config struct {
// Loaded from ~/.orama/secrets/api-key-hmac-secret. // Loaded from ~/.orama/secrets/api-key-hmac-secret.
APIKeyHMACSecret string APIKeyHMACSecret string
// SecretsEncryptionKey is the AES-256 key (32 bytes, hex-encoded → 64
// hex chars) used to encrypt serverless function secrets at rest in the
// function_secrets table. It MUST be identical on every namespace-gateway
// node in a cluster and stable across restarts — otherwise secrets
// encrypted by one process cannot be decrypted by another (bugboard #837).
// Loaded from ~/.orama/secrets/secrets-encryption-key.
SecretsEncryptionKey string
// WebRTC configuration (set when namespace has WebRTC enabled). // WebRTC configuration (set when namespace has WebRTC enabled).
// //
// WebRTCEnabled is RETAINED for back-compat with operator YAML and // WebRTCEnabled is RETAINED for back-compat with operator YAML and

View File

@ -469,9 +469,17 @@ func initializeServerless(logger *logging.ColoredLogger, cfg *Config, deps *Depe
engineCfg.MaxTimeoutSeconds = 60 engineCfg.MaxTimeoutSeconds = 60
engineCfg.ModuleCacheSize = 100 engineCfg.ModuleCacheSize = 100
// Create secrets manager for serverless functions (AES-256-GCM encrypted) // Create secrets manager for serverless functions (AES-256-GCM encrypted).
//
// The encryption key comes from the gateway Config (loaded from
// ~/.orama/secrets/secrets-encryption-key), NOT from engineCfg — engineCfg
// never has the key set, so passing it always produced a per-process
// ephemeral key and made get_secret return undecryptable values
// (bugboard #837). allowEphemeral=false: a missing/invalid key fails
// loudly here and disables get_secret rather than silently corrupting
// secrets.
var secretsMgr serverless.SecretsManager var secretsMgr serverless.SecretsManager
if smImpl, secretsErr := hostfunctions.NewDBSecretsManager(deps.ORMClient, engineCfg.SecretsEncryptionKey, logger.Logger); secretsErr != nil { if smImpl, secretsErr := hostfunctions.NewDBSecretsManager(deps.ORMClient, cfg.SecretsEncryptionKey, false, logger.Logger); secretsErr != nil {
logger.ComponentWarn(logging.ComponentGeneral, "Failed to initialize secrets manager; get_secret will be unavailable", logger.ComponentWarn(logging.ComponentGeneral, "Failed to initialize secrets manager; get_secret will be unavailable",
zap.Error(secretsErr)) zap.Error(secretsErr))
} else { } else {

View File

@ -39,6 +39,9 @@ type JoinResponse struct {
APIKeyHMACSecret string `json:"api_key_hmac_secret,omitempty"` APIKeyHMACSecret string `json:"api_key_hmac_secret,omitempty"`
RQLitePassword string `json:"rqlite_password,omitempty"` RQLitePassword string `json:"rqlite_password,omitempty"`
OlricEncryptionKey string `json:"olric_encryption_key,omitempty"` OlricEncryptionKey string `json:"olric_encryption_key,omitempty"`
// Serverless secrets encryption key (bugboard #837) — must be identical on
// every node so namespace function secrets decrypt cluster-wide.
SecretsEncryptionKey string `json:"secrets_encryption_key,omitempty"`
// Cluster join info (all using WG IPs) // Cluster join info (all using WG IPs)
RQLiteJoinAddress string `json:"rqlite_join_address"` RQLiteJoinAddress string `json:"rqlite_join_address"`
@ -200,6 +203,13 @@ func (h *Handler) HandleJoin(w http.ResponseWriter, r *http.Request) {
olricEncryptionKey = strings.TrimSpace(string(data)) olricEncryptionKey = strings.TrimSpace(string(data))
} }
// Read serverless secrets encryption key (optional — may not exist on
// older clusters; bugboard #837)
secretsEncryptionKey := ""
if data, err := os.ReadFile(h.oramaDir + "/secrets/secrets-encryption-key"); err == nil {
secretsEncryptionKey = strings.TrimSpace(string(data))
}
// 7. Get this node's WG IP (needed before peer list to check self-inclusion) // 7. Get this node's WG IP (needed before peer list to check self-inclusion)
myWGIP, err := h.getMyWGIP() myWGIP, err := h.getMyWGIP()
if err != nil { if err != nil {
@ -271,6 +281,7 @@ func (h *Handler) HandleJoin(w http.ResponseWriter, r *http.Request) {
APIKeyHMACSecret: apiKeyHMACSecret, APIKeyHMACSecret: apiKeyHMACSecret,
RQLitePassword: rqlitePassword, RQLitePassword: rqlitePassword,
OlricEncryptionKey: olricEncryptionKey, OlricEncryptionKey: olricEncryptionKey,
SecretsEncryptionKey: secretsEncryptionKey,
RQLiteJoinAddress: fmt.Sprintf("%s:7001", myWGIP), RQLiteJoinAddress: fmt.Sprintf("%s:7001", myWGIP),
IPFSPeer: ipfsPeer, IPFSPeer: ipfsPeer,
IPFSClusterPeer: ipfsClusterPeer, IPFSClusterPeer: ipfsClusterPeer,

View File

@ -17,7 +17,6 @@ import (
"encoding/json" "encoding/json"
"errors" "errors"
"net/http" "net/http"
"strings"
"time" "time"
"github.com/DeBrosOfficial/network/pkg/push" "github.com/DeBrosOfficial/network/pkg/push"
@ -136,13 +135,13 @@ func (h *Handlers) PutConfigHandler(w http.ResponseWriter, r *http.Request) {
return return
} }
// Validate URL fields look reasonable. We don't do hostname resolution // Reject a base URL that targets an internal/reserved host — a tenant must
// here (slow, flaky); just reject obviously-wrong schemes. // not be able to turn the gateway's push sender into an SSRF proxy (cloud
// metadata, WireGuard mesh, loopback). This is the config-SET path, so the
// DNS-resolving check is fine here; the hot send path never runs it.
if body.NtfyBaseURL != nil && *body.NtfyBaseURL != "" { if body.NtfyBaseURL != nil && *body.NtfyBaseURL != "" {
if !strings.HasPrefix(*body.NtfyBaseURL, "http://") && if err := push.CheckBaseURLResolvable(r.Context(), *body.NtfyBaseURL); err != nil {
!strings.HasPrefix(*body.NtfyBaseURL, "https://") { writeError(w, http.StatusBadRequest, "ntfy_base_url rejected: "+err.Error())
writeError(w, http.StatusBadRequest,
"ntfy_base_url must start with http:// or https://")
return return
} }
} }

View File

@ -0,0 +1,63 @@
package push
import (
"context"
"net/http"
"net/http/httptest"
"testing"
authsvc "github.com/DeBrosOfficial/network/pkg/gateway/auth"
"github.com/DeBrosOfficial/network/pkg/gateway/ctxkeys"
)
// Bugboard #548 — a push device must be keyed on the stable identity (rootId)
// when the app provides one, not the wallet credential that authenticated the
// session. resolveCallerUserID prefers the `root_id` custom claim and falls
// back to the JWT subject so single-credential apps keep working.

// reqWithClaims builds a GET "/" test request. When claims is non-nil it is
// stored in the request context under ctxkeys.JWT; when nil the context is
// left without a JWT entry.
func reqWithClaims(t *testing.T, claims *authsvc.JWTClaims) *http.Request {
	t.Helper()

	req := httptest.NewRequest(http.MethodGet, "/", nil)
	if claims == nil {
		return req.WithContext(req.Context())
	}
	withJWT := context.WithValue(req.Context(), ctxkeys.JWT, claims)
	return req.WithContext(withJWT)
}
func TestResolveCallerUserID_prefersRootIDClaim(t *testing.T) {
r := reqWithClaims(t, &authsvc.JWTClaims{
Sub: "0xWALLET",
Custom: map[string]string{rootIDClaim: "root-uuid-123"},
})
if got := resolveCallerUserID(r); got != "root-uuid-123" {
t.Errorf("want rootId from claim, got %q", got)
}
}
func TestResolveCallerUserID_fallsBackToSubject(t *testing.T) {
// No custom claim → wallet subject (back-compat for single-credential apps).
r := reqWithClaims(t, &authsvc.JWTClaims{Sub: "0xWALLET"})
if got := resolveCallerUserID(r); got != "0xWALLET" {
t.Errorf("want wallet subject fallback, got %q", got)
}
}
func TestResolveCallerUserID_emptyRootIDFallsBack(t *testing.T) {
// An empty root_id must not collapse identity to "" — fall back to subject.
r := reqWithClaims(t, &authsvc.JWTClaims{
Sub: "0xWALLET",
Custom: map[string]string{rootIDClaim: ""},
})
if got := resolveCallerUserID(r); got != "0xWALLET" {
t.Errorf("want wallet fallback on empty root_id, got %q", got)
}
}
// TestResolveCallerUserID_noJWTReturnsEmpty checks that a request carrying no
// JWT in its context (an API-key-only request) resolves to the empty string.
func TestResolveCallerUserID_noJWTReturnsEmpty(t *testing.T) {
	req := reqWithClaims(t, nil)

	got := resolveCallerUserID(req)
	if got != "" {
		t.Errorf("want empty for API-key-only request, got %q", got)
	}
}

View File

@ -141,11 +141,27 @@ func resolveNamespace(r *http.Request) string {
return "" return ""
} }
// resolveCallerUserID extracts the JWT subject (typically the wallet) of // rootIDClaim is the custom JWT claim an app may set to carry the stable
// the caller, or empty if the request was authenticated by API key only. // identity (rootId) that a device should be keyed on, independent of which
// wallet credential authenticated the session. See bugboard #548.
const rootIDClaim = "root_id"
// resolveCallerUserID extracts the identity a push device should be keyed on.
//
// In a multi-credential app (anchat), the JWT subject is the *wallet* — a
// credential, not the identity. A single user (rootId) with N linked wallets
// would otherwise register N device rows and receive N duplicate pushes
// (bugboard #548). When the app includes a stable `root_id` custom claim, we
// key on that; otherwise we fall back to the subject (wallet) so single-
// credential apps and older tokens keep working unchanged.
//
// Returns empty if the request was authenticated by API key only (no JWT).
func resolveCallerUserID(r *http.Request) string { func resolveCallerUserID(r *http.Request) string {
if v := r.Context().Value(ctxkeys.JWT); v != nil { if v := r.Context().Value(ctxkeys.JWT); v != nil {
if claims, ok := v.(*auth.JWTClaims); ok && claims != nil { if claims, ok := v.(*auth.JWTClaims); ok && claims != nil {
if rootID, ok := claims.Custom[rootIDClaim]; ok && rootID != "" {
return rootID
}
return claims.Sub return claims.Sub
} }
} }

View File

@ -145,6 +145,27 @@ func (h *ServerlessHandlers) InvokeFunction(w http.ResponseWriter, r *http.Reque
w.Header().Set("X-Request-ID", resp.RequestID) w.Header().Set("X-Request-ID", resp.RequestID)
w.Header().Set("X-Duration-Ms", strconv.FormatInt(resp.DurationMS, 10)) w.Header().Set("X-Duration-Ms", strconv.FormatInt(resp.DurationMS, 10))
// Raw-HTTP-response mode (bugboard #835): when a function deployed with
// raw_http_response actually set a response via set_http_response, replay
// it verbatim (status + headers + body) and skip the sniff/wrap path. If
// the function set nothing, RawHTTP is nil and we fall through to the
// normal behavior unchanged.
if resp.RawHTTP != nil {
for k, v := range resp.RawHTTP.Headers {
// A tenant function must not overwrite gateway-owned trace/auth
// headers or framing-control (hop-by-hop) headers via its raw
// response — that would let it forge request IDs, leak/spoof
// internal-auth headers, or corrupt response framing.
if isReservedResponseHeader(k) {
continue
}
w.Header().Set(k, v)
}
w.WriteHeader(resp.RawHTTP.Status)
w.Write(resp.RawHTTP.Body)
return
}
// Try to detect if output is JSON // Try to detect if output is JSON
if len(resp.Output) > 0 && (resp.Output[0] == '{' || resp.Output[0] == '[') { if len(resp.Output) > 0 && (resp.Output[0] == '{' || resp.Output[0] == '[') {
w.Header().Set("Content-Type", "application/json") w.Header().Set("Content-Type", "application/json")
@ -256,3 +277,32 @@ func (h *ServerlessHandlers) ListVersions(w http.ResponseWriter, r *http.Request
"count": len(versions), "count": len(versions),
}) })
} }
// reservedResponseHeaders is the set of response header names that a
// raw-HTTP-response tenant function (bugboard #835) may neither set nor
// overwrite. Keys are stored lower-case; callers must normalize before lookup.
// Headers with the X-Internal- prefix are not listed here — they are matched
// by prefix in isReservedResponseHeader.
var reservedResponseHeaders = map[string]struct{}{
	// Gateway-owned trace headers.
	"x-request-id":  {},
	"x-duration-ms": {},

	// Framing-control headers.
	"content-length":    {},
	"transfer-encoding": {},

	// Hop-by-hop headers.
	"connection":          {},
	"keep-alive":          {},
	"proxy-authenticate":  {},
	"proxy-authorization": {},
	"te":                  {},
	"trailer":             {},
	"upgrade":             {},
}

// isReservedResponseHeader reports whether key names a header reserved for
// the gateway, which must therefore be dropped from a tenant's raw HTTP
// response. The comparison ignores case and surrounding whitespace. A header
// is reserved when it starts with "x-internal-" (inter-service trust headers)
// or appears in reservedResponseHeaders.
func isReservedResponseHeader(key string) bool {
	normalized := strings.TrimSpace(key)
	normalized = strings.ToLower(normalized)

	if strings.HasPrefix(normalized, "x-internal-") {
		return true
	}
	_, reserved := reservedResponseHeaders[normalized]
	return reserved
}

View File

@ -0,0 +1,31 @@
package serverless
import "testing"
// TestIsReservedResponseHeader is the bugboard #835 hardening check (flagged
// by code + security review): a raw-HTTP tenant function must not be able to
// set or overwrite gateway-owned trace/auth headers or hop-by-hop framing
// headers, while ordinary response headers stay available to it.
func TestIsReservedResponseHeader(t *testing.T) {
	cases := []struct {
		header   string
		reserved bool
	}{
		// Must be protected.
		{"X-Request-ID", true},
		{"x-request-id", true},
		{"X-Duration-Ms", true},
		{"Content-Length", true},
		{"Transfer-Encoding", true},
		{"Connection", true},
		{"Keep-Alive", true},
		{"Proxy-Authenticate", true},
		{"Proxy-Authorization", true},
		{"TE", true},
		{"Trailer", true},
		{"Upgrade", true},
		{"X-Internal-Auth", true},
		{"x-internal-anything", true},
		{" X-Request-Id ", true},

		// Tenant may set these.
		{"Content-Type", false},
		{"Cache-Control", false},
		{"X-Custom", false},
		{"ETag", false},
		{"Access-Control-Allow-Origin", false},
		{"Location", false},
		{"Retry-After", false},
	}

	for _, tc := range cases {
		got := isReservedResponseHeader(tc.header)
		switch {
		case tc.reserved && !got:
			t.Errorf("isReservedResponseHeader(%q) = false; want true (must be protected)", tc.header)
		case !tc.reserved && got:
			t.Errorf("isReservedResponseHeader(%q) = true; want false (tenant may set it)", tc.header)
		}
	}
}

View File

@ -58,6 +58,15 @@ func (n *Node) startHTTPGateway(ctx context.Context) error {
rqlitePassword = strings.TrimSpace(string(secretBytes)) rqlitePassword = strings.TrimSpace(string(secretBytes))
} }
// Read the serverless secrets encryption key (bugboard #837). Must be the
// SAME value on every namespace-gateway node so a secret encrypted by one
// process decrypts on another; an empty value makes get_secret fail loudly
// (the manager refuses an ephemeral key in production).
secretsEncryptionKey := ""
if secretBytes, err := os.ReadFile(filepath.Join(oramaDir, "secrets", "secrets-encryption-key")); err == nil {
secretsEncryptionKey = strings.TrimSpace(string(secretBytes))
}
gwCfg := &gateway.Config{ gwCfg := &gateway.Config{
ListenAddr: n.config.HTTPGateway.ListenAddr, ListenAddr: n.config.HTTPGateway.ListenAddr,
ClientNamespace: n.config.HTTPGateway.ClientNamespace, ClientNamespace: n.config.HTTPGateway.ClientNamespace,
@ -75,6 +84,7 @@ func (n *Node) startHTTPGateway(ctx context.Context) error {
RQLitePassword: rqlitePassword, RQLitePassword: rqlitePassword,
ClusterSecret: clusterSecret, ClusterSecret: clusterSecret,
APIKeyHMACSecret: apiKeyHMACSecret, APIKeyHMACSecret: apiKeyHMACSecret,
SecretsEncryptionKey: secretsEncryptionKey,
WebRTCEnabled: n.config.HTTPGateway.WebRTC.Enabled, WebRTCEnabled: n.config.HTTPGateway.WebRTC.Enabled,
SFUPort: n.config.HTTPGateway.WebRTC.SFUPort, SFUPort: n.config.HTTPGateway.WebRTC.SFUPort,
TURNDomain: n.config.HTTPGateway.WebRTC.TURNDomain, TURNDomain: n.config.HTTPGateway.WebRTC.TURNDomain,

View File

@ -296,7 +296,17 @@ func (m *Manager) buildDispatcher(ctx context.Context, namespace string) (*PushD
// (DELETE) — there's no "set this field to empty to clear" // (DELETE) — there's no "set this field to empty to clear"
// half-state, by design. // half-state, by design.
if nc.NtfyBaseURL != "" { if nc.NtfyBaseURL != "" {
eff.NtfyBaseURL = nc.NtfyBaseURL // Defense-in-depth: a base URL stored before the SSRF guard
// existed (or via any path that skipped it) must not point at an
// internal/reserved literal IP. Drop the override and fall back
// to the gateway default if it does. Literal-only (no DNS, no
// syntax re-validation) so this stays safe on the hot build path.
if IsInternalBaseURL(nc.NtfyBaseURL) {
m.logger.Warn("push: ignoring namespace ntfy_base_url override (internal address)",
zap.String("namespace", namespace), zap.String("base_url", nc.NtfyBaseURL))
} else {
eff.NtfyBaseURL = nc.NtfyBaseURL
}
} }
if nc.NtfyAuthToken != "" { if nc.NtfyAuthToken != "" {
eff.NtfyAuthToken = nc.NtfyAuthToken eff.NtfyAuthToken = nc.NtfyAuthToken

View File

@ -16,10 +16,11 @@ package ntfy
// migration window, with the new credentials store taking precedence. // migration window, with the new credentials store taking precedence.
import ( import (
"context"
"encoding/json" "encoding/json"
"fmt" "fmt"
"strings"
"github.com/DeBrosOfficial/network/pkg/push"
"github.com/DeBrosOfficial/network/pkg/push/credentials" "github.com/DeBrosOfficial/network/pkg/push/credentials"
) )
@ -87,7 +88,17 @@ func (Validator) Validate(raw []byte) error {
if err := json.Unmarshal(raw, &c); err != nil { if err := json.Unmarshal(raw, &c); err != nil {
return fmt.Errorf("ntfy credentials: invalid JSON: %w", err) return fmt.Errorf("ntfy credentials: invalid JSON: %w", err)
} }
return validateCredentials(c) if err := validateCredentials(c); err != nil {
return err
}
// Validate is the config-SET path (the hot build path uses ParseCredentials,
// which skips DNS), so the resolving SSRF check is safe here: reject a
// base_url whose host resolves to an internal/reserved address. Fail-open on
// resolution error — see push.CheckBaseURLResolvable.
if err := push.CheckBaseURLResolvable(context.Background(), c.BaseURL); err != nil {
return fmt.Errorf("ntfy credentials: %w", err)
}
return nil
} }
// Redact returns a JSON-safe view that never echoes the auth token or // Redact returns a JSON-safe view that never echoes the auth token or
@ -127,10 +138,12 @@ func ParseCredentials(raw []byte) (Credentials, error) {
// validateCredentials is the shared validator used by both Validate and // validateCredentials is the shared validator used by both Validate and
// ParseCredentials. // ParseCredentials.
func validateCredentials(c Credentials) error { func validateCredentials(c Credentials) error {
if c.BaseURL != "" { // Literal-IP SSRF guard + scheme check. Runs on BOTH the set and the hot
if !strings.HasPrefix(c.BaseURL, "http://") && !strings.HasPrefix(c.BaseURL, "https://") { // build path (no DNS), so a stored internal-literal base_url is also
return fmt.Errorf("ntfy credentials: base_url must start with http:// or https:// (got %q)", c.BaseURL) // rejected when the dispatcher is (re)built. The DNS-resolving check lives
} // in Validate (set path only).
if err := push.CheckBaseURLSyntax(c.BaseURL); err != nil {
return fmt.Errorf("ntfy credentials: %w", err)
} }
if c.TopicMode != "" { if c.TopicMode != "" {
switch c.TopicMode { switch c.TopicMode {

View File

@ -26,7 +26,10 @@ func TestValidator_RejectsBadBaseURL(t *testing.T) {
} }
func TestValidator_AcceptsHttpAndHttps(t *testing.T) { func TestValidator_AcceptsHttpAndHttps(t *testing.T) {
for _, base := range []string{"http://push.local:8080", "https://push.example.com"} { // Literal public (documentation-range) IPs so the test is deterministic and
// never hits real DNS — Validate now does a set-time SSRF resolve for
// hostname base URLs.
for _, base := range []string{"http://203.0.113.10:8080", "https://203.0.113.10"} {
body, _ := json.Marshal(Credentials{BaseURL: base}) body, _ := json.Marshal(Credentials{BaseURL: base})
if err := NewValidator().Validate(body); err != nil { if err := NewValidator().Validate(body); err != nil {
t.Errorf("base_url=%q rejected: %v", base, err) t.Errorf("base_url=%q rejected: %v", base, err)
@ -34,6 +37,21 @@ func TestValidator_AcceptsHttpAndHttps(t *testing.T) {
} }
} }
// TestValidator_RejectsInternalBaseURL pins the SSRF guard on Validate: a
// base_url whose host is a literal internal/reserved IP must produce an error.
// Every input is a literal IP, so no hostname lookup is involved.
func TestValidator_RejectsInternalBaseURL(t *testing.T) {
	internal := []string{
		"http://169.254.169.254", // cloud metadata
		"http://127.0.0.1:8090",  // loopback (the operator's local ntfy)
		"http://10.0.0.5",        // WireGuard mesh
	}
	for _, base := range internal {
		body, _ := json.Marshal(Credentials{BaseURL: base})
		err := NewValidator().Validate(body)
		if err != nil {
			continue // rejected, as required
		}
		t.Errorf("internal base_url %q must be rejected (SSRF)", base)
	}
}
func TestValidator_RejectsBadTopicMode(t *testing.T) { func TestValidator_RejectsBadTopicMode(t *testing.T) {
if err := NewValidator().Validate([]byte(`{"topic_mode":"random"}`)); err == nil { if err := NewValidator().Validate([]byte(`{"topic_mode":"random"}`)); err == nil {
t.Error("expected rejection of unknown topic_mode") t.Error("expected rejection of unknown topic_mode")

View File

@ -74,15 +74,10 @@ func (p *Provider) Send(ctx context.Context, msg push.PushMessage) error {
return fmt.Errorf("ntfy: base URL not configured") return fmt.Errorf("ntfy: base URL not configured")
} }
// URL-escape each path segment of the device token. ntfy topics can be endpointURL, err := p.resolveEndpoint(msg.DeviceToken)
// hierarchical (e.g. "ns/myapp/user-1") and we want to preserve those if err != nil {
// '/' separators while escaping any other special characters that return err
// could let a malicious token escape the topic path.
parts := strings.Split(msg.DeviceToken, "/")
for i, p := range parts {
parts[i] = url.PathEscape(p)
} }
endpointURL := p.baseURL + "/" + strings.Join(parts, "/")
req, err := http.NewRequestWithContext(ctx, http.MethodPost, endpointURL, strings.NewReader(msg.Body)) req, err := http.NewRequestWithContext(ctx, http.MethodPost, endpointURL, strings.NewReader(msg.Body))
if err != nil { if err != nil {
@ -130,3 +125,58 @@ func (p *Provider) Send(ctx context.Context, msg push.PushMessage) error {
_, _ = io.Copy(io.Discard, io.LimitReader(resp.Body, 4096)) _, _ = io.Copy(io.Discard, io.LimitReader(resp.Body, 4096))
return nil return nil
} }
// resolveEndpoint maps a device token to the ntfy publish URL.
//
// The token is one of two shapes:
//
//   - A plain ntfy topic (possibly hierarchical, e.g. "ns/myapp/user-1"),
//     published to "<baseURL>/<topic>".
//   - A full UnifiedPush endpoint URL handed to the client by the ntfy
//     distributor (e.g. "https://push.example.com/up<random>"). It is accepted
//     only when its scheme and host match the configured base URL
//     (case-insensitively); otherwise an error is returned and nothing is
//     published. Only the URL's path is kept and used as the topic, so any
//     userinfo, query string or fragment is dropped.
//
// In both shapes the result is always built on p.baseURL, with every path
// segment escaped via url.PathEscape while the '/' hierarchy is preserved.
// "." and ".." segments are rejected: PathEscape leaves them untouched, and a
// server or proxy that normalizes the path would let them climb out of the
// topic path the token is supposed to be confined to.
//
// Returns the publish URL, or an error when the endpoint URL is unparsable,
// points at a different scheme/host, has no topic path, or the topic contains
// a dot segment.
func (p *Provider) resolveEndpoint(token string) (string, error) {
	topic := token
	if isAbsoluteHTTPURL(token) {
		u, err := url.Parse(token)
		if err != nil {
			return "", fmt.Errorf("ntfy: invalid endpoint url: %w", err)
		}
		base, err := url.Parse(p.baseURL)
		if err != nil {
			return "", fmt.Errorf("ntfy: invalid base url %q: %w", p.baseURL, err)
		}
		if !strings.EqualFold(u.Scheme, base.Scheme) || !strings.EqualFold(u.Host, base.Host) {
			// Reject an endpoint pointing anywhere other than the configured
			// push host — a device token must never become an SSRF vector.
			return "", fmt.Errorf("ntfy: endpoint host %q does not match configured push host %q", u.Host, base.Host)
		}
		// Confine the URL form to the SAME publish surface as a bare topic:
		// take only the path as the topic and rebuild it through the
		// per-segment escaping below, dropping any query/fragment.
		topic = strings.TrimPrefix(u.Path, "/")
		if topic == "" {
			return "", fmt.Errorf("ntfy: endpoint url %q has no topic path", token)
		}
	}

	// Escape each path segment, preserving the '/' hierarchy.
	parts := strings.Split(topic, "/")
	for i, seg := range parts {
		if seg == "." || seg == ".." {
			// Never a valid topic component, and the only way a segment
			// could traverse out of the topic path after escaping.
			return "", fmt.Errorf("ntfy: device token contains dot segment %q", seg)
		}
		parts[i] = url.PathEscape(seg)
	}
	return p.baseURL + "/" + strings.Join(parts, "/"), nil
}
// isAbsoluteHTTPURL reports whether s begins with "http://" or "https://",
// compared case-insensitively. It is a prefix test only (no URL parsing) and is
// used to tell the UnifiedPush endpoint form apart from a bare ntfy topic.
func isAbsoluteHTTPURL(s string) bool {
	folded := strings.ToLower(s)
	for _, prefix := range []string{"http://", "https://"} {
		if strings.HasPrefix(folded, prefix) {
			return true
		}
	}
	return false
}

View File

@ -7,6 +7,7 @@ import (
"io" "io"
"net/http" "net/http"
"net/http/httptest" "net/http/httptest"
"net/url"
"strings" "strings"
"testing" "testing"
"time" "time"
@ -183,6 +184,108 @@ func TestSend_no_baseURL_returns_error(t *testing.T) {
} }
} }
// feat-32: an Android/GrapheneOS UnifiedPush device registers the full endpoint
// URL its distributor hands it. When that endpoint lives on the configured push
// host, Send must publish to the endpoint's path and deliver the body unchanged.
func TestSend_unifiedPush_endpoint_published(t *testing.T) {
	var gotPath, gotBody string
	handler := func(w http.ResponseWriter, r *http.Request) {
		raw, _ := io.ReadAll(r.Body)
		gotPath, gotBody = r.URL.Path, string(raw)
		w.WriteHeader(http.StatusOK)
	}
	srv := httptest.NewServer(http.HandlerFunc(handler))
	defer srv.Close()

	provider := New(Config{BaseURL: srv.URL}, nil)

	// The distributor hands the client a full endpoint on the SAME (push) host.
	msg := push.PushMessage{DeviceToken: srv.URL + "/upAbc123", Body: "payload"}
	if err := provider.Send(context.Background(), msg); err != nil {
		t.Fatalf("Send: %v", err)
	}

	if gotPath != "/upAbc123" {
		t.Errorf("UnifiedPush endpoint must publish to its topic path; got %q", gotPath)
	}
	if gotBody != "payload" {
		t.Errorf("body not delivered; got %q", gotBody)
	}
}
// TestSend_unifiedPush_endpoint_confined_to_topic checks that a URL-form token
// gets the same publish surface as a bare topic: the request path is the topic
// and the token's query string never reaches the push host.
func TestSend_unifiedPush_endpoint_confined_to_topic(t *testing.T) {
	var seen struct{ path, query string }
	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		seen.path, seen.query = r.URL.Path, r.URL.RawQuery
		w.WriteHeader(http.StatusOK)
	}))
	defer srv.Close()

	provider := New(Config{BaseURL: srv.URL}, nil)
	msg := push.PushMessage{DeviceToken: srv.URL + "/uptopic?admin=1&x=y", Body: "x"}
	if err := provider.Send(context.Background(), msg); err != nil {
		t.Fatalf("Send: %v", err)
	}

	if seen.path != "/uptopic" {
		t.Errorf("path must be the topic only; got %q", seen.path)
	}
	if seen.query != "" {
		t.Errorf("query string must be dropped (no arbitrary query on push host); got %q", seen.query)
	}
}
// TestSend_unifiedPush_endpoint_rejects_userinfo_bypass covers the classic
// SSRF-guard bypass of smuggling the trusted host into the userinfo part:
// "<scheme>://<push host>@attacker.example.com/x" has attacker.example.com as
// its real authority, so Send must fail and the push server must see no request.
func TestSend_unifiedPush_endpoint_rejects_userinfo_bypass(t *testing.T) {
	requested := false
	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
		requested = true
		w.WriteHeader(http.StatusOK)
	}))
	defer srv.Close()

	// Build the token from the test server's own scheme and host.
	base, _ := url.Parse(srv.URL)
	token := base.Scheme + "://" + base.Host + "@attacker.example.com/x"

	provider := New(Config{BaseURL: srv.URL}, nil)
	err := provider.Send(context.Background(), push.PushMessage{DeviceToken: token, Body: "x"})
	if err == nil {
		t.Fatal("expected rejection of a userinfo-smuggled host")
	}
	if requested {
		t.Error("no request must be sent for a userinfo-bypass token")
	}
}
// TestSend_unifiedPush_endpoint_rejects_foreign_host checks that an endpoint
// token on a host other than the configured push host is refused before any
// request is made, and that the error names the host mismatch.
func TestSend_unifiedPush_endpoint_rejects_foreign_host(t *testing.T) {
	requested := false
	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
		requested = true
		w.WriteHeader(http.StatusOK)
	}))
	defer srv.Close()

	provider := New(Config{BaseURL: srv.URL}, nil)

	// A device token pointing at a DIFFERENT host must never be dialed.
	foreign := push.PushMessage{
		DeviceToken: "https://attacker.example.com/steal",
		Body:        "x",
	}
	err := provider.Send(context.Background(), foreign)
	if err == nil {
		t.Fatal("expected an error for an endpoint whose host doesn't match the push host")
	}
	if requested {
		t.Error("no request must be sent when the endpoint host doesn't match")
	}
	if msg := err.Error(); !strings.Contains(msg, "does not match") {
		t.Errorf("error should explain the host mismatch; got %v", err)
	}
}
func TestName(t *testing.T) { func TestName(t *testing.T) {
p := New(Config{BaseURL: "http://x"}, nil) p := New(Config{BaseURL: "http://x"}, nil)
if p.Name() != "ntfy" { if p.Name() != "ntfy" {

193
core/pkg/push/url_guard.go Normal file
View File

@ -0,0 +1,193 @@
package push
import (
"bytes"
"context"
"fmt"
"net"
"net/url"
"strings"
"time"
)
// url_guard.go — SSRF guard for TENANT-supplied push base URLs.
//
// A tenant can override the ntfy base URL the gateway POSTs to (BYO-ntfy is a
// legitimate use case). Without a guard, a tenant could point it at an internal
// address — cloud metadata (169.254.169.254), the WireGuard mesh (10.0.0.x),
// loopback — turning the gateway's push sender into an SSRF proxy. These checks
// reject internal/reserved targets while still allowing real external hosts.
//
// IMPORTANT: apply these ONLY to tenant-supplied base URLs (the per-namespace
// override). The operator's gateway default (e.g. 127.0.0.1:8090, the local
// ntfy) is trusted and must NOT pass through here — it would be (correctly)
// rejected as loopback.
// baseURLDNSTimeout is the upper bound on the hostname-resolution step in
// CheckBaseURLResolvable, which applies it as a context timeout around the
// lookupIP call.
const baseURLDNSTimeout = 5 * time.Second
// lookupIP resolves host to its IP addresses through net.DefaultResolver and
// returns them as plain net.IP values (any zone information is discarded). It
// is a package-level variable rather than a function so tests can swap in a
// deterministic resolver instead of touching real DNS.
var lookupIP = func(ctx context.Context, host string) ([]net.IP, error) {
	resolved, err := net.DefaultResolver.LookupIPAddr(ctx, host)
	if err != nil {
		return nil, err
	}
	out := make([]net.IP, 0, len(resolved))
	for _, addr := range resolved {
		out = append(out, addr.IP)
	}
	return out, nil
}
// CheckBaseURLSyntax validates a tenant base URL's scheme and rejects a host
// that is a LITERAL internal/reserved IP. It does NOT resolve hostnames, so it
// is safe to call on hot paths (e.g. per-send dispatcher construction). An
// empty base URL is allowed — it means "use the gateway default".
//
// It returns an error when the URL does not parse, the scheme is not http or
// https, the host is missing, the host is a reserved/internal literal IP (with
// or without an IPv6 zone), or the host is a non-standard numeric IPv4
// encoding. Any other host is treated as a DNS name and accepted here.
func CheckBaseURLSyntax(baseURL string) error {
	if baseURL == "" {
		return nil
	}
	u, err := url.Parse(baseURL)
	if err != nil {
		return fmt.Errorf("base_url: invalid URL: %w", err)
	}
	if u.Scheme != "http" && u.Scheme != "https" {
		return fmt.Errorf("base_url: must start with http:// or https:// (got scheme %q)", u.Scheme)
	}
	host := u.Hostname()
	if host == "" {
		return fmt.Errorf("base_url: missing host")
	}
	// An IPv6 literal may carry a zone ("http://[fe80::1%25eth0]"), which
	// Hostname returns as "fe80::1%eth0". net.ParseIP does not accept zoned
	// addresses, so without this step a zoned link-local/loopback literal
	// would fall through and be treated as an ordinary hostname. '%' cannot
	// occur in a DNS name, so vetting the part before it is safe.
	addr := host
	if i := strings.IndexByte(host, '%'); i >= 0 {
		addr = host[:i]
	}
	if ip := net.ParseIP(addr); ip != nil {
		if isReservedIP(ip) {
			return fmt.Errorf("base_url: host %s is a reserved/internal address and is not allowed", host)
		}
		return nil
	}
	// net.ParseIP only accepts canonical dotted-decimal / standard IPv6, but the
	// OS resolver + net.Dial ALSO accept decimal ("2130706433"), hex
	// ("0x7f000001") and octal ("0177.0.0.1") IPv4 encodings — a literal-check
	// bypass to internal addresses. Reject these non-standard numeric hosts
	// outright (no legitimate push host is all-numeric or 0x-hex).
	if looksLikeNumericHost(host) {
		return fmt.Errorf("base_url: host %q is a non-standard numeric/IP encoding and is not allowed", host)
	}
	return nil
}
// CheckBaseURLResolvable is the DNS-resolving variant of CheckBaseURLSyntax.
// It first applies CheckBaseURLSyntax; then, if the host is a name rather than
// a literal IP, it resolves the name (bounded by baseURLDNSTimeout) and returns
// an error if ANY resolved address is internal/reserved. This stops a tenant
// from pointing a domain at an internal host. Because it performs DNS it is
// meant for config-set time (the PUT handlers), never the hot send path.
//
// Resolution failure FAILS OPEN: a lookup error or an empty answer is treated
// as allowed. An unresolvable host reaches nothing (delivery would fail
// anyway), and rejecting it would break a legitimate host that is momentarily
// unresolvable at config time. The hard floor remains CheckBaseURLSyntax's
// literal-IP block.
//
// Residual: being a set-time check, it does not defend against DNS rebinding
// (the host re-pointing to an internal IP AFTER it was accepted). Closing that
// would need a send-time IP check, which is complicated here by the operator's
// loopback default ntfy.
func CheckBaseURLResolvable(ctx context.Context, baseURL string) error {
	if baseURL == "" {
		return nil // empty means "use the gateway default"; nothing to resolve
	}
	if err := CheckBaseURLSyntax(baseURL); err != nil {
		return err
	}

	// The parse error is ignored on purpose: CheckBaseURLSyntax has already
	// parsed this exact string successfully.
	parsed, _ := url.Parse(baseURL)
	name := parsed.Hostname()
	if net.ParseIP(name) != nil {
		return nil // literal IP already vetted by CheckBaseURLSyntax
	}

	lookupCtx, stop := context.WithTimeout(ctx, baseURLDNSTimeout)
	defer stop()

	resolved, err := lookupIP(lookupCtx, name)
	if err != nil {
		return nil // fail open on resolution failure
	}
	// An empty answer simply skips the loop, which is also the fail-open path.
	for _, addr := range resolved {
		if !isReservedIP(addr) {
			continue
		}
		return fmt.Errorf("base_url: host %q resolves to reserved/internal address %s and is not allowed", name, addr)
	}
	return nil
}
// IsInternalBaseURL reports whether baseURL parses to a host that is a LITERAL
// internal/reserved IP (with or without an IPv6 zone) or a non-standard numeric
// IPv4 encoding. Malformed URLs and hostname URLs return false — this is the
// no-false-positive guard for hot paths (e.g. dispatcher build), where the
// goal is only to drop an internal-address override, not to re-validate syntax
// or do DNS (the set-path handlers cover those).
func IsInternalBaseURL(baseURL string) bool {
	u, err := url.Parse(baseURL)
	if err != nil {
		return false
	}
	host := u.Hostname()
	// A zoned IPv6 literal ("[fe80::1%25eth0]") comes back from Hostname as
	// "fe80::1%eth0", which net.ParseIP does not accept. Vet the address part
	// alone so a zone cannot hide a link-local/loopback literal; '%' cannot
	// occur in a DNS name, so this never affects real hostnames.
	addr := host
	if i := strings.IndexByte(host, '%'); i >= 0 {
		addr = host[:i]
	}
	if ip := net.ParseIP(addr); ip != nil {
		return isReservedIP(ip)
	}
	// Non-standard numeric encodings (decimal/hex/octal) that net.ParseIP misses
	// but net.Dial resolves to an IP — treat as internal so the build-path guard
	// matches what the dialer would actually reach.
	return looksLikeNumericHost(host)
}
// isReservedIP reports whether ip lies in a range a tenant must never reach
// through a push base URL: loopback, link-local (which includes the
// 169.254.169.254 cloud-metadata address), RFC1918 private, IPv6 ULA,
// unspecified, multicast, 100.64.0.0/10 carrier-grade NAT, and the NAT64
// well-known prefix 64:ff9b::/96. A nil ip is reported as reserved.
func isReservedIP(ip net.IP) bool {
	if ip == nil {
		return true // no address to vet → treat as unsafe
	}

	// Ranges the net.IP predicates below do not cover.
	v4 := ip.To4()
	switch {
	case v4 != nil:
		// 100.64.0.0/10 (CGNAT): first octet 100 and the top two bits of the
		// second octet equal to 01, i.e. second octet in [64,127].
		if v4[0] == 100 && v4[1]&0xc0 == 0x40 {
			return true
		}
	default:
		// NAT64 well-known prefix 64:ff9b::/96 (RFC 6052): the low 32 bits embed
		// an IPv4 address a NAT64 gateway would translate, so it can reach
		// internal v4.
		nat64 := []byte{0x00, 0x64, 0xff, 0x9b, 0, 0, 0, 0, 0, 0, 0, 0}
		if v6 := ip.To16(); v6 != nil && bytes.HasPrefix(v6, nat64) {
			return true
		}
	}

	switch {
	case ip.IsLoopback(),
		ip.IsLinkLocalUnicast(),
		ip.IsLinkLocalMulticast(),
		ip.IsInterfaceLocalMulticast(),
		ip.IsMulticast(),
		ip.IsPrivate(), // 10/8, 172.16/12, 192.168/16, fc00::/7
		ip.IsUnspecified():
		return true
	}
	return false
}
// looksLikeNumericHost reports whether host is a non-standard numeric IPv4
// encoding — hex ("0x7f000001", "0x7f.0.0.1"), decimal ("2130706433"), octal
// ("0177.0.0.1"), or a mix of bases across components ("127.0.0x1") — that
// net.ParseIP rejects but the OS resolver and net.Dial accept (resolving to a
// real, possibly internal, IPv4). Such hosts are never a legitimate push server
// name, so callers reject them rather than let them slip past the literal-IP
// guard.
//
// The host is classified label by label: it is numeric only when EVERY
// dot-separated label is numeric (see isNumericHostLabel). This catches a hex
// component in any position, and a DNS name that merely begins with "0x"
// (e.g. "0xcafe.example.com") is still treated as an ordinary name. Empty
// labels are tolerated, so malformed all-numeric dotted forms such as
// "127.0.0.1." or "1..2" are still reported as numeric. An empty host returns
// false.
func looksLikeNumericHost(host string) bool {
	if host == "" {
		return false
	}
	for _, label := range strings.Split(host, ".") {
		if !isNumericHostLabel(label) {
			return false
		}
	}
	return true
}

// isNumericHostLabel reports whether a single dot-separated host label is
// purely numeric: ASCII decimal digits only (which also covers octal), or a
// "0x"/"0X" prefix followed by ASCII hex digits only. An empty label, and a
// bare "0x" prefix, count as numeric.
func isNumericHostLabel(label string) bool {
	hex := len(label) >= 2 && label[0] == '0' && (label[1] == 'x' || label[1] == 'X')
	if hex {
		label = label[2:]
	}
	for i := 0; i < len(label); i++ {
		c := label[i]
		switch {
		case c >= '0' && c <= '9':
		case hex && (c >= 'a' && c <= 'f' || c >= 'A' && c <= 'F'):
		default:
			return false
		}
	}
	return true
}

View File

@ -0,0 +1,160 @@
package push
import (
"context"
"errors"
"net"
"testing"
)
// SSRF guard for tenant push base URLs. The tests in this file pin: literal
// internal/reserved IPs are rejected, the cloud-metadata IP is rejected, legit
// external hosts pass, and a hostname that RESOLVES to an internal address is
// rejected (the DNS vector) while a public-resolving host passes.

// TestCheckBaseURLSyntax covers the no-DNS check: which base URLs pass and
// which are refused on scheme, host, or literal-address grounds.
func TestCheckBaseURLSyntax(t *testing.T) {
	allowed := []string{
		"",                         // empty = use default
		"https://push.example.com", // public host
		"http://push.example.com:8090",
		"https://1.1.1.1",                // public literal IP
		"https://[2606:4700:4700::1111]", // public v6
	}
	rejected := []string{
		"ftp://push.example.com",  // bad scheme
		"notaurl",                 // no scheme/host
		"http://",                 // missing host
		"http://169.254.169.254",  // cloud metadata (link-local)
		"http://127.0.0.1",        // loopback
		"http://127.0.0.1:8090",   // loopback + port
		"http://10.0.0.5",         // RFC1918 (WireGuard mesh)
		"http://192.168.1.1",      // RFC1918
		"http://172.16.0.1",       // RFC1918
		"http://100.64.0.1",       // CGNAT
		"http://0.0.0.0",          // unspecified
		"http://[::1]",            // v6 loopback
		"http://[fd00::1]",        // v6 ULA
		"http://[64:ff9b::a00:5]", // NAT64-embedded 10.0.0.5
		"http://0x7f000001",       // hex-encoded 127.0.0.1
		"http://2130706433",       // decimal-encoded 127.0.0.1
		"http://0177.0.0.1",       // octal-encoded 127.0.0.1
	}

	for _, raw := range allowed {
		if err := CheckBaseURLSyntax(raw); err != nil {
			t.Errorf("CheckBaseURLSyntax(%q) = %v; want nil", raw, err)
		}
	}
	for _, raw := range rejected {
		if CheckBaseURLSyntax(raw) == nil {
			t.Errorf("CheckBaseURLSyntax(%q) = nil; want error", raw)
		}
	}
}
// TestIsReservedIP checks the address classifier directly: every reserved
// sample must be flagged, every public sample must not, and a nil IP counts as
// reserved.
func TestIsReservedIP(t *testing.T) {
	samples := []struct {
		reserved bool
		addrs    []string
	}{
		{true, []string{
			"127.0.0.1", "169.254.169.254", "10.0.0.1", "172.16.5.5", "192.168.0.1",
			"100.64.0.1", "100.100.100.200", "0.0.0.0", "224.0.0.1", "::1", "fe80::1",
			"fd00::1", "ff02::1",
			"64:ff9b::a00:1",     // NAT64-embedded 10.0.0.1
			"64:ff9b::a9fe:a9fe", // NAT64-embedded 169.254.169.254 (metadata)
		}},
		{false, []string{"1.1.1.1", "8.8.8.8", "203.0.113.10", "2606:4700:4700::1111"}},
	}
	for _, group := range samples {
		for _, s := range group.addrs {
			got := isReservedIP(net.ParseIP(s))
			switch {
			case group.reserved && !got:
				t.Errorf("isReservedIP(%s) = false; want true (reserved)", s)
			case !group.reserved && got:
				t.Errorf("isReservedIP(%s) = true; want false (public)", s)
			}
		}
	}
	if !isReservedIP(nil) {
		t.Error("isReservedIP(nil) must be true (unparseable → unsafe)")
	}
}
// TestIsInternalBaseURL checks the build-path guard: literal internal
// addresses (including non-standard numeric encodings) are reported as
// internal, while hostnames, public literals and malformed values are not.
func TestIsInternalBaseURL(t *testing.T) {
	cases := []struct {
		url  string
		want bool
	}{
		{"http://10.0.0.5", true},
		{"http://169.254.169.254", true},
		{"https://127.0.0.1:8090", true},
		{"http://[::1]", true},
		{"http://192.168.1.1", true},
		{"http://[64:ff9b::a00:5]", true}, // NAT64
		{"http://0x7f000001", true},       // hex-encoded loopback
		{"http://2130706433", true},       // decimal-encoded loopback
		{"http://0177.0.0.1", true},       // octal-encoded loopback

		{"https://push.example.com", false}, // hostname → false (the set path resolves it)
		{"https://1.1.1.1", false},          // public literal IP
		{"ns-A-url", false},                 // malformed placeholder → must NOT be dropped
		{"v1", false},
		{"", false},
		{"not a url", false},
	}
	for _, tc := range cases {
		got := IsInternalBaseURL(tc.url)
		switch {
		case tc.want && !got:
			t.Errorf("IsInternalBaseURL(%q) = false; want true (internal literal IP)", tc.url)
		case !tc.want && got:
			t.Errorf("IsInternalBaseURL(%q) = true; want false", tc.url)
		}
	}
}
// TestCheckBaseURLResolvable exercises the DNS-resolving check with lookupIP
// replaced by deterministic stubs, so no real DNS is consulted. The original
// resolver is restored when the test finishes.
func TestCheckBaseURLResolvable(t *testing.T) {
	saved := lookupIP
	t.Cleanup(func() { lookupIP = saved })

	ctx := context.Background()

	// answer installs a resolver stub that returns the given addresses for
	// every host.
	answer := func(addrs ...string) {
		ips := make([]net.IP, 0, len(addrs))
		for _, a := range addrs {
			ips = append(ips, net.ParseIP(a))
		}
		lookupIP = func(context.Context, string) ([]net.IP, error) { return ips, nil }
	}

	t.Run("hostname resolving to internal is rejected", func(t *testing.T) {
		answer("10.0.0.7") // points at the mesh
		if CheckBaseURLResolvable(ctx, "https://evil.example.com") == nil {
			t.Fatal("expected rejection of a host resolving to an internal address")
		}
	})

	t.Run("hostname resolving to public is allowed", func(t *testing.T) {
		answer("203.0.113.50")
		if err := CheckBaseURLResolvable(ctx, "https://push.example.com"); err != nil {
			t.Fatalf("public-resolving host should pass: %v", err)
		}
	})

	t.Run("any internal IP among results is rejected", func(t *testing.T) {
		answer("203.0.113.50", "127.0.0.1")
		if CheckBaseURLResolvable(ctx, "https://mixed.example.com") == nil {
			t.Fatal("a host resolving to ANY internal address must be rejected")
		}
	})

	t.Run("resolution failure is allowed (fail open)", func(t *testing.T) {
		lookupIP = func(context.Context, string) ([]net.IP, error) {
			return nil, errors.New("nxdomain")
		}
		if err := CheckBaseURLResolvable(ctx, "https://unresolvable.example.com"); err != nil {
			t.Fatalf("an unresolvable host should fail open (be allowed); got %v", err)
		}
	})

	t.Run("literal internal IP rejected without DNS", func(t *testing.T) {
		lookupIP = func(context.Context, string) ([]net.IP, error) {
			t.Fatal("DNS must not be consulted for a literal IP host")
			return nil, nil
		}
		if CheckBaseURLResolvable(ctx, "http://169.254.169.254") == nil {
			t.Fatal("literal metadata IP must be rejected")
		}
	})

	t.Run("empty is allowed", func(t *testing.T) {
		if err := CheckBaseURLResolvable(ctx, ""); err != nil {
			t.Fatalf("empty base_url should pass: %v", err)
		}
	})
}

View File

@ -2,6 +2,7 @@ package serverless
import ( import (
"context" "context"
cryptorand "crypto/rand"
"errors" "errors"
"fmt" "fmt"
"time" "time"
@ -318,6 +319,15 @@ func (e *Engine) Execute(ctx context.Context, fn *Function, input []byte, invCtx
// gates invocation frequency, not per-invocation host-call volume). // gates invocation frequency, not per-invocation host-call volume).
execCtx = WithPublishCounter(execCtx) execCtx = WithPublishCounter(execCtx)
// Raw-HTTP-response mode (bugboard #835). Only RawHTTPResponse functions
// get a collector attached — set_http_response is a validated no-op for
// every other function (no collector → host call returns an error). The
// collector rides execCtx so concurrent invocations never cross-write,
// matching the publish-counter / log-buffer per-call model.
if fn.RawHTTPResponse {
execCtx = WithRawHTTPCollector(execCtx)
}
// Get compiled module (from cache or compile) // Get compiled module (from cache or compile)
module, err := e.getOrCompileModule(execCtx, fn.WASMCID) module, err := e.getOrCompileModule(execCtx, fn.WASMCID)
if err != nil { if err != nil {
@ -346,6 +356,14 @@ func (e *Engine) Execute(ctx context.Context, fn *Function, input []byte, invCtx
return nil, &ExecutionError{FunctionName: fn.Name, RequestID: invCtx.RequestID, Cause: err} return nil, &ExecutionError{FunctionName: fn.Name, RequestID: invCtx.RequestID, Cause: err}
} }
// Surface any verbatim HTTP response the function set (bugboard #835)
// onto invCtx so the Invoker → HTTP handler can replay it. Only
// RawHTTPResponse functions have a collector attached; TakeRawHTTPResponse
// returns (_, false) otherwise.
if res, ok := TakeRawHTTPResponse(execCtx); ok {
invCtx.RawHTTP = &res
}
e.logInvocation(ctx, fn, invCtx, logBuf, startTime, len(output), InvocationStatusSuccess, nil) e.logInvocation(ctx, fn, invCtx, logBuf, startTime, len(output), InvocationStatusSuccess, nil)
e.logSlowInvocation(invCtx, startTime, ratelimitDoneAt, moduleLoadedAt, executeDoneAt, "success", nil) e.logSlowInvocation(invCtx, startTime, ratelimitDoneAt, moduleLoadedAt, executeDoneAt, "success", nil)
return output, nil return output, nil
@ -547,7 +565,13 @@ func (e *Engine) InstantiatePersistent(ctx context.Context, fn *Function, invCtx
// into real clocks via the documented wazero hook — same effect as // into real clocks via the documented wazero hook — same effect as
// the runtime would get on a normal Go process. // the runtime would get on a normal Go process.
WithSysWalltime(). WithSysWalltime().
WithSysNanotime() WithSysNanotime().
// Bugboard #120 — same class as #27. Without WithRandSource, wazero's
// default RNG is deterministic (zero seed), so TinyGo crypto/rand.Read
// returns identical bytes on every fresh instance — constant codes /
// nonces / tokens. Wire in the host CSPRNG. Same fix at
// execution/executor.go for the stateless path.
WithRandSource(cryptorand.Reader)
instance, err := e.runtime.InstantiateModule(ctx, compiled, moduleConfig) instance, err := e.runtime.InstantiateModule(ctx, compiled, moduleConfig)
if err != nil { if err != nil {
@ -742,6 +766,7 @@ func (e *Engine) registerHostModule(ctx context.Context) error {
NewFunctionBuilder().WithFunc(e.hCacheIncrBy).Export("cache_incr_by"). NewFunctionBuilder().WithFunc(e.hCacheIncrBy).Export("cache_incr_by").
NewFunctionBuilder().WithFunc(e.hHTTPFetch).Export("http_fetch"). NewFunctionBuilder().WithFunc(e.hHTTPFetch).Export("http_fetch").
NewFunctionBuilder().WithFunc(e.hAnyoneFetch).Export("anyone_fetch"). NewFunctionBuilder().WithFunc(e.hAnyoneFetch).Export("anyone_fetch").
NewFunctionBuilder().WithFunc(e.hSetHTTPResponse).Export("set_http_response").
NewFunctionBuilder().WithFunc(e.hPubSubPublish).Export("pubsub_publish"). NewFunctionBuilder().WithFunc(e.hPubSubPublish).Export("pubsub_publish").
NewFunctionBuilder().WithFunc(e.hPubSubPublishBatch).Export("pubsub_publish_batch"). NewFunctionBuilder().WithFunc(e.hPubSubPublishBatch).Export("pubsub_publish_batch").
NewFunctionBuilder().WithFunc(e.hPushSend).Export("push_send"). NewFunctionBuilder().WithFunc(e.hPushSend).Export("push_send").
@ -751,6 +776,8 @@ func (e *Engine) registerHostModule(ctx context.Context) error {
NewFunctionBuilder().WithFunc(e.hWSPubSubUnbridge).Export("ws_pubsub_unbridge"). NewFunctionBuilder().WithFunc(e.hWSPubSubUnbridge).Export("ws_pubsub_unbridge").
NewFunctionBuilder().WithFunc(e.hWSSend).Export("ws_send"). NewFunctionBuilder().WithFunc(e.hWSSend).Export("ws_send").
NewFunctionBuilder().WithFunc(e.hWSBroadcast).Export("ws_broadcast"). NewFunctionBuilder().WithFunc(e.hWSBroadcast).Export("ws_broadcast").
NewFunctionBuilder().WithFunc(e.hEphemeralStateSet).Export("ephemeral_state_set").
NewFunctionBuilder().WithFunc(e.hEphemeralStateClear).Export("ephemeral_state_clear").
NewFunctionBuilder().WithFunc(e.hFunctionInvoke).Export("function_invoke"). NewFunctionBuilder().WithFunc(e.hFunctionInvoke).Export("function_invoke").
NewFunctionBuilder().WithFunc(e.hFunctionInvokeAsync).Export("function_invoke_async"). NewFunctionBuilder().WithFunc(e.hFunctionInvokeAsync).Export("function_invoke_async").
NewFunctionBuilder().WithFunc(e.hLogInfo).Export("log_info"). NewFunctionBuilder().WithFunc(e.hLogInfo).Export("log_info").
@ -948,6 +975,40 @@ func (e *Engine) hHTTPFetch(ctx context.Context, mod api.Module, methodPtr, meth
return e.executor.WriteToGuest(ctx, mod, resp) return e.executor.WriteToGuest(ctx, mod, resp)
} }
// hSetHTTPResponse is the WASM-callable wrapper for SetHTTPResponse —
// bugboard #835 raw-HTTP-response mode.
//
// ABI: set_http_response(status i32, headersJSONPtr, headersJSONLen,
// bodyPtr, bodyLen uint32) -> uint32.
//
// headersJSON is decoded only when headersJSONLen is non-zero and must then be
// a JSON object of string→string; the body is read from guest memory only when
// bodyLen is non-zero. Returns 1 on success and 0 on failure: the headers could
// not be decoded, the body could not be read from guest memory, or the host
// service rejected the call (per the ABI notes: function not deployed with
// raw_http_response, bad status, oversized headers/body).
func (e *Engine) hSetHTTPResponse(ctx context.Context, mod api.Module,
	status, headersPtr, headersLen, bodyPtr, bodyLen uint32) uint32 {
	const (
		failure uint32 = 0
		success uint32 = 1
	)

	var hdrs map[string]string
	if headersLen != 0 {
		err := e.executor.UnmarshalJSONFromGuest(mod, headersPtr, headersLen, &hdrs)
		if err != nil {
			e.logger.Warn("set_http_response: failed to unmarshal headers", zap.Error(err))
			return failure
		}
	}

	var payload []byte
	if bodyLen != 0 {
		data, readOK := e.executor.ReadFromGuest(mod, bodyPtr, bodyLen)
		if !readOK {
			return failure
		}
		payload = data
	}

	err := e.hostServices.SetHTTPResponse(ctx, int(status), hdrs, payload)
	if err != nil {
		e.logger.Warn("host function set_http_response failed", zap.Error(err))
		return failure
	}
	return success
}
// hAnyoneFetch is the WASM-callable wrapper for AnyoneFetch — feat-11. // hAnyoneFetch is the WASM-callable wrapper for AnyoneFetch — feat-11.
// Identical ABI to hHTTPFetch (method, url, headers JSON, body), routes // Identical ABI to hHTTPFetch (method, url, headers JSON, body), routes
// through the Anyone SOCKS5 proxy. Returns packed (ptr<<32 | len) to the // through the Anyone SOCKS5 proxy. Returns packed (ptr<<32 | len) to the
@ -1291,6 +1352,67 @@ func (e *Engine) hWSBroadcast(ctx context.Context, mod api.Module,
return 1 return 1
} }
// hEphemeralStateSet is the WASM-callable wrapper for EphemeralStateSet —
// bugboard #710 WS-subscribe-tracked ephemeral state.
//
// ABI: ephemeral_state_set(topicPtr, topicLen, keyPtr, keyLen, payloadPtr,
// payloadLen uint32, ttlMs int64) -> uint32.
//
// Topic and key are always read from guest memory; the payload is read only
// when payloadLen is non-zero. Returns 1 on success and 0 on failure: a
// guest-memory read error, or the host service rejecting the call (per the ABI
// notes: no WS client in context, empty topic/key, oversized payload, per-client
// key cap). Host-service failures are logged with the topic and key.
func (e *Engine) hEphemeralStateSet(ctx context.Context, mod api.Module,
	topicPtr, topicLen, keyPtr, keyLen, payloadPtr, payloadLen uint32, ttlMs int64) uint32 {
	const (
		failure uint32 = 0
		success uint32 = 1
	)

	topic, topicOK := e.executor.ReadFromGuest(mod, topicPtr, topicLen)
	if !topicOK {
		return failure
	}
	key, keyOK := e.executor.ReadFromGuest(mod, keyPtr, keyLen)
	if !keyOK {
		return failure
	}

	var payload []byte
	if payloadLen != 0 {
		data, payloadOK := e.executor.ReadFromGuest(mod, payloadPtr, payloadLen)
		if !payloadOK {
			return failure
		}
		payload = data
	}

	err := e.hostServices.EphemeralStateSet(ctx, string(topic), string(key), payload, ttlMs)
	if err == nil {
		return success
	}
	e.logger.Warn("host function ephemeral_state_set failed",
		zap.String("topic", string(topic)),
		zap.String("key", string(key)),
		zap.Error(err))
	return failure
}
// hEphemeralStateClear exposes EphemeralStateClear to WASM guests.
//
// ABI: ephemeral_state_clear(topicPtr, topicLen, keyPtr, keyLen uint32) ->
// uint32.
//
// Topic and key are read out of guest memory and passed to the host service.
// The result is 1 when the host service reports success (which includes
// idempotent clears of a missing key) and 0 when a guest-memory read fails or
// the host service returns an error (for example no WS client in context or
// an empty topic/key). Host-service errors are logged at Warn level.
func (e *Engine) hEphemeralStateClear(ctx context.Context, mod api.Module,
	topicPtr, topicLen, keyPtr, keyLen uint32) uint32 {
	rawTopic, topicOK := e.executor.ReadFromGuest(mod, topicPtr, topicLen)
	if !topicOK {
		return 0
	}
	rawKey, keyOK := e.executor.ReadFromGuest(mod, keyPtr, keyLen)
	if !keyOK {
		return 0
	}

	topic, key := string(rawTopic), string(rawKey)
	err := e.hostServices.EphemeralStateClear(ctx, topic, key)
	if err == nil {
		return 1
	}
	e.logger.Warn("host function ephemeral_state_clear failed",
		zap.String("topic", topic),
		zap.String("key", key),
		zap.Error(err))
	return 0
}
// hPushSend is the WASM-callable wrapper for PushSend. // hPushSend is the WASM-callable wrapper for PushSend.
// Inputs: // Inputs:
// userIDPtr/userIDLen — UTF-8 user ID to push to (within the function's // userIDPtr/userIDLen — UTF-8 user ID to push to (within the function's

View File

@ -0,0 +1,52 @@
package serverless
import (
"context"
"testing"
"go.uber.org/zap"
)
// fakeWSConn is a no-op WebSocketConn for exercising WSManager lifecycle.
// Every method succeeds immediately and carries no data, so tests can register
// and unregister a client without a real socket.
type fakeWSConn struct{}
// WriteMessage discards the message and reports success.
func (fakeWSConn) WriteMessage(int, []byte) error { return nil }
// ReadMessage returns a zero message type, no payload, and no error.
func (fakeWSConn) ReadMessage() (int, []byte, error) { return 0, nil, nil }
// Close does nothing and reports success.
func (fakeWSConn) Close() error { return nil }
// TestWSManager_DisconnectHookClearsEphemeralState verifies the wiring that
// makes Feature #710's auto-clear work: a disconnect hook registered on the
// WSManager fires on Unregister, clearing the disconnecting client's ephemeral
// state. Both the stateless and persistent WS handlers call Unregister, so
// this single hook covers both paths.
func TestWSManager_DisconnectHookClearsEphemeralState(t *testing.T) {
logger := zap.NewNop()
wsm := NewWSManager(logger)
pub := &capturePublisher{}
store := NewEphemeralStore(pub.publish)
// Wire the hook exactly as NewHostFunctions does.
wsm.AddDisconnectHook(func(clientID string) {
store.ClearClient(context.Background(), clientID)
})
clientID := "client-A"
wsm.Register(clientID, fakeWSConn{})
if err := store.Set(context.Background(), "ns1", clientID, "t", "k", []byte("p"), 0); err != nil {
t.Fatalf("Set: %v", err)
}
if store.keyCountForTest() != 1 {
t.Fatalf("expected 1 key before disconnect, got %d", store.keyCountForTest())
}
// Disconnect → hook fires → state cleared + synthetic clear published.
wsm.Unregister(clientID)
if store.keyCountForTest() != 0 {
t.Errorf("disconnect hook did not clear ephemeral state, count=%d", store.keyCountForTest())
}
if pub.countKind(EphemeralEventClear) != 1 {
t.Errorf("expected 1 synthetic clear on disconnect, got %d", pub.countKind(EphemeralEventClear))
}
}

View File

@ -0,0 +1,352 @@
package serverless
import (
"context"
"encoding/json"
"fmt"
"sync"
"time"
)
// WS-subscribe-tracked ephemeral state primitive (bugboard #710).
//
// A serverless function can publish short-lived per-subscriber state (typing
// indicators, "online" flags, cursor positions, …) keyed by (topic, key) and
// have the gateway AUTO-CLEAR that state the moment the owning WebSocket
// client disconnects — publishing a synthetic clear event so every subscriber
// sees the state vanish with zero cron lag. State also expires on a TTL as a
// backstop.
//
// Ownership model: each set is tagged with the CURRENT invocation's WS client
// ID (the same source GetWSClientID reads). On disconnect the store iterates
// that client's owned (topic,key) entries, publishes a clear event for each,
// and drops them. A client's disconnect never touches another client's state.
const (
	// ephemeralMaxKeysPerClient caps how many distinct (topic,key) entries a
	// single WS client may own at once. Bounds the per-client memory + the
	// fan-out of synthetic clears on disconnect.
	ephemeralMaxKeysPerClient = 256

	// ephemeralMaxPayloadBytes caps a single ephemeral payload. Generous for
	// presence/typing/cursor metadata while bounding gateway memory.
	ephemeralMaxPayloadBytes = 16 << 10 // 16 KiB

	// ephemeralMaxTTL caps the requested TTL. Ephemeral state is meant to be
	// short-lived; the disconnect hook is the primary cleanup path and the TTL
	// is only a backstop, so a long TTL is never useful.
	ephemeralMaxTTL = 30 * time.Minute

	// ephemeralDefaultTTL is applied when a caller passes ttlMs <= 0.
	ephemeralDefaultTTL = 60 * time.Second

	// ephemeralSweepInterval is how often the backstop sweeper scans for
	// expired entries. The disconnect hook handles the common case; the
	// sweeper only catches entries whose owner is still connected but whose
	// TTL elapsed.
	ephemeralSweepInterval = 10 * time.Second
)

// EphemeralEventKind discriminates the synthetic events published on a topic.
type EphemeralEventKind string

const (
	EphemeralEventSet   EphemeralEventKind = "set"
	EphemeralEventClear EphemeralEventKind = "clear"
)

// EphemeralEvent is the wire shape published on the topic when ephemeral state
// is set, cleared, or auto-cleared on disconnect/expiry. Subscribers key off
// Kind + Key to update their local view. Payload is only populated for "set".
type EphemeralEvent struct {
	Type     string             `json:"__ephemeral"` // always "state"
	Kind     EphemeralEventKind `json:"kind"`        // set | clear
	Key      string             `json:"key"`         // app-chosen key
	ClientID string             `json:"client_id"`   // owning WS client
	// Payload is the opaque app-chosen blob (may be JSON, protobuf, or
	// arbitrary bytes), present only for "set". encoding/json base64-encodes
	// a []byte on the wire, so subscribers base64-decode "payload" to recover
	// the original bytes — mirroring how pubsub_publish_batch carries data.
	Payload []byte `json:"payload,omitempty"`
	Reason  string `json:"reason,omitempty"` // clear only: explicit|disconnect|expired
}

// ephemeralPublisher publishes data on a (namespace, topic). Abstracted so the
// store can publish synthetic clears without depending on the concrete pubsub
// adapter type — and so tests can capture published events. Namespace handling
// matches the host pubsub path: the adapter namespaces internally, so this
// publisher receives the already-namespaced caller's topic verbatim.
type ephemeralPublisher func(ctx context.Context, namespace, topic string, data []byte) error

// ephemeralEntry is one stored value plus its expiry and the metadata needed
// to publish a clear event for it.
type ephemeralEntry struct {
	namespace string
	topic     string
	key       string
	clientID  string
	payload   []byte
	expiresAt time.Time
}

// ephemeralStateKey identifies a stored value across namespaces/topics.
type ephemeralStateKey struct {
	namespace string
	topic     string
	key       string
}

// EphemeralStore holds WS-subscribe-tracked ephemeral state with auto-clear on
// disconnect (bugboard #710). Safe for concurrent use: mu guards both maps.
type EphemeralStore struct {
	publish ephemeralPublisher

	mu sync.Mutex
	// values keyed by (ns, topic, key).
	values map[ephemeralStateKey]*ephemeralEntry
	// owned maps a clientID to the set of state keys it owns, so disconnect
	// cleanup only has to visit that client's own entries.
	owned map[string]map[ephemeralStateKey]struct{}

	// sweeper lifecycle.
	stopOnce sync.Once
	stopCh   chan struct{}

	now func() time.Time // injectable clock for tests
}

// NewEphemeralStore constructs a store with the given publisher. The publisher
// may be nil (set/clear then skip publishing) — useful in tests, but in
// production the host wires the pubsub adapter so subscribers see events.
func NewEphemeralStore(publish ephemeralPublisher) *EphemeralStore {
	return &EphemeralStore{
		publish: publish,
		values:  make(map[ephemeralStateKey]*ephemeralEntry),
		owned:   make(map[string]map[ephemeralStateKey]struct{}),
		stopCh:  make(chan struct{}),
		now:     time.Now,
	}
}

// ephemeralTTLFromMillis converts a guest-supplied TTL in milliseconds into a
// bounded duration: non-positive values select ephemeralDefaultTTL and values
// at or above the cap select ephemeralMaxTTL.
//
// The comparison is done in milliseconds BEFORE multiplying, because
// time.Duration is an int64 nanosecond count and ttlMs*1e6 silently wraps for
// large inputs (math.MaxInt64 would wrap negative and be mistaken for
// "use the default").
func ephemeralTTLFromMillis(ttlMs int64) time.Duration {
	const maxMs = int64(ephemeralMaxTTL / time.Millisecond)
	switch {
	case ttlMs <= 0:
		return ephemeralDefaultTTL
	case ttlMs >= maxMs:
		return ephemeralMaxTTL
	default:
		return time.Duration(ttlMs) * time.Millisecond
	}
}

// Set records an ephemeral value owned by clientID and publishes a "set" event
// on the topic so subscribers observe it. ttlMs <= 0 selects the default TTL
// and anything above ephemeralMaxTTL is clamped to it. Returns an error on
// validation failure (empty client/topic/key, oversized payload, per-client
// cap reached) or when publishing the event fails; in the publish-failure case
// the value has already been stored.
func (s *EphemeralStore) Set(ctx context.Context, namespace, clientID, topic, key string, payload []byte, ttlMs int64) error {
	if clientID == "" {
		return fmt.Errorf("ephemeral_state_set: requires a WebSocket client (no ws_client_id in invocation context)")
	}
	if topic == "" || key == "" {
		return fmt.Errorf("ephemeral_state_set: topic and key are required")
	}
	if len(payload) > ephemeralMaxPayloadBytes {
		return fmt.Errorf("ephemeral_state_set: payload too large (%d > %d bytes)", len(payload), ephemeralMaxPayloadBytes)
	}
	ttl := ephemeralTTLFromMillis(ttlMs)

	sk := ephemeralStateKey{namespace: namespace, topic: topic, key: key}
	// Copy so later mutation of the caller's slice cannot alter stored state.
	payloadCopy := make([]byte, len(payload))
	copy(payloadCopy, payload)

	s.mu.Lock()
	ownedSet := s.owned[clientID]
	prev, exists := s.values[sk]
	ownedByCaller := exists && prev.clientID == clientID

	// Enforce the per-client cap only for NEW keys this client doesn't already
	// own — overwriting an existing key must always be allowed.
	if !ownedByCaller && len(ownedSet) >= ephemeralMaxKeysPerClient {
		s.mu.Unlock()
		return fmt.Errorf("ephemeral_state_set: client %s exceeded max %d ephemeral keys", clientID, ephemeralMaxKeysPerClient)
	}

	// If a different client owned this exact (ns,topic,key), transfer ownership
	// — drop it from the previous owner's set so its disconnect won't clear
	// state it no longer owns.
	if exists && !ownedByCaller {
		if prevSet := s.owned[prev.clientID]; prevSet != nil {
			delete(prevSet, sk)
			if len(prevSet) == 0 {
				delete(s.owned, prev.clientID)
			}
		}
	}

	s.values[sk] = &ephemeralEntry{
		namespace: namespace,
		topic:     topic,
		key:       key,
		clientID:  clientID,
		payload:   payloadCopy,
		expiresAt: s.now().Add(ttl),
	}
	if ownedSet == nil {
		ownedSet = make(map[ephemeralStateKey]struct{})
		s.owned[clientID] = ownedSet
	}
	ownedSet[sk] = struct{}{}
	s.mu.Unlock()

	// Publish outside the lock so a slow publisher never blocks other callers.
	evt := EphemeralEvent{
		Type:     "state",
		Kind:     EphemeralEventSet,
		Key:      key,
		ClientID: clientID,
		Payload:  payloadCopy,
	}
	return s.publishEvent(ctx, namespace, topic, evt)
}

// Clear removes an ephemeral value the client owns and publishes a "clear"
// event with reason "explicit". Clearing a key the client does not own (or a
// missing key) is a no-op that still returns nil — clears are idempotent.
func (s *EphemeralStore) Clear(ctx context.Context, namespace, clientID, topic, key string) error {
	if clientID == "" {
		return fmt.Errorf("ephemeral_state_clear: requires a WebSocket client (no ws_client_id in invocation context)")
	}
	if topic == "" || key == "" {
		return fmt.Errorf("ephemeral_state_clear: topic and key are required")
	}

	sk := ephemeralStateKey{namespace: namespace, topic: topic, key: key}

	s.mu.Lock()
	entry, ok := s.values[sk]
	if !ok || entry.clientID != clientID {
		// Not present, or owned by someone else — idempotent no-op.
		s.mu.Unlock()
		return nil
	}
	s.removeLocked(sk, entry)
	s.mu.Unlock()

	return s.publishEvent(ctx, namespace, topic, EphemeralEvent{
		Type:     "state",
		Kind:     EphemeralEventClear,
		Key:      key,
		ClientID: clientID,
		Reason:   "explicit",
	})
}

// ClearClient removes every entry owned by clientID and publishes a clear
// event for each (reason "disconnect"). Called from the WS disconnect hook —
// the primary, zero-lag cleanup path. Safe to call for an unknown client.
func (s *EphemeralStore) ClearClient(ctx context.Context, clientID string) {
	s.clearClientWithReason(ctx, clientID, "disconnect")
}

// clearClientWithReason drops every entry owned by clientID and publishes one
// clear event per dropped entry tagged with reason. Publish errors are
// deliberately ignored: the state is already gone locally and there is no
// caller to report to.
func (s *EphemeralStore) clearClientWithReason(ctx context.Context, clientID, reason string) {
	s.mu.Lock()
	ownedSet := s.owned[clientID]
	if len(ownedSet) == 0 {
		delete(s.owned, clientID)
		s.mu.Unlock()
		return
	}
	// Snapshot entries to publish after releasing the lock.
	toClear := make([]*ephemeralEntry, 0, len(ownedSet))
	for sk := range ownedSet {
		if entry, ok := s.values[sk]; ok {
			toClear = append(toClear, entry)
			delete(s.values, sk)
		}
	}
	delete(s.owned, clientID)
	s.mu.Unlock()

	for _, entry := range toClear {
		_ = s.publishEvent(ctx, entry.namespace, entry.topic, EphemeralEvent{
			Type:     "state",
			Kind:     EphemeralEventClear,
			Key:      entry.key,
			ClientID: clientID,
			Reason:   reason,
		})
	}
}

// removeLocked drops one entry from both maps. Caller holds s.mu.
func (s *EphemeralStore) removeLocked(sk ephemeralStateKey, entry *ephemeralEntry) {
	delete(s.values, sk)
	if set := s.owned[entry.clientID]; set != nil {
		delete(set, sk)
		if len(set) == 0 {
			delete(s.owned, entry.clientID)
		}
	}
}

// publishEvent marshals and publishes a synthetic event. No-op (nil) when no
// publisher is wired.
func (s *EphemeralStore) publishEvent(ctx context.Context, namespace, topic string, evt EphemeralEvent) error {
	if s.publish == nil {
		return nil
	}
	data, err := json.Marshal(evt)
	if err != nil {
		return fmt.Errorf("ephemeral state: marshal event: %w", err)
	}
	if err := s.publish(ctx, namespace, topic, data); err != nil {
		return fmt.Errorf("ephemeral state: publish %s event: %w", evt.Kind, err)
	}
	return nil
}

// StartSweeper launches the TTL backstop sweeper in its own goroutine, which
// runs until StopSweeper is called. Idempotent guards aren't provided — call
// exactly once.
func (s *EphemeralStore) StartSweeper() {
	go func() {
		ticker := time.NewTicker(ephemeralSweepInterval)
		defer ticker.Stop()
		for {
			select {
			case <-s.stopCh:
				return
			case <-ticker.C:
				s.sweepExpired(context.Background())
			}
		}
	}()
}

// StopSweeper stops the backstop sweeper. Safe to call multiple times.
func (s *EphemeralStore) StopSweeper() {
	s.stopOnce.Do(func() { close(s.stopCh) })
}

// sweepExpired removes and publishes clears (reason "expired") for every entry
// whose TTL elapsed. Publish errors are ignored for the same reason as in
// clearClientWithReason.
func (s *EphemeralStore) sweepExpired(ctx context.Context) {
	now := s.now()

	s.mu.Lock()
	var expired []*ephemeralEntry
	for sk, entry := range s.values {
		if now.After(entry.expiresAt) {
			expired = append(expired, entry)
			s.removeLocked(sk, entry)
		}
	}
	s.mu.Unlock()

	for _, entry := range expired {
		_ = s.publishEvent(ctx, entry.namespace, entry.topic, EphemeralEvent{
			Type:     "state",
			Kind:     EphemeralEventClear,
			Key:      entry.key,
			ClientID: entry.clientID,
			Reason:   "expired",
		})
	}
}

// keyCountForTest returns the number of stored values (test-only accessor).
func (s *EphemeralStore) keyCountForTest() int {
	s.mu.Lock()
	defer s.mu.Unlock()
	return len(s.values)
}

View File

@ -0,0 +1,295 @@
package serverless
import (
"context"
"encoding/json"
"fmt"
"sync"
"testing"
"time"
)
// capturedEvent is one decoded publish call: the namespace and topic it was
// sent on plus the event it carried.
type capturedEvent struct {
	namespace string
	topic     string
	event     EphemeralEvent
}

// capturePublisher is an in-memory publisher that keeps every event it is
// handed so tests can assert on what the store published. The mutex guards
// events.
type capturePublisher struct {
	mu     sync.Mutex
	events []capturedEvent
}

// publish decodes data as an EphemeralEvent and appends it to the log. A
// payload that does not decode is returned as an error and not recorded.
func (c *capturePublisher) publish(_ context.Context, namespace, topic string, data []byte) error {
	var decoded EphemeralEvent
	if err := json.Unmarshal(data, &decoded); err != nil {
		return err
	}
	record := capturedEvent{namespace: namespace, topic: topic, event: decoded}

	c.mu.Lock()
	defer c.mu.Unlock()
	c.events = append(c.events, record)
	return nil
}

// snapshot returns a copy of the events recorded so far, in publish order.
func (c *capturePublisher) snapshot() []capturedEvent {
	c.mu.Lock()
	defer c.mu.Unlock()

	dup := make([]capturedEvent, len(c.events))
	copy(dup, c.events)
	return dup
}

// countKind reports how many recorded events have the given kind.
func (c *capturePublisher) countKind(kind EphemeralEventKind) int {
	c.mu.Lock()
	defer c.mu.Unlock()

	total := 0
	for i := range c.events {
		if c.events[i].event.Kind != kind {
			continue
		}
		total++
	}
	return total
}
// newTestStore builds an EphemeralStore wired to pub. The sweeper is not
// started, so tests drive expiry by calling sweepExpired themselves.
func newTestStore(pub ephemeralPublisher) *EphemeralStore {
	return NewEphemeralStore(pub)
}
// TestEphemeralStore_SetThenClear checks the basic round trip: a Set stores
// one key and publishes a "set" event carrying the payload, and a Clear by the
// owner removes it and publishes a "clear" event with reason "explicit".
func TestEphemeralStore_SetThenClear(t *testing.T) {
	pub := &capturePublisher{}
	s := newTestStore(pub.publish)
	ctx := context.Background()

	if err := s.Set(ctx, "ns1", "client-A", "typing:room1", "k1", []byte(`{"typing":true}`), 0); err != nil {
		t.Fatalf("Set: %v", err)
	}
	if s.keyCountForTest() != 1 {
		t.Fatalf("expected 1 stored key, got %d", s.keyCountForTest())
	}

	if err := s.Clear(ctx, "ns1", "client-A", "typing:room1", "k1"); err != nil {
		t.Fatalf("Clear: %v", err)
	}
	if s.keyCountForTest() != 0 {
		t.Errorf("expected 0 stored keys after clear, got %d", s.keyCountForTest())
	}

	if got := pub.countKind(EphemeralEventSet); got != 1 {
		t.Errorf("set events = %d, want 1", got)
	}
	if got := pub.countKind(EphemeralEventClear); got != 1 {
		t.Errorf("clear events = %d, want 1", got)
	}

	// Stop here if the event count is off: indexing below would otherwise
	// panic and bury the real failure under an index-out-of-range trace.
	evts := pub.snapshot()
	if len(evts) != 2 {
		t.Fatalf("published events = %d, want 2 (set then clear)", len(evts))
	}

	// The set event must carry the payload verbatim.
	if string(evts[0].event.Payload) != `{"typing":true}` {
		t.Errorf("set payload = %q, want the original JSON", evts[0].event.Payload)
	}
	if evts[1].event.Reason != "explicit" {
		t.Errorf("clear reason = %q, want explicit", evts[1].event.Reason)
	}
}
// TestEphemeralStore_SetThenDisconnect checks that ClearClient drops every key
// a client owns, across topics, and publishes one "disconnect" clear per key.
func TestEphemeralStore_SetThenDisconnect(t *testing.T) {
	recorder := &capturePublisher{}
	store := newTestStore(recorder.publish)
	ctx := context.Background()

	errA := store.Set(ctx, "ns1", "client-A", "topicX", "kA", []byte("p1"), 0)
	if errA != nil {
		t.Fatalf("Set kA: %v", errA)
	}
	errB := store.Set(ctx, "ns1", "client-A", "topicY", "kB", []byte("p2"), 0)
	if errB != nil {
		t.Fatalf("Set kB: %v", errB)
	}

	store.ClearClient(ctx, "client-A")

	if remaining := store.keyCountForTest(); remaining != 0 {
		t.Errorf("expected all state dropped on disconnect, got %d", remaining)
	}

	// Every owned key yields exactly one synthetic clear tagged "disconnect".
	if clears := recorder.countKind(EphemeralEventClear); clears != 2 {
		t.Errorf("disconnect clear events = %d, want 2", clears)
	}
	for _, captured := range recorder.snapshot() {
		if captured.event.Kind != EphemeralEventClear {
			continue
		}
		if captured.event.Reason != "disconnect" {
			t.Errorf("clear reason = %q, want disconnect", captured.event.Reason)
		}
	}
}
// TestEphemeralStore_TTLExpiry drives the backstop sweeper with a controlled
// clock: an entry survives a sweep before its TTL and is removed, with an
// "expired" clear event, by a sweep after it.
func TestEphemeralStore_TTLExpiry(t *testing.T) {
	recorder := &capturePublisher{}
	store := newTestStore(recorder.publish)
	ctx := context.Background()

	// The store reads time through this variable, so the test moves the clock
	// by reassigning it.
	start := time.Now()
	current := start
	store.now = func() time.Time { return current }

	if err := store.Set(ctx, "ns1", "client-A", "topicX", "kA", []byte("p"), 1000); err != nil {
		t.Fatalf("Set: %v", err)
	}

	// Clock has not moved: the sweep must leave the entry alone.
	store.sweepExpired(ctx)
	if count := store.keyCountForTest(); count != 1 {
		t.Fatalf("entry expired too early, count=%d", count)
	}

	// Jump past the 1s TTL and sweep again.
	current = start.Add(2 * time.Second)
	store.sweepExpired(ctx)
	if count := store.keyCountForTest(); count != 0 {
		t.Errorf("entry not swept after TTL, count=%d", count)
	}

	// The sweep must have announced the removal with reason=expired.
	sawExpired := false
	for _, captured := range recorder.snapshot() {
		isClear := captured.event.Kind == EphemeralEventClear
		if isClear && captured.event.Reason == "expired" {
			sawExpired = true
		}
	}
	if !sawExpired {
		t.Error("expected a clear event with reason=expired")
	}
}
// TestEphemeralStore_TTLClampedToMax checks that a requested TTL above
// ephemeralMaxTTL is clamped to exactly ephemeralMaxTTL. Asserting equality,
// not just an upper bound, also catches a regression where an over-large
// request silently falls back to a shorter TTL.
func TestEphemeralStore_TTLClampedToMax(t *testing.T) {
	pub := &capturePublisher{}
	s := newTestStore(pub.publish)

	// Freeze the clock so the expected expiry can be computed exactly.
	base := time.Now()
	s.now = func() time.Time { return base }

	// Request a TTL far beyond the max; it must be clamped.
	huge := (ephemeralMaxTTL + time.Hour).Milliseconds()
	if err := s.Set(context.Background(), "ns1", "c", "t", "k", []byte("p"), huge); err != nil {
		t.Fatalf("Set: %v", err)
	}

	s.mu.Lock()
	entry := s.values[ephemeralStateKey{namespace: "ns1", topic: "t", key: "k"}]
	s.mu.Unlock()
	if entry == nil {
		t.Fatal("entry missing")
	}

	wantExpiry := base.Add(ephemeralMaxTTL)
	if !entry.expiresAt.Equal(wantExpiry) {
		t.Errorf("TTL not clamped to max: expiresAt %v, want exactly %v", entry.expiresAt, wantExpiry)
	}
}
// TestEphemeralStore_PerClientCapEnforced fills a client up to
// ephemeralMaxKeysPerClient, then checks that one more new key is rejected and
// not stored, while overwriting a key the client already owns still succeeds.
func TestEphemeralStore_PerClientCapEnforced(t *testing.T) {
	recorder := &capturePublisher{}
	store := newTestStore(recorder.publish)
	ctx := context.Background()

	for i := 0; i < ephemeralMaxKeysPerClient; i++ {
		name := fmt.Sprintf("k%d", i)
		if err := store.Set(ctx, "ns1", "client-A", "t", name, []byte("p"), 0); err != nil {
			t.Fatalf("Set #%d: %v", i, err)
		}
	}

	// One key past the cap must be refused.
	if err := store.Set(ctx, "ns1", "client-A", "t", "overflow", []byte("p"), 0); err == nil {
		t.Fatal("expected per-client cap error")
	}
	if stored := store.keyCountForTest(); stored != ephemeralMaxKeysPerClient {
		t.Errorf("stored keys = %d, want %d (overflow must not be stored)", stored, ephemeralMaxKeysPerClient)
	}

	// Rewriting a key the client already owns is not a new key, so the cap
	// does not apply.
	overwriteErr := store.Set(ctx, "ns1", "client-A", "t", "k0", []byte("updated"), 0)
	if overwriteErr != nil {
		t.Errorf("overwrite at cap rejected: %v", overwriteErr)
	}
}
// TestEphemeralStore_ClientIsolation checks that one client can neither
// disconnect-clear nor explicitly clear state owned by another client.
func TestEphemeralStore_ClientIsolation(t *testing.T) {
	recorder := &capturePublisher{}
	store := newTestStore(recorder.publish)
	ctx := context.Background()
	keyOfB := ephemeralStateKey{namespace: "ns1", topic: "t", key: "kB"}

	if err := store.Set(ctx, "ns1", "client-A", "t", "kA", []byte("a"), 0); err != nil {
		t.Fatalf("Set A: %v", err)
	}
	if err := store.Set(ctx, "ns1", "client-B", "t", "kB", []byte("b"), 0); err != nil {
		t.Fatalf("Set B: %v", err)
	}

	// A going away must leave B's entry in place.
	store.ClearClient(ctx, "client-A")
	if left := store.keyCountForTest(); left != 1 {
		t.Fatalf("expected B's single key to survive A's disconnect, got %d", left)
	}

	store.mu.Lock()
	_, stillThere := store.values[keyOfB]
	store.mu.Unlock()
	if !stillThere {
		t.Error("client-B's state was wrongly cleared by client-A's disconnect")
	}

	// An explicit clear by a non-owner is accepted but changes nothing.
	crossErr := store.Clear(ctx, "ns1", "client-A", "t", "kB")
	if crossErr != nil {
		t.Fatalf("cross-client Clear should be a no-op, got err: %v", crossErr)
	}
	if store.keyCountForTest() != 1 {
		t.Error("client-A managed to clear client-B's key")
	}
}
// TestEphemeralStore_SetValidation checks that Set rejects an empty client ID,
// an empty topic, an empty key, and a payload one byte over the size cap. The
// store has no publisher, so only validation is exercised.
func TestEphemeralStore_SetValidation(t *testing.T) {
	store := newTestStore(nil)
	ctx := context.Background()

	rejected := []struct {
		clientID, topic, key string
		payload              []byte
		failure              string
	}{
		{clientID: "", topic: "t", key: "k", failure: "expected error for empty client ID"},
		{clientID: "c", topic: "", key: "k", failure: "expected error for empty topic"},
		{clientID: "c", topic: "t", key: "", failure: "expected error for empty key"},
		{
			clientID: "c", topic: "t", key: "k",
			payload: make([]byte, ephemeralMaxPayloadBytes+1),
			failure: "expected error for oversized payload",
		},
	}

	for _, tc := range rejected {
		if err := store.Set(ctx, "ns1", tc.clientID, tc.topic, tc.key, tc.payload, 0); err == nil {
			t.Error(tc.failure)
		}
	}
}
// TestEphemeralStore_ClearClientUnknownIsNoOp checks that ClearClient for a
// client that never set anything neither panics nor publishes events.
func TestEphemeralStore_ClearClientUnknownIsNoOp(t *testing.T) {
	recorder := &capturePublisher{}
	store := newTestStore(recorder.publish)

	store.ClearClient(context.Background(), "nobody")

	if published := recorder.snapshot(); len(published) != 0 {
		t.Error("ClearClient on unknown client should publish nothing")
	}
}
// TestEphemeralStore_OwnershipTransfer checks that when a second client
// overwrites a key, ownership moves with it: the first client's disconnect
// leaves the key alone and the second client's disconnect removes it.
func TestEphemeralStore_OwnershipTransfer(t *testing.T) {
	recorder := &capturePublisher{}
	store := newTestStore(recorder.publish)
	ctx := context.Background()

	// Both clients write the SAME (topic,key); the later writer becomes owner.
	errA := store.Set(ctx, "ns1", "client-A", "t", "shared", []byte("a"), 0)
	if errA != nil {
		t.Fatalf("Set A: %v", errA)
	}
	errB := store.Set(ctx, "ns1", "client-B", "t", "shared", []byte("b"), 0)
	if errB != nil {
		t.Fatalf("Set B: %v", errB)
	}

	// The former owner leaving must not take the key with it.
	store.ClearClient(ctx, "client-A")
	if count := store.keyCountForTest(); count != 1 {
		t.Errorf("ownership transfer failed: key dropped on prior owner's disconnect, count=%d", count)
	}

	// The current owner leaving does.
	store.ClearClient(ctx, "client-B")
	if count := store.keyCountForTest(); count != 0 {
		t.Errorf("new owner's disconnect did not clear, count=%d", count)
	}
}

View File

@ -3,6 +3,7 @@ package execution
import ( import (
"bytes" "bytes"
"context" "context"
cryptorand "crypto/rand"
"encoding/json" "encoding/json"
"fmt" "fmt"
@ -80,7 +81,15 @@ func (e *Executor) ExecuteModule(ctx context.Context, compiled wazero.CompiledMo
// invocation that uses time.Now() (receipts, audit rows, cursor cmp). // invocation that uses time.Now() (receipts, audit rows, cursor cmp).
// Same fix applied at engine.go for the persistent-WS path. // Same fix applied at engine.go for the persistent-WS path.
WithSysWalltime(). WithSysWalltime().
WithSysNanotime() WithSysNanotime().
// Bugboard #120 — same class as #27. Without WithRandSource, wazero
// uses a deterministic zero-seed RNG, so TinyGo's crypto/rand.Read
// returns IDENTICAL bytes on every fresh instance (and every
// invocation is a fresh instance). That makes any unguessable ID /
// code / nonce / token constant. Wire in the host CSPRNG so
// crypto/rand (and auto-seeded math/rand) work. Same fix at
// engine.go for the persistent-WS path.
WithRandSource(cryptorand.Reader)
// Acquire concurrency slot // Acquire concurrency slot
if e.sem != nil { if e.sem != nil {

View File

@ -0,0 +1,181 @@
package execution
import (
"context"
cryptorand "crypto/rand"
"encoding/binary"
"testing"
"github.com/tetratelabs/wazero"
"github.com/tetratelabs/wazero/imports/wasi_snapshot_preview1"
)
// Bugboard #120 — wazero defaults to a DETERMINISTIC (zero-seed) RNG source.
// TinyGo wasm's crypto/rand.Read calls WASI random_get, so without
// .WithRandSource(crypto/rand.Reader) every fresh instance gets the IDENTICAL
// "random" byte sequence. Each serverless invocation is a fresh instance, so
// any unguessable code / nonce / token a function generates is constant (the
// observed "8LRJ2S on every rotate" symptom).
//
// The fix is .WithRandSource(cryptorand.Reader) on BOTH wazero moduleConfig
// builders — executor.go (stateless) and engine.go (persistent WS). This test
// pins the executor's config path: instantiate the SAME config twice and assert
// the two instances produce DIFFERENT random bytes.
//
// If a future refactor drops .WithRandSource(), the positive test fails with a
// clear message; the negative control documents why the fix is necessary.
// randProbeWasm is a hand-assembled WASM module that imports
// wasi_snapshot_preview1.random_get and calls it from _start, writing 8 random
// bytes to memory[0:8].
//
// Every section below is prefixed with its id and a body length; the length
// bytes must be recomputed by hand if any section's contents change.
//
// (module
// (type $random_get (func (param i32 i32) (result i32)))
// (type $start (func))
// (import "wasi_snapshot_preview1" "random_get"
// (func $random_get (type 0)))
// (memory (export "memory") 1)
// (func $_start (type 1)
// i32.const 0 ;; buf = 0
// i32.const 8 ;; buf_len = 8
// call $random_get
// drop)
// (export "_start" (func $_start)))
var randProbeWasm = []byte{
// Magic + version
0x00, 0x61, 0x73, 0x6d, 0x01, 0x00, 0x00, 0x00,
// Type section (id=1) — body=10 bytes
0x01,
0x0a,
0x02, // 2 types
0x60, 0x02, 0x7f, 0x7f, // type 0: func(i32, i32)
0x01, 0x7f, // -> (i32)
0x60, 0x00, 0x00, // type 1: func() -> ()
// Import section (id=2) — body=0x25 (37 bytes)
0x02,
0x25,
0x01, // 1 import
0x16, // module name "wasi_snapshot_preview1" length=22
0x77, 0x61, 0x73, 0x69, 0x5f, 0x73, 0x6e, 0x61, 0x70, 0x73, 0x68, 0x6f, 0x74, 0x5f, 0x70, 0x72, 0x65, 0x76, 0x69, 0x65, 0x77, 0x31,
0x0a, // fn name "random_get" length=10
0x72, 0x61, 0x6e, 0x64, 0x6f, 0x6d, 0x5f, 0x67, 0x65, 0x74,
0x00, 0x00, // kind=func, type idx=0
// Function section (id=3) — body=2 bytes
0x03,
0x02,
0x01, // 1 function
0x01, // type idx 1 (for _start)
// Memory section (id=5) — body=3 bytes
0x05,
0x03,
0x01, // 1 memory
0x00, 0x01, // limits: flags=0 (no max), min=1 page
// Export section (id=7) — body=19 bytes (0x13)
0x07,
0x13,
0x02, // 2 exports
0x06, 0x6d, 0x65, 0x6d, 0x6f, 0x72, 0x79, // "memory"
0x02, 0x00, // kind=memory, idx=0
0x06, 0x5f, 0x73, 0x74, 0x61, 0x72, 0x74, // "_start"
0x00, 0x01, // kind=func, idx=1 (after the 1 import)
// Code section (id=10) — body=11 bytes (0x0b)
0x0a,
0x0b,
0x01, // 1 function body
0x09, // body size = 9
0x00, // 0 local groups
0x41, 0x00, // i32.const 0 (buf)
0x41, 0x08, // i32.const 8 (buf_len)
0x10, 0x00, // call func 0 (the imported random_get)
0x1a, // drop (errno return) — the probe does not check whether random_get succeeded
0x0b, // end
}
// readProbeRandom instantiates the compiled probe module once under cfg and
// returns the first 8 bytes of its memory — the bytes the probe asks
// random_get to fill — decoded as a little-endian uint64. The instance is
// closed before returning. Any failure aborts the calling test.
func readProbeRandom(t *testing.T, runtime wazero.Runtime, compiled wazero.CompiledModule, cfg wazero.ModuleConfig) uint64 {
	t.Helper()
	ctx := context.Background()

	instance, err := runtime.InstantiateModule(ctx, compiled, cfg)
	if err != nil {
		t.Fatalf("instantiate probe module: %v", err)
	}
	defer instance.Close(ctx)

	buf, readOK := instance.Memory().Read(0, 8)
	if !readOK {
		t.Fatal("could not read 8 bytes from probe memory at offset 0")
	}
	return binary.LittleEndian.Uint64(buf)
}
// TestModuleConfig_randSourceIsRealNotDeterministic is the positive check for
// bug #120: with WithRandSource(cryptorand.Reader) on the module config, two
// fresh instances of the probe must read different random bytes.
func TestModuleConfig_randSourceIsRealNotDeterministic(t *testing.T) {
	ctx := context.Background()

	rt := wazero.NewRuntime(ctx)
	defer rt.Close(ctx)

	if _, err := wasi_snapshot_preview1.Instantiate(ctx, rt); err != nil {
		t.Fatalf("instantiate WASI: %v", err)
	}

	probe, err := rt.CompileModule(ctx, randProbeWasm)
	if err != nil {
		t.Fatalf("compile probe wasm: %v (hex assembly likely off; recompute section sizes)", err)
	}
	defer probe.Close(ctx)

	// Same shape as the executor.go moduleConfig: anonymous instance with the
	// host CSPRNG wired in. A fresh config is built for each instantiation.
	buildConfig := func() wazero.ModuleConfig {
		cfg := wazero.NewModuleConfig().WithName("").WithArgs("probe")
		cfg = cfg.WithSysWalltime().WithSysNanotime()
		return cfg.WithRandSource(cryptorand.Reader)
	}

	first := readProbeRandom(t, rt, probe, buildConfig())
	second := readProbeRandom(t, rt, probe, buildConfig())
	if first != second {
		return
	}
	t.Errorf("BUG #120 REGRESSION: two fresh instances produced IDENTICAL random "+
		"bytes (%#016x) — crypto/rand is deterministic. Did the "+
		".WithRandSource(cryptorand.Reader) call get dropped from moduleConfig "+
		"in executor.go or engine.go?", first)
}
// TestModuleConfig_randWithoutFix_demoDeterministic is the negative control
// for bug #120: it instantiates the probe twice with a module config that has
// no WithRandSource call and compares the bytes each instance read.
func TestModuleConfig_randWithoutFix_demoDeterministic(t *testing.T) {
// Negative control: WITHOUT .WithRandSource(), confirm wazero's default RNG
// is deterministic (identical bytes across fresh instances). This pins the
// *cause*. If wazero ever defaults to a real entropy source, the two reads
// differ and this test is SKIPPED (not failed) with a message asking for a
// review — making the change visible in test output instead of silently
// invalidating the fix's necessity.
ctx := context.Background()
runtime := wazero.NewRuntime(ctx)
defer runtime.Close(ctx)
if _, err := wasi_snapshot_preview1.Instantiate(ctx, runtime); err != nil {
t.Fatalf("instantiate WASI: %v", err)
}
compiled, err := runtime.CompileModule(ctx, randProbeWasm)
if err != nil {
t.Fatalf("compile probe wasm: %v", err)
}
defer compiled.Close(ctx)
// Default config: same name/args as the positive test, but no rand source.
newDefault := func() wazero.ModuleConfig {
return wazero.NewModuleConfig().WithName("").WithArgs("probe")
}
a := readProbeRandom(t, runtime, compiled, newDefault())
b := readProbeRandom(t, runtime, compiled, newDefault())
if a != b {
t.Skipf("wazero default RandSource now differs across instances (%#016x vs %#016x) — "+
"if real-by-default upstream, the bug-#120 fix may be redundant; review", a, b)
}
// Determinism confirmed → fix is meaningful.
}

View File

@ -134,6 +134,18 @@ func (m *mockHostServices) WSPubSubUnbridge(ctx context.Context, clientID, topic
return nil return nil
} }
// SetHTTPResponse forwards straight to SetRawHTTPResponse and returns its
// error unwrapped.
func (m *mockHostServices) SetHTTPResponse(ctx context.Context, status int, headers map[string]string, body []byte) error {
return SetRawHTTPResponse(ctx, status, headers, body)
}
// EphemeralStateSet is a stub: it ignores its arguments and always succeeds.
func (m *mockHostServices) EphemeralStateSet(ctx context.Context, topic, key string, payload []byte, ttlMs int64) error {
return nil
}
// EphemeralStateClear is a stub: it ignores its arguments and always succeeds.
func (m *mockHostServices) EphemeralStateClear(ctx context.Context, topic, key string) error {
return nil
}
func (m *mockHostServices) WSSend(ctx context.Context, clientID string, data []byte) error { func (m *mockHostServices) WSSend(ctx context.Context, clientID string, data []byte) error {
return nil return nil
} }

View File

@ -1,6 +1,7 @@
package hostfunctions package hostfunctions
import ( import (
"context"
"net/http" "net/http"
"time" "time"
@ -57,7 +58,7 @@ func NewHostFunctions(
anyoneHTTPClient.Timeout = httpTimeout anyoneHTTPClient.Timeout = httpTimeout
} }
return &HostFunctions{ hf := &HostFunctions{
db: db, db: db,
cacheClient: cacheClient, cacheClient: cacheClient,
storage: storage, storage: storage,
@ -77,4 +78,28 @@ func NewHostFunctions(
logs: make([]serverless.LogEntry, 0), logs: make([]serverless.LogEntry, 0),
asyncInvokeSem: make(chan struct{}, asyncInvokeMaxInFlight), asyncInvokeSem: make(chan struct{}, asyncInvokeMaxInFlight),
} }
// Ephemeral-state store (bugboard #710). Publishes synthetic set/clear
// events through the same pubsub adapter the pubsub_publish host fn uses,
// and registers a WS disconnect hook so a client's owned state auto-clears
// the instant its WebSocket drops — zero cron lag. Only wired when a
// concrete WSManager is present (the disconnect hook + sweeper need it);
// otherwise ephemeral_state_set returns an error.
if wsm, ok := wsManager.(*serverless.WSManager); ok && wsm != nil {
var publish func(ctx context.Context, namespace, topic string, data []byte) error
if pubsubAdapter != nil {
publish = func(ctx context.Context, _ string, topic string, data []byte) error {
// The adapter namespaces internally (same as PubSubPublish), so
// the namespace arg is informational only here.
return pubsubAdapter.Publish(ctx, topic, data)
}
}
hf.ephemeralStore = serverless.NewEphemeralStore(publish)
wsm.AddDisconnectHook(func(clientID string) {
hf.ephemeralStore.ClearClient(context.Background(), clientID)
})
hf.ephemeralStore.StartSweeper()
}
return hf
} }

View File

@ -17,6 +17,18 @@ func (h *HostFunctions) HTTPFetch(ctx context.Context, method, url string, heade
return h.doFetch(ctx, "http_fetch", h.httpClient, method, url, headers, body) return h.doFetch(ctx, "http_fetch", h.httpClient, method, url, headers, body)
} }
// SetHTTPResponse is the host-side entry point for set_http_response
// (bugboard #835). It passes status, headers and body straight to
// serverless.SetRawHTTPResponse, which performs the validation and writes to
// the per-invocation collector carried on ctx.
//
// It returns nil on success. Any error from serverless.SetRawHTTPResponse is
// wrapped in a *serverless.HostFunctionError whose Function is
// "set_http_response".
func (h *HostFunctions) SetHTTPResponse(ctx context.Context, status int, headers map[string]string, body []byte) error {
	err := serverless.SetRawHTTPResponse(ctx, status, headers, body)
	if err == nil {
		return nil
	}
	return &serverless.HostFunctionError{Function: "set_http_response", Cause: err}
}
// AnyoneFetch makes an outbound HTTP request routed through the Anyone // AnyoneFetch makes an outbound HTTP request routed through the Anyone
// (ANyONe protocol) SOCKS5 proxy, so the third-party endpoint sees an // (ANyONe protocol) SOCKS5 proxy, so the third-party endpoint sees an
// Anyone exit IP instead of the gateway IP and the gateway can't // Anyone exit IP instead of the gateway IP and the gateway can't

View File

@ -186,6 +186,40 @@ func dedupBatchByTopic(msgs []pubsub.TopicMessage) []pubsub.TopicMessage {
return out return out
} }
// EphemeralStateSet stores ephemeral state under (topic, key) for the WS
// client of the current invocation (bugboard #710). The namespace and owning
// client ID are read from the invocation context rather than taken as
// arguments, so the calling function cannot choose them.
//
// Every failure is returned as a *serverless.HostFunctionError tagged
// "ephemeral_state_set": the store is not wired on this gateway, there is no
// invocation context, or the store's Set call itself failed.
func (h *HostFunctions) EphemeralStateSet(ctx context.Context, topic, key string, payload []byte, ttlMs int64) error {
	const hostFn = "ephemeral_state_set"
	wrap := func(cause error) error {
		return &serverless.HostFunctionError{Function: hostFn, Cause: cause}
	}

	store := h.ephemeralStore
	if store == nil {
		return wrap(fmt.Errorf("ephemeral state not available on this gateway"))
	}

	inv := h.currentInvocationContext(ctx)
	if inv == nil {
		return wrap(fmt.Errorf("no invocation context"))
	}

	if err := store.Set(ctx, inv.Namespace, inv.WSClientID, topic, key, payload, ttlMs); err != nil {
		return wrap(err)
	}
	return nil
}
// EphemeralStateClear removes the ephemeral state stored under (topic, key)
// for the WS client of the current invocation (bugboard #710). As with
// EphemeralStateSet, the namespace and client ID come from the invocation
// context, not from the caller.
//
// Every failure is returned as a *serverless.HostFunctionError tagged
// "ephemeral_state_clear": the store is not wired on this gateway, there is
// no invocation context, or the store's Clear call itself failed.
func (h *HostFunctions) EphemeralStateClear(ctx context.Context, topic, key string) error {
	const hostFn = "ephemeral_state_clear"
	wrap := func(cause error) error {
		return &serverless.HostFunctionError{Function: hostFn, Cause: cause}
	}

	store := h.ephemeralStore
	if store == nil {
		return wrap(fmt.Errorf("ephemeral state not available on this gateway"))
	}

	inv := h.currentInvocationContext(ctx)
	if inv == nil {
		return wrap(fmt.Errorf("no invocation context"))
	}

	if err := store.Clear(ctx, inv.Namespace, inv.WSClientID, topic, key); err != nil {
		return wrap(err)
	}
	return nil
}
// WSSend sends data to a specific WebSocket client. // WSSend sends data to a specific WebSocket client.
func (h *HostFunctions) WSSend(ctx context.Context, clientID string, data []byte) error { func (h *HostFunctions) WSSend(ctx context.Context, clientID string, data []byte) error {
if h.wsManager == nil { if h.wsManager == nil {

View File

@ -14,6 +14,9 @@ import (
"go.uber.org/zap" "go.uber.org/zap"
) )
// secretsKeyBytes is the required length, in bytes, of the AES-256 encryption
// key. The hex-encoded form of a valid key is twice this many characters.
const secretsKeyBytes = 32
// DBSecretsManager implements SecretsManager using the database. // DBSecretsManager implements SecretsManager using the database.
type DBSecretsManager struct { type DBSecretsManager struct {
db rqlite.Client db rqlite.Client
@ -25,21 +28,34 @@ type DBSecretsManager struct {
var _ serverless.SecretsManager = (*DBSecretsManager)(nil) var _ serverless.SecretsManager = (*DBSecretsManager)(nil)
// NewDBSecretsManager creates a secrets manager backed by the database. // NewDBSecretsManager creates a secrets manager backed by the database.
func NewDBSecretsManager(db rqlite.Client, encryptionKeyHex string, logger *zap.Logger) (*DBSecretsManager, error) { //
// encryptionKeyHex must be a 32-byte AES-256 key, hex-encoded (64 chars).
//
// When encryptionKeyHex is empty the behaviour depends on allowEphemeral:
// - allowEphemeral=false (production): returns an error. A misconfigured
// node must fail loudly rather than silently generate a per-process
// ephemeral key. With an ephemeral key, secrets encrypted by one
// process cannot be decrypted by another (or after a restart), which
// makes get_secret return garbage/errors (bugboard #837).
// - allowEphemeral=true (tests/dev): generates a random per-process key
// and logs a warning. Secrets will not persist across restarts.
func NewDBSecretsManager(db rqlite.Client, encryptionKeyHex string, allowEphemeral bool, logger *zap.Logger) (*DBSecretsManager, error) {
var key []byte var key []byte
if encryptionKeyHex != "" { if encryptionKeyHex != "" {
var err error var err error
key, err = hex.DecodeString(encryptionKeyHex) key, err = hex.DecodeString(encryptionKeyHex)
if err != nil || len(key) != 32 { if err != nil || len(key) != secretsKeyBytes {
return nil, fmt.Errorf("invalid encryption key: must be 32 bytes hex-encoded") return nil, fmt.Errorf("invalid secrets encryption key: must be %d bytes hex-encoded (%d hex chars)", secretsKeyBytes, secretsKeyBytes*2)
} }
} else { } else if allowEphemeral {
// Generate a random key if none provided // Generate a random per-process key (dev/test only).
key = make([]byte, 32) key = make([]byte, secretsKeyBytes)
if _, err := rand.Read(key); err != nil { if _, err := rand.Read(key); err != nil {
return nil, fmt.Errorf("failed to generate encryption key: %w", err) return nil, fmt.Errorf("failed to generate ephemeral secrets encryption key: %w", err)
} }
logger.Warn("Generated random secrets encryption key - secrets will not persist across restarts") logger.Warn("Generated random ephemeral secrets encryption key - secrets will NOT persist across restarts (dev/test only)")
} else {
return nil, fmt.Errorf("secrets encryption key is required: set secrets_encryption_key (see %s/secrets/secrets-encryption-key); without it secrets cannot be decrypted across processes or restarts (bugboard #837)", "~/.orama")
} }
return &DBSecretsManager{ return &DBSecretsManager{

View File

@ -0,0 +1,199 @@
package hostfunctions
import (
"context"
"database/sql"
"errors"
"strings"
"testing"
"github.com/DeBrosOfficial/network/pkg/rqlite"
"github.com/DeBrosOfficial/network/pkg/serverless"
"go.uber.org/zap"
)
// fakeSecretsDB is an in-memory rqlite.Client stub that implements only the
// Exec/Query paths used by DBSecretsManager (INSERT...ON CONFLICT upsert and
// SELECT by namespace+name). Storing the encrypted blob in a map lets us
// round-trip a Set through a Get — the core of the bugboard #837 regression.
type fakeSecretsDB struct {
// Embedded so the stub can be passed where an rqlite.Client is expected
// without implementing every method. newFakeSecretsDB leaves it unset, so
// presumably any method other than Exec/Query would fail if called —
// TODO confirm against the rqlite.Client definition.
rqlite.Client
store map[string][]byte // key: namespace\x00name -> encrypted_value
}
// newFakeSecretsDB returns a fakeSecretsDB whose store is empty and ready to
// be written to. The embedded rqlite.Client is left at its zero value.
func newFakeSecretsDB() *fakeSecretsDB {
	db := new(fakeSecretsDB)
	db.store = make(map[string][]byte)
	return db
}
// storeKey builds the map key used by fakeSecretsDB: the namespace and the
// secret name joined by a single NUL byte.
func storeKey(namespace, name string) string {
	const sep = "\x00"
	return namespace + sep + name
}
// Exec emulates the one write DBSecretsManager issues: the
// "INSERT INTO function_secrets" upsert. Positional args follow secrets.go
// Set(): (id, namespace, name, encrypted_value, created_at, updated_at).
// The encrypted blob is copied (so later mutation by the caller cannot change
// what is stored) and kept under storeKey(namespace, name). The call reports
// one affected row.
//
// Any statement that is not the upsert is accepted and ignored, returning an
// empty result.
//
// An upsert with too few arguments, or with a namespace/name/encrypted_value
// of an unexpected dynamic type, returns an error. Indexing blindly would
// panic on short args, and discarding the type assertions would store an
// empty blob — which could let a "decryption must fail" test pass for the
// wrong reason.
func (f *fakeSecretsDB) Exec(ctx context.Context, query string, args ...any) (sql.Result, error) {
	if !strings.Contains(query, "INSERT INTO function_secrets") {
		return fakeResult{}, nil
	}

	// Only indices 1..3 are read; the trailing timestamps are not needed.
	if len(args) < 4 {
		return nil, errors.New("unexpected upsert args: want (id, namespace, name, encrypted_value, ...)")
	}
	namespace, nsOK := args[1].(string)
	name, nameOK := args[2].(string)
	enc, encOK := args[3].([]byte)
	if !nsOK || !nameOK || !encOK {
		return nil, errors.New("unexpected upsert arg types: want namespace string, name string, encrypted_value []byte")
	}

	stored := make([]byte, len(enc))
	copy(stored, enc)
	f.store[storeKey(namespace, name)] = stored
	return fakeResult{rows: 1}, nil
}
// Query emulates the one read DBSecretsManager issues:
// SELECT encrypted_value ... WHERE namespace=? AND name=?, with args
// (namespace, name). dest must be a pointer to a slice of the anonymous
// struct { EncryptedValue []byte `db:"encrypted_value"` }. When a blob is
// stored for the pair, one row is appended to *dest; when nothing is stored,
// *dest is left untouched and nil is returned.
//
// Anything the fake does not understand is reported as an error: a different
// query, a missing or non-string namespace/name argument, or a dest of
// another type. Checking the arguments explicitly keeps the fake from
// panicking on short args or silently looking up the empty key.
func (f *fakeSecretsDB) Query(ctx context.Context, dest any, query string, args ...any) error {
	if !strings.Contains(query, "SELECT encrypted_value") {
		return errors.New("unexpected query")
	}

	if len(args) < 2 {
		return errors.New("unexpected query args: want (namespace, name)")
	}
	namespace, nsOK := args[0].(string)
	name, nameOK := args[1].(string)
	if !nsOK || !nameOK {
		return errors.New("unexpected query arg types: namespace and name must be strings")
	}

	rows, ok := dest.(*[]struct {
		EncryptedValue []byte `db:"encrypted_value"`
	})
	if !ok {
		return errors.New("unexpected dest type")
	}

	if enc, found := f.store[storeKey(namespace, name)]; found {
		*rows = append(*rows, struct {
			EncryptedValue []byte `db:"encrypted_value"`
		}{EncryptedValue: enc})
	}
	return nil
}
// fakeResult is the result value fakeSecretsDB.Exec returns. It carries only
// an affected-row count.
type fakeResult struct {
	rows int64
}

// LastInsertId always reports 0 and no error.
func (r fakeResult) LastInsertId() (int64, error) {
	return 0, nil
}

// RowsAffected reports the stored row count and no error.
func (r fakeResult) RowsAffected() (int64, error) {
	return r.rows, nil
}
// Fixed keys shared by the DBSecretsManager tests. Each is 64 hex characters,
// i.e. a hex-encoded 32-byte AES-256 key.
const (
	// validKey is the key used wherever a test needs a well-formed key.
	validKey = "0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef"

	// otherKey is a second well-formed key that differs from validKey.
	otherKey = "fedcba9876543210fedcba9876543210fedcba9876543210fedcba9876543210"
)
// TestDBSecretsManager_SetGetRoundTrip_sameKey checks that a secret written
// by one manager can be read back by a second, independently constructed
// manager that was given the same key and the same backing store — the
// situation of another process or a restart.
func TestDBSecretsManager_SetGetRoundTrip_sameKey(t *testing.T) {
	const (
		namespace = "ns1"
		name      = "API_TOKEN"
		secret    = "s3cr3t-value"
	)
	var (
		ctx    = context.Background()
		store  = newFakeSecretsDB()
		logger = zap.NewNop()
	)

	writer, err := NewDBSecretsManager(store, validKey, false, logger)
	if err != nil {
		t.Fatalf("NewDBSecretsManager (writer) failed: %v", err)
	}
	if err = writer.Set(ctx, namespace, name, secret); err != nil {
		t.Fatalf("Set failed: %v", err)
	}

	// Second manager, same key: stands in for a different process.
	reader, err := NewDBSecretsManager(store, validKey, false, logger)
	if err != nil {
		t.Fatalf("NewDBSecretsManager (reader) failed: %v", err)
	}

	got, err := reader.Get(ctx, namespace, name)
	if err != nil {
		t.Fatalf("Get failed: %v", err)
	}
	if got != secret {
		t.Errorf("Get returned %q, want %q", got, secret)
	}
}
// TestDBSecretsManager_GetWithDifferentKey_fails checks the failure mode
// behind bugboard #837: a manager holding a different key than the writer
// must get an error from Get rather than a value.
func TestDBSecretsManager_GetWithDifferentKey_fails(t *testing.T) {
	var (
		ctx    = context.Background()
		store  = newFakeSecretsDB()
		logger = zap.NewNop()
	)

	writer, err := NewDBSecretsManager(store, validKey, false, logger)
	if err != nil {
		t.Fatalf("NewDBSecretsManager (writer) failed: %v", err)
	}
	if err = writer.Set(ctx, "ns1", "API_TOKEN", "s3cr3t-value"); err != nil {
		t.Fatalf("Set failed: %v", err)
	}

	reader, err := NewDBSecretsManager(store, otherKey, false, logger)
	if err != nil {
		t.Fatalf("NewDBSecretsManager (reader) failed: %v", err)
	}

	_, err = reader.Get(ctx, "ns1", "API_TOKEN")
	if err == nil {
		t.Fatal("expected decryption to fail with a different key, got nil error")
	}
}
// TestDBSecretsManager_emptyKey_isLoud verifies the production constructor
// refuses to start with an empty key (allowEphemeral=false) instead of
// silently generating an undecryptable ephemeral key.
func TestDBSecretsManager_emptyKey_isLoud(t *testing.T) {
db := newFakeSecretsDB()
_, err := NewDBSecretsManager(db, "", false, zap.NewNop())
if err == nil {
t.Fatal("expected error for empty key with allowEphemeral=false, got nil")
}
if !strings.Contains(err.Error(), "secrets encryption key is required") {
t.Errorf("unexpected error message: %v", err)
}
}
// TestDBSecretsManager_emptyKey_ephemeralAllowed verifies tests/dev can still
// opt into a per-process ephemeral key.
func TestDBSecretsManager_emptyKey_ephemeralAllowed(t *testing.T) {
db := newFakeSecretsDB()
mgr, err := NewDBSecretsManager(db, "", true, zap.NewNop())
if err != nil {
t.Fatalf("expected ephemeral key to be allowed, got error: %v", err)
}
// Ephemeral key still round-trips within the same process.
ctx := context.Background()
if err := mgr.Set(ctx, "ns1", "K", "v"); err != nil {
t.Fatalf("Set failed: %v", err)
}
got, err := mgr.Get(ctx, "ns1", "K")
if err != nil {
t.Fatalf("Get failed: %v", err)
}
if got != "v" {
t.Errorf("Get returned %q, want %q", got, "v")
}
}
// TestDBSecretsManager_invalidKey_rejected runs one subtest per malformed key
// (too short, odd-length hex, non-hex characters, wrong byte count) and
// expects the constructor to return an error for each.
func TestDBSecretsManager_invalidKey_rejected(t *testing.T) {
	badKeys := []struct {
		label string
		key   string
	}{
		{label: "too short", key: "abcd"},
		{label: "odd hex", key: "abc"},
		{label: "not hex", key: strings.Repeat("zz", 32)},
		{label: "wrong bytes", key: "0123456789abcdef"}, // 8 bytes, not 32
	}

	store := newFakeSecretsDB()
	for _, tc := range badKeys {
		t.Run(tc.label, func(t *testing.T) {
			_, err := NewDBSecretsManager(store, tc.key, false, zap.NewNop())
			if err == nil {
				t.Fatalf("expected error for invalid key %q, got nil", tc.key)
			}
		})
	}
}
// TestDBSecretsManager_Get_notFound verifies the not-found sentinel survives.
func TestDBSecretsManager_Get_notFound(t *testing.T) {
db := newFakeSecretsDB()
mgr, err := NewDBSecretsManager(db, validKey, false, zap.NewNop())
if err != nil {
t.Fatalf("NewDBSecretsManager failed: %v", err)
}
if _, err := mgr.Get(context.Background(), "ns1", "missing"); !errors.Is(err, serverless.ErrSecretNotFound) {
t.Errorf("expected ErrSecretNotFound, got %v", err)
}
}

View File

@ -97,6 +97,13 @@ type HostFunctions struct {
triggerDispatcher *triggers.PubSubDispatcher triggerDispatcher *triggers.PubSubDispatcher
triggerDispatcherLock sync.RWMutex triggerDispatcherLock sync.RWMutex
// ephemeralStore backs ephemeral_state_set / ephemeral_state_clear
// (bugboard #710). Constructed in NewHostFunctions when a WS manager is
// present; nil otherwise (host fns then return an error). The store
// registers a disconnect hook on the WS manager so a client's owned state
// auto-clears the instant its WebSocket disconnects.
ephemeralStore *serverless.EphemeralStore
// Current invocation context (set per-execution) // Current invocation context (set per-execution)
invCtx *serverless.InvocationContext invCtx *serverless.InvocationContext
invCtxLock sync.RWMutex invCtxLock sync.RWMutex

View File

@ -75,6 +75,13 @@ type InvokeResponse struct {
Error string `json:"error,omitempty"` Error string `json:"error,omitempty"`
DurationMS int64 `json:"duration_ms"` DurationMS int64 `json:"duration_ms"`
Retries int `json:"retries,omitempty"` Retries int `json:"retries,omitempty"`
// RawHTTP carries a verbatim HTTP response set by a RawHTTPResponse
// function via set_http_response (bugboard #835). nil for normal
// functions and for raw functions that never called set_http_response —
// the HTTP handler falls back to the standard JSON/Ack path in that case.
// Not serialized; consumed directly by the HTTP invoke handler.
RawHTTP *RawHTTPResult `json:"-"`
} }
// Invoke executes a function with automatic retry logic. // Invoke executes a function with automatic retry logic.
@ -169,6 +176,8 @@ func (i *Invoker) Invoke(ctx context.Context, req *InvokeRequest) (*InvokeRespon
} }
response.Status = InvocationStatusSuccess response.Status = InvocationStatusSuccess
// Surface any verbatim HTTP response the function set (bugboard #835).
response.RawHTTP = invCtx.RawHTTP
return response, nil return response, nil
} }

View File

@ -247,6 +247,18 @@ func (m *MockHostServices) WSPubSubUnbridge(ctx context.Context, clientID, topic
return nil return nil
} }
// SetHTTPResponse forwards its arguments unchanged to SetRawHTTPResponse and
// returns whatever that function returns.
func (m *MockHostServices) SetHTTPResponse(ctx context.Context, status int, headers map[string]string, body []byte) error {
return SetRawHTTPResponse(ctx, status, headers, body)
}
// EphemeralStateSet is a no-op stub: it ignores every argument and returns nil.
func (m *MockHostServices) EphemeralStateSet(ctx context.Context, topic, key string, payload []byte, ttlMs int64) error {
return nil
}
// EphemeralStateClear is a no-op stub: it ignores every argument and returns nil.
func (m *MockHostServices) EphemeralStateClear(ctx context.Context, topic, key string) error {
return nil
}
func (m *MockHostServices) WSSend(ctx context.Context, clientID string, data []byte) error { func (m *MockHostServices) WSSend(ctx context.Context, clientID string, data []byte) error {
return nil return nil
} }

View File

@ -0,0 +1,142 @@
package serverless
import (
"context"
"fmt"
"sync"
)
// Raw-HTTP-response mode (bugboard #835).
//
// A function deployed with RawHTTPResponse=true can emit a verbatim HTTP
// response (status + headers + body) instead of the JSON/Ack-wrapped output
// the stateless invoke handler normally produces. This lets a namespace app
// proxy an upstream RPC (Helius / Alchemy) transparently — the function reads
// the request, calls the upstream, and replays the upstream's status, headers,
// and body byte-for-byte back to its own caller.
//
// The primitive provided here is ONLY the response carrier + the host-call
// validation. Per-user-JWT quota gating (which the ticket mentions) is the
// APP's responsibility: the function can call oh.GetCallerJwtSubject() and
// decide whether to serve. The gateway does not implement quota here.
// Limits applied by SetRawHTTPResponse to a function-supplied response.
const (
	// rawHTTPMinStatus and rawHTTPMaxStatus are the inclusive bounds of an
	// accepted HTTP status code.
	rawHTTPMinStatus = 100
	rawHTTPMaxStatus = 599

	// rawHTTPMaxHeaders is the largest number of response headers a function
	// may set. It is generous for a proxy use-case (upstream RPCs return well
	// under this) while still bounding what a hostile function can make the
	// gateway allocate per invocation.
	rawHTTPMaxHeaders = 64

	// rawHTTPMaxBodyBytes is the largest verbatim response body a function
	// may set: 8 MiB. That comfortably covers JSON-RPC responses (even large
	// getBlock / getProgramAccounts payloads) without letting a function
	// buffer an unbounded body in gateway memory.
	rawHTTPMaxBodyBytes = 8 * 1024 * 1024
)
// RawHTTPResult is a verbatim HTTP response set by a RawHTTPResponse function.
// Set is true once the function has called set_http_response at least once;
// the invoke handler only takes the raw path when Set is true (otherwise it
// falls back to the normal JSON/Ack-wrapped behavior).
type RawHTTPResult struct {
// Status is the HTTP status code. SetRawHTTPResponse only stores values
// within [rawHTTPMinStatus, rawHTTPMaxStatus].
Status int
// Headers holds the response headers. SetRawHTTPResponse leaves it nil when
// the function supplied no headers.
Headers map[string]string
// Body is the response body. SetRawHTTPResponse stores its own copy of the
// bytes it was given.
Body []byte
// Set distinguishes a recorded response from the zero value.
Set bool
}
// rawHTTPCollector is the mutable per-invocation sink the set_http_response
// host function writes to. It rides the invocation's context (same per-call
// propagation model as the publish counter and log buffer) so concurrent
// invocations never cross-write each other's response.
type rawHTTPCollector struct {
// mu guards result; SetRawHTTPResponse and TakeRawHTTPResponse both hold it
// while touching the field.
mu sync.Mutex
// result is the most recently recorded response; its Set field stays false
// until SetRawHTTPResponse succeeds once.
result RawHTTPResult
}
// rawHTTPKey is the unexported context-value key for the raw-HTTP collector.
// Being a distinct unexported type, it cannot be produced by code outside this
// package, so only this package can store or look up the collector.
type rawHTTPKey struct{}
// WithRawHTTPCollector returns a derived ctx carrying a FRESH per-invocation
// raw-HTTP response collector. The engine attaches this before executing a
// RawHTTPResponse function so the set_http_response host call has somewhere to
// write; for non-raw functions the collector is absent and the host call is a
// validated no-op.
func WithRawHTTPCollector(ctx context.Context) context.Context {
return context.WithValue(ctx, rawHTTPKey{}, &rawHTTPCollector{})
}
// rawHTTPCollectorFromCtx looks up the collector stored by
// WithRawHTTPCollector. It returns nil when ctx is nil or carries no
// collector (non-raw function, or an untracked code path).
func rawHTTPCollectorFromCtx(ctx context.Context) *rawHTTPCollector {
	if ctx != nil {
		if collector, ok := ctx.Value(rawHTTPKey{}).(*rawHTTPCollector); ok {
			return collector
		}
	}
	return nil
}
// SetRawHTTPResponse validates a verbatim HTTP response and stores it on the
// collector carried by ctx, replacing anything recorded earlier in the same
// invocation.
//
// It returns an error, and stores nothing, when:
//   - ctx carries no collector (the function was not deployed with
//     raw_http_response enabled);
//   - status is outside [rawHTTPMinStatus, rawHTTPMaxStatus];
//   - headers has more than rawHTTPMaxHeaders entries;
//   - body is longer than rawHTTPMaxBodyBytes.
//
// headers may be nil or empty, in which case the stored Headers is nil. Both
// headers and body are copied, so the caller (which reads the body out of
// guest WASM memory) may reuse or mutate its buffers afterwards.
func SetRawHTTPResponse(ctx context.Context, status int, headers map[string]string, body []byte) error {
	collector := rawHTTPCollectorFromCtx(ctx)
	switch {
	case collector == nil:
		return fmt.Errorf("set_http_response: function is not deployed with raw_http_response enabled")
	case status < rawHTTPMinStatus || status > rawHTTPMaxStatus:
		return fmt.Errorf("set_http_response: status %d out of range [%d,%d]", status, rawHTTPMinStatus, rawHTTPMaxStatus)
	case len(headers) > rawHTTPMaxHeaders:
		return fmt.Errorf("set_http_response: too many headers (%d > %d)", len(headers), rawHTTPMaxHeaders)
	case len(body) > rawHTTPMaxBodyBytes:
		return fmt.Errorf("set_http_response: body too large (%d bytes > %d)", len(body), rawHTTPMaxBodyBytes)
	}

	// Build the private copy before taking the lock.
	stored := RawHTTPResult{
		Status: status,
		Body:   append(make([]byte, 0, len(body)), body...),
		Set:    true,
	}
	if len(headers) > 0 {
		stored.Headers = make(map[string]string, len(headers))
		for name, value := range headers {
			stored.Headers[name] = value
		}
	}

	collector.mu.Lock()
	defer collector.mu.Unlock()
	collector.result = stored
	return nil
}
// TakeRawHTTPResponse reports the response recorded on ctx's collector. The
// second result is true only when a collector is attached and
// set_http_response succeeded at least once; otherwise the zero RawHTTPResult
// and false are returned. The engine calls this after Execute to surface the
// response on the InvokeResponse.
//
// The collector is only read, not cleared, so repeated calls return the same
// response. The returned Headers map and Body slice are the collector's own,
// not fresh copies.
func TakeRawHTTPResponse(ctx context.Context) (RawHTTPResult, bool) {
	collector := rawHTTPCollectorFromCtx(ctx)
	if collector == nil {
		return RawHTTPResult{}, false
	}

	collector.mu.Lock()
	snapshot := collector.result
	collector.mu.Unlock()

	if snapshot.Set {
		return snapshot, true
	}
	return RawHTTPResult{}, false
}

View File

@ -0,0 +1,129 @@
package serverless
import (
"bytes"
"context"
"strings"
"testing"
)
// TestSetRawHTTPResponse_happyPath records a valid response and checks that
// TakeRawHTTPResponse returns the same status, header and body.
func TestSetRawHTTPResponse_happyPath(t *testing.T) {
	const contentType = "application/json"
	wantBody := []byte(`{"jsonrpc":"2.0","result":42}`)
	ctx := WithRawHTTPCollector(context.Background())

	err := SetRawHTTPResponse(ctx, 200, map[string]string{"Content-Type": contentType}, wantBody)
	if err != nil {
		t.Fatalf("SetRawHTTPResponse: unexpected error: %v", err)
	}

	got, found := TakeRawHTTPResponse(ctx)
	if !found {
		t.Fatal("TakeRawHTTPResponse: expected a response to be set")
	}
	if got.Status != 200 {
		t.Errorf("status = %d, want 200", got.Status)
	}
	if ct := got.Headers["Content-Type"]; ct != contentType {
		t.Errorf("Content-Type header = %q, want application/json", ct)
	}
	if !bytes.Equal(got.Body, wantBody) {
		t.Errorf("body = %q, want %q", got.Body, wantBody)
	}
}
// TestSetRawHTTPResponse_copiesBodyAndHeaders mutates the caller's body slice
// and header map after the call and checks the recorded response still holds
// the values that were passed in.
func TestSetRawHTTPResponse_copiesBodyAndHeaders(t *testing.T) {
	callerHeaders := map[string]string{"X-Test": "v1"}
	callerBody := []byte("original")
	ctx := WithRawHTTPCollector(context.Background())

	if err := SetRawHTTPResponse(ctx, 200, callerHeaders, callerBody); err != nil {
		t.Fatalf("SetRawHTTPResponse: %v", err)
	}

	// Scribble over the caller-owned inputs; the stored response must not follow.
	callerHeaders["X-Test"] = "mutated"
	callerBody[0] = 'X'

	stored, _ := TakeRawHTTPResponse(ctx)
	if got := string(stored.Body); got != "original" {
		t.Errorf("body was not copied: got %q", stored.Body)
	}
	if got := stored.Headers["X-Test"]; got != "v1" {
		t.Errorf("headers were not copied: got %q", got)
	}
}
// TestSetRawHTTPResponse_noCollector calls SetRawHTTPResponse on a context
// without a collector (a function not in raw mode) and expects an error that
// mentions raw_http_response.
func TestSetRawHTTPResponse_noCollector(t *testing.T) {
	err := SetRawHTTPResponse(context.Background(), 200, nil, []byte("x"))

	switch {
	case err == nil:
		t.Fatal("expected error when no collector is attached")
	case !strings.Contains(err.Error(), "raw_http_response"):
		t.Errorf("error = %q, want it to mention raw_http_response", err.Error())
	}
}
// TestSetRawHTTPResponse_rejectsBadStatus feeds status codes outside the
// accepted range, each against a fresh collector, and expects both a
// validation error and no recorded response.
func TestSetRawHTTPResponse_rejectsBadStatus(t *testing.T) {
	badStatuses := [...]int{0, 99, 600, 1000, -1}

	for _, code := range badStatuses {
		ctx := WithRawHTTPCollector(context.Background())

		err := SetRawHTTPResponse(ctx, code, nil, nil)
		if err == nil {
			t.Errorf("status %d: expected validation error, got nil", code)
		}

		_, recorded := TakeRawHTTPResponse(ctx)
		if recorded {
			t.Errorf("status %d: response should not be set after a rejected status", code)
		}
	}
}
func TestSetRawHTTPResponse_rejectsTooManyHeaders(t *testing.T) {
ctx := WithRawHTTPCollector(context.Background())
headers := make(map[string]string, rawHTTPMaxHeaders+1)
for i := 0; i <= rawHTTPMaxHeaders; i++ {
headers["h"+string(rune('a'+i%26))+string(rune('0'+i/26))] = "v"
}
if len(headers) <= rawHTTPMaxHeaders {
t.Fatalf("test setup: expected > %d headers, got %d", rawHTTPMaxHeaders, len(headers))
}
if err := SetRawHTTPResponse(ctx, 200, headers, nil); err == nil {
t.Fatal("expected error for too many headers")
}
}
func TestSetRawHTTPResponse_rejectsOversizedBody(t *testing.T) {
ctx := WithRawHTTPCollector(context.Background())
body := make([]byte, rawHTTPMaxBodyBytes+1)
if err := SetRawHTTPResponse(ctx, 200, nil, body); err == nil {
t.Fatal("expected error for oversized body")
}
}
// TestTakeRawHTTPResponse_notSet checks the two "nothing recorded" cases: a
// collector that was never written to, and a context with no collector.
func TestTakeRawHTTPResponse_notSet(t *testing.T) {
	// Collector present, set_http_response never called.
	withCollector := WithRawHTTPCollector(context.Background())
	_, recorded := TakeRawHTTPResponse(withCollector)
	if recorded {
		t.Fatal("expected ok=false when no response was set")
	}

	// No collector on the context at all.
	_, recorded = TakeRawHTTPResponse(context.Background())
	if recorded {
		t.Fatal("expected ok=false with no collector")
	}
}
// TestSetRawHTTPResponse_lastWriteWins records two responses on the same
// collector and checks that only the second one is returned.
func TestSetRawHTTPResponse_lastWriteWins(t *testing.T) {
	ctx := WithRawHTTPCollector(context.Background())

	err := SetRawHTTPResponse(ctx, 200, nil, []byte("first"))
	if err != nil {
		t.Fatalf("first SetRawHTTPResponse: %v", err)
	}
	err = SetRawHTTPResponse(ctx, 503, map[string]string{"Retry-After": "5"}, []byte("second"))
	if err != nil {
		t.Fatalf("second SetRawHTTPResponse: %v", err)
	}

	final, recorded := TakeRawHTTPResponse(ctx)
	if !recorded {
		t.Fatal("expected response to be set")
	}

	matches := final.Status == 503 &&
		string(final.Body) == "second" &&
		final.Headers["Retry-After"] == "5"
	if !matches {
		t.Errorf("last-write-wins failed: got status=%d body=%q headers=%v", final.Status, final.Body, final.Headers)
	}
}

View File

@ -107,8 +107,9 @@ func (r *Registry) Register(ctx context.Context, fn *FunctionDefinition, wasmByt
memory_limit_mb, timeout_seconds, is_public, memory_limit_mb, timeout_seconds, is_public,
retry_count, retry_delay_seconds, dlq_topic, retry_count, retry_delay_seconds, dlq_topic,
status, created_at, updated_at, created_by, status, created_at, updated_at, created_by,
ws_persistent, ws_idle_timeout_sec, ws_max_frame_bytes, ws_max_inflight_per_conn ws_persistent, ws_idle_timeout_sec, ws_max_frame_bytes, ws_max_inflight_per_conn,
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) raw_http_response
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
` `
_, err = r.db.Exec(ctx, query, _, err = r.db.Exec(ctx, query,
id, fn.Name, fn.Namespace, version, wasmCID, id, fn.Name, fn.Namespace, version, wasmCID,
@ -116,6 +117,7 @@ func (r *Registry) Register(ctx context.Context, fn *FunctionDefinition, wasmByt
fn.RetryCount, retryDelay, fn.DLQTopic, fn.RetryCount, retryDelay, fn.DLQTopic,
string(FunctionStatusActive), now, now, fn.Namespace, string(FunctionStatusActive), now, now, fn.Namespace,
fn.WSPersistent, fn.WSIdleTimeoutSec, fn.WSMaxFrameBytes, fn.WSMaxInflightPerConn, fn.WSPersistent, fn.WSIdleTimeoutSec, fn.WSMaxFrameBytes, fn.WSMaxInflightPerConn,
fn.RawHTTPResponse,
) )
if err != nil { if err != nil {
return nil, &DeployError{FunctionName: fn.Name, Cause: fmt.Errorf("failed to register function: %w", err)} return nil, &DeployError{FunctionName: fn.Name, Cause: fmt.Errorf("failed to register function: %w", err)}
@ -154,7 +156,8 @@ func (r *Registry) Get(ctx context.Context, namespace, name string, version int)
memory_limit_mb, timeout_seconds, is_public, memory_limit_mb, timeout_seconds, is_public,
retry_count, retry_delay_seconds, dlq_topic, retry_count, retry_delay_seconds, dlq_topic,
status, created_at, updated_at, created_by, status, created_at, updated_at, created_by,
ws_persistent, ws_idle_timeout_sec, ws_max_frame_bytes, ws_max_inflight_per_conn ws_persistent, ws_idle_timeout_sec, ws_max_frame_bytes, ws_max_inflight_per_conn,
raw_http_response
FROM functions FROM functions
WHERE namespace = ? AND name = ? AND status = ? WHERE namespace = ? AND name = ? AND status = ?
ORDER BY version DESC ORDER BY version DESC
@ -167,7 +170,8 @@ func (r *Registry) Get(ctx context.Context, namespace, name string, version int)
memory_limit_mb, timeout_seconds, is_public, memory_limit_mb, timeout_seconds, is_public,
retry_count, retry_delay_seconds, dlq_topic, retry_count, retry_delay_seconds, dlq_topic,
status, created_at, updated_at, created_by, status, created_at, updated_at, created_by,
ws_persistent, ws_idle_timeout_sec, ws_max_frame_bytes, ws_max_inflight_per_conn ws_persistent, ws_idle_timeout_sec, ws_max_frame_bytes, ws_max_inflight_per_conn,
raw_http_response
FROM functions FROM functions
WHERE namespace = ? AND name = ? AND version = ? WHERE namespace = ? AND name = ? AND version = ?
` `
@ -197,7 +201,8 @@ func (r *Registry) List(ctx context.Context, namespace string) ([]*Function, err
f.memory_limit_mb, f.timeout_seconds, f.is_public, f.memory_limit_mb, f.timeout_seconds, f.is_public,
f.retry_count, f.retry_delay_seconds, f.dlq_topic, f.retry_count, f.retry_delay_seconds, f.dlq_topic,
f.status, f.created_at, f.updated_at, f.created_by, f.status, f.created_at, f.updated_at, f.created_by,
f.ws_persistent, f.ws_idle_timeout_sec, f.ws_max_frame_bytes, f.ws_max_inflight_per_conn f.ws_persistent, f.ws_idle_timeout_sec, f.ws_max_frame_bytes, f.ws_max_inflight_per_conn,
f.raw_http_response
FROM functions f FROM functions f
INNER JOIN ( INNER JOIN (
SELECT namespace, name, MAX(version) as max_version SELECT namespace, name, MAX(version) as max_version
@ -668,6 +673,11 @@ func (r *Registry) rowToFunction(row *functionRow) *Function {
WSIdleTimeoutSec: row.WSIdleTimeoutSec, WSIdleTimeoutSec: row.WSIdleTimeoutSec,
WSMaxFrameBytes: row.WSMaxFrameBytes, WSMaxFrameBytes: row.WSMaxFrameBytes,
WSMaxInflightPerConn: row.WSMaxInflightPerConn, WSMaxInflightPerConn: row.WSMaxInflightPerConn,
// Raw-HTTP-response mode (bugboard #835). Without reading this back
// the invoke handler's `if fn.RawHTTPResponse` engine branch never
// fires and set_http_response is rejected with an error for every function.
RawHTTPResponse: row.RawHTTPResponse,
} }
} }
@ -716,6 +726,11 @@ type functionRow struct {
WSIdleTimeoutSec int `db:"ws_idle_timeout_sec"` WSIdleTimeoutSec int `db:"ws_idle_timeout_sec"`
WSMaxFrameBytes int `db:"ws_max_frame_bytes"` WSMaxFrameBytes int `db:"ws_max_frame_bytes"`
WSMaxInflightPerConn int `db:"ws_max_inflight_per_conn"` WSMaxInflightPerConn int `db:"ws_max_inflight_per_conn"`
// Raw-HTTP-response mode (bugboard #835). Backed by migration
// 029_raw_http_response.sql; defaults to false so existing functions
// keep the JSON/Ack-wrapped behavior.
RawHTTPResponse bool `db:"raw_http_response"`
} }
type envVarRow struct { type envVarRow struct {

View File

@ -57,8 +57,9 @@ func (s *FunctionStore) Save(ctx context.Context, fn *FunctionDefinition, wasmCI
memory_limit_mb, timeout_seconds, is_public, memory_limit_mb, timeout_seconds, is_public,
retry_count, retry_delay_seconds, dlq_topic, retry_count, retry_delay_seconds, dlq_topic,
status, created_at, updated_at, created_by, status, created_at, updated_at, created_by,
ws_persistent, ws_idle_timeout_sec, ws_max_frame_bytes, ws_max_inflight_per_conn ws_persistent, ws_idle_timeout_sec, ws_max_frame_bytes, ws_max_inflight_per_conn,
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) raw_http_response
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
` `
_, err := s.db.Exec(ctx, query, _, err := s.db.Exec(ctx, query,
id, fn.Name, fn.Namespace, version, wasmCID, id, fn.Name, fn.Namespace, version, wasmCID,
@ -66,6 +67,7 @@ func (s *FunctionStore) Save(ctx context.Context, fn *FunctionDefinition, wasmCI
fn.RetryCount, retryDelay, fn.DLQTopic, fn.RetryCount, retryDelay, fn.DLQTopic,
string(FunctionStatusActive), now, now, fn.Namespace, string(FunctionStatusActive), now, now, fn.Namespace,
fn.WSPersistent, fn.WSIdleTimeoutSec, fn.WSMaxFrameBytes, fn.WSMaxInflightPerConn, fn.WSPersistent, fn.WSIdleTimeoutSec, fn.WSMaxFrameBytes, fn.WSMaxInflightPerConn,
fn.RawHTTPResponse,
) )
if err != nil { if err != nil {
return nil, fmt.Errorf("failed to save function: %w", err) return nil, fmt.Errorf("failed to save function: %w", err)
@ -101,6 +103,7 @@ func (s *FunctionStore) Save(ctx context.Context, fn *FunctionDefinition, wasmCI
WSIdleTimeoutSec: fn.WSIdleTimeoutSec, WSIdleTimeoutSec: fn.WSIdleTimeoutSec,
WSMaxFrameBytes: fn.WSMaxFrameBytes, WSMaxFrameBytes: fn.WSMaxFrameBytes,
WSMaxInflightPerConn: fn.WSMaxInflightPerConn, WSMaxInflightPerConn: fn.WSMaxInflightPerConn,
RawHTTPResponse: fn.RawHTTPResponse,
}, nil }, nil
} }
@ -114,7 +117,7 @@ func (s *FunctionStore) Get(ctx context.Context, namespace, name string, version
if version == 0 { if version == 0 {
query = ` query = `
SELECT id, name, namespace, version, wasm_cid, source_cid, ws_persistent, ws_idle_timeout_sec, ws_max_frame_bytes, ws_max_inflight_per_conn, SELECT id, name, namespace, version, wasm_cid, source_cid, ws_persistent, ws_idle_timeout_sec, ws_max_frame_bytes, ws_max_inflight_per_conn, raw_http_response,
memory_limit_mb, timeout_seconds, is_public, memory_limit_mb, timeout_seconds, is_public,
retry_count, retry_delay_seconds, dlq_topic, retry_count, retry_delay_seconds, dlq_topic,
status, created_at, updated_at, created_by status, created_at, updated_at, created_by
@ -126,7 +129,7 @@ func (s *FunctionStore) Get(ctx context.Context, namespace, name string, version
args = []interface{}{namespace, name, string(FunctionStatusActive)} args = []interface{}{namespace, name, string(FunctionStatusActive)}
} else { } else {
query = ` query = `
SELECT id, name, namespace, version, wasm_cid, source_cid, ws_persistent, ws_idle_timeout_sec, ws_max_frame_bytes, ws_max_inflight_per_conn, SELECT id, name, namespace, version, wasm_cid, source_cid, ws_persistent, ws_idle_timeout_sec, ws_max_frame_bytes, ws_max_inflight_per_conn, raw_http_response,
memory_limit_mb, timeout_seconds, is_public, memory_limit_mb, timeout_seconds, is_public,
retry_count, retry_delay_seconds, dlq_topic, retry_count, retry_delay_seconds, dlq_topic,
status, created_at, updated_at, created_by status, created_at, updated_at, created_by
@ -154,7 +157,7 @@ func (s *FunctionStore) Get(ctx context.Context, namespace, name string, version
// GetByID retrieves a function by its ID. // GetByID retrieves a function by its ID.
func (s *FunctionStore) GetByID(ctx context.Context, id string) (*Function, error) { func (s *FunctionStore) GetByID(ctx context.Context, id string) (*Function, error) {
query := ` query := `
SELECT id, name, namespace, version, wasm_cid, source_cid, ws_persistent, ws_idle_timeout_sec, ws_max_frame_bytes, ws_max_inflight_per_conn, SELECT id, name, namespace, version, wasm_cid, source_cid, ws_persistent, ws_idle_timeout_sec, ws_max_frame_bytes, ws_max_inflight_per_conn, raw_http_response,
memory_limit_mb, timeout_seconds, is_public, memory_limit_mb, timeout_seconds, is_public,
retry_count, retry_delay_seconds, dlq_topic, retry_count, retry_delay_seconds, dlq_topic,
status, created_at, updated_at, created_by status, created_at, updated_at, created_by
@ -180,7 +183,7 @@ func (s *FunctionStore) GetByNameInternal(ctx context.Context, namespace, name s
name = strings.TrimSpace(name) name = strings.TrimSpace(name)
query := ` query := `
SELECT id, name, namespace, version, wasm_cid, source_cid, ws_persistent, ws_idle_timeout_sec, ws_max_frame_bytes, ws_max_inflight_per_conn, SELECT id, name, namespace, version, wasm_cid, source_cid, ws_persistent, ws_idle_timeout_sec, ws_max_frame_bytes, ws_max_inflight_per_conn, raw_http_response,
memory_limit_mb, timeout_seconds, is_public, memory_limit_mb, timeout_seconds, is_public,
retry_count, retry_delay_seconds, dlq_topic, retry_count, retry_delay_seconds, dlq_topic,
status, created_at, updated_at, created_by status, created_at, updated_at, created_by
@ -207,6 +210,7 @@ func (s *FunctionStore) List(ctx context.Context, namespace string) ([]*Function
query := ` query := `
SELECT f.id, f.name, f.namespace, f.version, f.wasm_cid, f.source_cid, SELECT f.id, f.name, f.namespace, f.version, f.wasm_cid, f.source_cid,
f.ws_persistent, f.ws_idle_timeout_sec, f.ws_max_frame_bytes, f.ws_max_inflight_per_conn, f.ws_persistent, f.ws_idle_timeout_sec, f.ws_max_frame_bytes, f.ws_max_inflight_per_conn,
f.raw_http_response,
f.memory_limit_mb, f.timeout_seconds, f.is_public, f.memory_limit_mb, f.timeout_seconds, f.is_public,
f.retry_count, f.retry_delay_seconds, f.dlq_topic, f.retry_count, f.retry_delay_seconds, f.dlq_topic,
f.status, f.created_at, f.updated_at, f.created_by f.status, f.created_at, f.updated_at, f.created_by
@ -238,7 +242,7 @@ func (s *FunctionStore) List(ctx context.Context, namespace string) ([]*Function
// ListVersions returns all versions of a function. // ListVersions returns all versions of a function.
func (s *FunctionStore) ListVersions(ctx context.Context, namespace, name string) ([]*Function, error) { func (s *FunctionStore) ListVersions(ctx context.Context, namespace, name string) ([]*Function, error) {
query := ` query := `
SELECT id, name, namespace, version, wasm_cid, source_cid, ws_persistent, ws_idle_timeout_sec, ws_max_frame_bytes, ws_max_inflight_per_conn, SELECT id, name, namespace, version, wasm_cid, source_cid, ws_persistent, ws_idle_timeout_sec, ws_max_frame_bytes, ws_max_inflight_per_conn, raw_http_response,
memory_limit_mb, timeout_seconds, is_public, memory_limit_mb, timeout_seconds, is_public,
retry_count, retry_delay_seconds, dlq_topic, retry_count, retry_delay_seconds, dlq_topic,
status, created_at, updated_at, created_by status, created_at, updated_at, created_by
@ -399,5 +403,6 @@ func rowToFunction(row *functionRow) *Function {
WSIdleTimeoutSec: row.WSIdleTimeoutSec, WSIdleTimeoutSec: row.WSIdleTimeoutSec,
WSMaxFrameBytes: row.WSMaxFrameBytes, WSMaxFrameBytes: row.WSMaxFrameBytes,
WSMaxInflightPerConn: row.WSMaxInflightPerConn, WSMaxInflightPerConn: row.WSMaxInflightPerConn,
RawHTTPResponse: row.RawHTTPResponse,
} }
} }

View File

@ -38,6 +38,9 @@ type FunctionDefinition struct {
WSIdleTimeoutSec int WSIdleTimeoutSec int
WSMaxFrameBytes int WSMaxFrameBytes int
WSMaxInflightPerConn int WSMaxInflightPerConn int
// RawHTTPResponse enables raw-HTTP-response mode (bugboard #835).
RawHTTPResponse bool
} }
// Function represents a deployed serverless function. // Function represents a deployed serverless function.
@ -64,6 +67,9 @@ type Function struct {
WSIdleTimeoutSec int WSIdleTimeoutSec int
WSMaxFrameBytes int WSMaxFrameBytes int
WSMaxInflightPerConn int WSMaxInflightPerConn int
// RawHTTPResponse enables raw-HTTP-response mode (bugboard #835).
RawHTTPResponse bool
} }
// LogEntry represents a log message emitted from inside a WASM function // LogEntry represents a log message emitted from inside a WASM function
@ -180,6 +186,7 @@ type functionRow struct {
WSIdleTimeoutSec int WSIdleTimeoutSec int
WSMaxFrameBytes int WSMaxFrameBytes int
WSMaxInflightPerConn int WSMaxInflightPerConn int
RawHTTPResponse bool
} }
type envVarRow struct { type envVarRow struct {

View File

@ -0,0 +1,34 @@
package serverless
import (
"strings"
"testing"
)
// TestRegistryRowMapping_IncludesRawHTTPResponse guards the raw-HTTP-response
// column (bugboard #835): rowToFunction must copy raw_http_response off the DB
// row, otherwise the engine's `if fn.RawHTTPResponse` branch never attaches a
// collector and set_http_response is a permanent no-op for every function.
func TestRegistryRowMapping_IncludesRawHTTPResponse(t *testing.T) {
row := functionRow{RawHTTPResponse: true}
r := &Registry{}
fn := r.rowToFunction(&row)
if !fn.RawHTTPResponse {
t.Error("rowToFunction did not propagate RawHTTPResponse — raw-HTTP functions would silently fall back to JSON/Ack output (bugboard #835)")
}
}
// TestRegistry_QueriesRawHTTPResponseColumn is a SQL-text drift guard for the
// raw_http_response column, mirroring the ws_* column guard. It counts how
// often the column name occurs in the registry source and fails when the
// count drops below five, the floor chosen to cover the INSERT plus the
// read-path SELECTs.
//
// NOTE(review): the count is over the whole source text, so struct tags or
// comments that mention the column also contribute — confirm the floor is
// still tight enough to catch a single dropped SELECT.
//
// The test is skipped (not failed) when the source cannot be read.
func TestRegistry_QueriesRawHTTPResponseColumn(t *testing.T) {
	const minOccurrences = 5

	source, err := readRegistrySource()
	if err != nil {
		t.Skipf("cannot read registry.go for SQL inspection: %v", err)
	}

	if count := strings.Count(source, "raw_http_response"); count < minOccurrences {
		t.Errorf("column raw_http_response appears in registry.go only %d times; expected ≥5 (INSERT + each SELECT path). A READ path probably dropped it and raw-HTTP functions will silently fall back to JSON output.", count)
	}
}

View File

@ -0,0 +1,159 @@
package triggers
import (
"context"
"fmt"
"testing"
olriclib "github.com/olric-data/olric"
"github.com/olric-data/olric/stats"
"go.uber.org/zap"
"go.uber.org/zap/zapcore"
"go.uber.org/zap/zaptest/observer"
)
// failingOlricClient is a stub olric.Client for tests. NewDMap always fails,
// which models an Olric backend that is configured but unavailable — the
// degraded path bugboard #555 must surface (fail-open + rate-limited WARN).
// The remaining methods exist only to satisfy the interface.
type failingOlricClient struct{}

// NewDMap always returns an error so callers exercise their degraded path.
func (failingOlricClient) NewDMap(_ string, _ ...olriclib.DMapOption) (olriclib.DMap, error) {
	return nil, fmt.Errorf("olric unavailable (test)")
}

// NewPubSub is not implemented by the stub.
func (failingOlricClient) NewPubSub(_ ...olriclib.PubSubOption) (*olriclib.PubSub, error) {
	return nil, fmt.Errorf("not implemented")
}

// Stats is not implemented by the stub.
func (failingOlricClient) Stats(_ context.Context, _ string, _ ...olriclib.StatsOption) (stats.Stats, error) {
	return stats.Stats{}, fmt.Errorf("not implemented")
}

// Ping is not implemented by the stub.
func (failingOlricClient) Ping(_ context.Context, _ string, _ string) (string, error) {
	return "", fmt.Errorf("not implemented")
}

// RoutingTable is not implemented by the stub.
func (failingOlricClient) RoutingTable(_ context.Context) (olriclib.RoutingTable, error) {
	return nil, fmt.Errorf("not implemented")
}

// Members is not implemented by the stub.
func (failingOlricClient) Members(_ context.Context) ([]olriclib.Member, error) {
	return nil, fmt.Errorf("not implemented")
}

// RefreshMetadata is a no-op that reports success.
func (failingOlricClient) RefreshMetadata(_ context.Context) error {
	return nil
}

// Close is a no-op that reports success.
func (failingOlricClient) Close(_ context.Context) error {
	return nil
}

// Compile-time check that the stub satisfies the full olric client interface.
var _ olriclib.Client = failingOlricClient{}
// Bugboard #555 — duplicate push from the dispatcher firing twice.
//
// The tests below exercise Dispatch's local-dedup short-circuit and the
// degraded-dedup WARN. The Dispatch tests build the trigger store with a nil
// db: getMatches would panic on the nil rqlite.Client, so "did Dispatch
// panic?" stands in for "did we reach getMatches?". The local dedup runs
// BEFORE getMatches, so a deduped call must return cleanly without touching
// the store.

// TestDispatch_localDedupSkipsSecondInvokeSameNode publishes the same
// (namespace, topic, payload) twice on one dispatcher. The first call must
// get past the dedup gate (observed as a panic from the nil-db store); the
// second must be dropped by the local dedup before the store is touched
// (observed as a clean return).
func TestDispatch_localDedupSkipsSecondInvokeSameNode(t *testing.T) {
	logger := zap.NewNop()
	// nil db: getMatches panics if reached
	dispatcher := NewPubSubDispatcher(NewPubSubTriggerStore(nil, logger), nil, nil, nil, logger)

	const (
		namespace = "anchat"
		topic     = "messages:new"
	)
	payload := []byte(`{"messageId":"m1"}`)

	// dispatchPanics runs one Dispatch and reports whether it panicked.
	dispatchPanics := func() (panicked bool) {
		defer func() {
			if recover() != nil {
				panicked = true
			}
		}()
		dispatcher.Dispatch(context.Background(), namespace, topic, payload, 0)
		return false
	}

	// First publish: NOT deduped → reaches getMatches → nil-db panic.
	if reachedStore := dispatchPanics(); !reachedStore {
		t.Fatal("first publish must pass the dedup gate and reach the store lookup")
	}

	// Second IDENTICAL publish within the TTL: must return before
	// getMatches, so no panic this time.
	if dedupedClean := !dispatchPanics(); !dedupedClean {
		t.Error("BUG #555 REGRESSION: identical second publish on the same node " +
			"must be deduped locally and NOT re-dispatch")
	}
}
// TestDispatch_distinctPayloadsBothDispatch verifies that two publishes on the
// same namespace/topic with different payloads are NOT collapsed by the local
// dedup. Each one must reach the store lookup, which the nil-db store turns
// into an observable panic.
func TestDispatch_distinctPayloadsBothDispatch(t *testing.T) {
	logger := zap.NewNop()
	dispatcher := NewPubSubDispatcher(NewPubSubTriggerStore(nil, logger), nil, nil, nil, logger)

	const (
		namespace = "anchat"
		topic     = "messages:new"
	)
	payloads := [][]byte{
		[]byte(`{"messageId":"a"}`),
		[]byte(`{"messageId":"b"}`),
	}

	// dispatchPanics runs one Dispatch and reports whether it panicked.
	dispatchPanics := func(payload []byte) (panicked bool) {
		defer func() {
			if recover() != nil {
				panicked = true
			}
		}()
		dispatcher.Dispatch(context.Background(), namespace, topic, payload, 0)
		return false
	}

	for _, data := range payloads {
		if dispatchPanics(data) {
			continue // reached the store, as required
		}
		t.Errorf("distinct payload %q must NOT be deduped — it must reach dispatch", data)
	}
}
// TestClaimDispatch_degradedWarnWhenOlricDown covers the "Olric configured but
// failing" path: the client is non-nil but NewDMap errors. claimDispatch must
// still report true (fail-open) and must log a WARN so operators can see that
// cross-node dedup is degraded.
func TestClaimDispatch_degradedWarnWhenOlricDown(t *testing.T) {
	core, logs := observer.New(zapcore.WarnLevel)
	dispatcher := &PubSubDispatcher{
		logger:      zap.New(core),
		olricClient: failingOlricClient{},
	}

	fired := dispatcher.claimDispatch(context.Background(), "ns", "messages:new", []byte("x"))
	if !fired {
		t.Fatal("claimDispatch must fail-open (true) when Olric is degraded — never drop the wake")
	}

	if warnings := logs.FilterMessageSnippet("dedup degraded"); warnings.Len() == 0 {
		t.Error("degraded Olric path must emit a WARN naming the degradation, not stay silent")
	}
}
// TestClaimDispatch_degradedWarnRateLimited checks that a sustained Olric
// outage does not flood the log: five back-to-back degraded claims must
// produce exactly one "dedup degraded" WARN.
func TestClaimDispatch_degradedWarnRateLimited(t *testing.T) {
	const attempts = 5

	core, logs := observer.New(zapcore.WarnLevel)
	dispatcher := &PubSubDispatcher{
		logger:      zap.New(core),
		olricClient: failingOlricClient{},
	}

	for attempt := 0; attempt < attempts; attempt++ {
		dispatcher.claimDispatch(context.Background(), "ns", "messages:new", []byte("x"))
	}

	got := logs.FilterMessageSnippet("dedup degraded").Len()
	if got != 1 {
		t.Errorf("degraded WARN must be rate-limited to 1 per interval; got %d", got)
	}
}
// TestClaimDispatch_nilOlricStaysQuiet covers the nil-Olric configuration,
// which is a NORMAL single-node / cache-disabled setup rather than a degraded
// multi-node cluster. claimDispatch must report true and must not log
// anything at WARN level or above.
func TestClaimDispatch_nilOlricStaysQuiet(t *testing.T) {
	core, logs := observer.New(zapcore.WarnLevel)
	// olricClient is deliberately left nil.
	dispatcher := &PubSubDispatcher{logger: zap.New(core)}

	fired := dispatcher.claimDispatch(context.Background(), "ns", "messages:new", []byte("x"))
	if !fired {
		t.Fatal("nil Olric must fail-open (true)")
	}

	if logs.Len() != 0 {
		t.Errorf("nil Olric is a normal config and must NOT emit a degraded WARN; got %d logs", logs.Len())
	}
}

View File

@ -134,8 +134,24 @@ type PubSubDispatcher struct {
// stopCh signals the periodic Refresh goroutine to exit. // stopCh signals the periodic Refresh goroutine to exit.
stopCh chan struct{} stopCh chan struct{}
stopOnce sync.Once stopOnce sync.Once
// localDedup guards against a SINGLE node invoking the same publish
// twice (e.g. gossipsub self-delivery), independent of Olric health.
// Bugboard #555. Always non-nil after NewPubSubDispatcher.
localDedup *localDedupCache
// degradedDedupWarn rate-limits the "Olric dedup degraded" WARN so a
// misconfigured cluster doesn't flood the log on every publish.
// Bugboard #555.
degradedDedupMu sync.Mutex
degradedDedupLastWarn time.Time
} }
// degradedDedupWarnInterval is the minimum spacing between two consecutive
// cross-node "dispatch dedup degraded" WARN log lines (bugboard #555).
// warnDedupDegraded compares the time elapsed since the last emitted warning
// against this value and stays silent when less has passed, so operators are
// alerted without the log being flooded under high publish volume.
const degradedDedupWarnInterval = 60 * time.Second
// NewPubSubDispatcher creates a new PubSub trigger dispatcher. // NewPubSubDispatcher creates a new PubSub trigger dispatcher.
// //
// The `ps` argument may be nil (e.g. in tests, or namespaces with pubsub // The `ps` argument may be nil (e.g. in tests, or namespaces with pubsub
@ -158,6 +174,7 @@ func NewPubSubDispatcher(
logger: logger, logger: logger,
subscribedKeys: make(map[string]bool), subscribedKeys: make(map[string]bool),
stopCh: make(chan struct{}), stopCh: make(chan struct{}),
localDedup: newLocalDedupCache(),
} }
} }
@ -337,6 +354,20 @@ func (d *PubSubDispatcher) Dispatch(ctx context.Context, namespace, topic string
return return
} }
// Local once-per-publish dedup (bugboard #555). gossipsub can deliver
// the SAME publish to this node's subscribe handler more than once
// (self-delivery / fan-out), and the cross-node Olric claim below is a
// no-op when Olric is down. This in-process guard ensures a SINGLE node
// never invokes the same (namespace, topic, payload) twice, regardless
// of Olric health.
dedupKey := dispatchDedupKey(namespace, topic, data)
if !d.localDedup.claim(dedupKey) {
d.logger.Debug("PubSub dispatch deduped (local duplicate on this node)",
zap.String("namespace", namespace),
zap.String("topic", topic))
return
}
// Cluster-wide once-per-publish dedup (bugboard #30). gossipsub // Cluster-wide once-per-publish dedup (bugboard #30). gossipsub
// delivers a publish to every subscribed gateway node; only the node // delivers a publish to every subscribed gateway node; only the node
// that wins the Olric claim for this (namespace, topic, payload) // that wins the Olric claim for this (namespace, topic, payload)
@ -580,7 +611,7 @@ func (d *PubSubDispatcher) claimDispatch(ctx context.Context, namespace, topic s
} }
dm, err := d.olricClient.NewDMap(dispatchDedupDMap) dm, err := d.olricClient.NewDMap(dispatchDedupDMap)
if err != nil { if err != nil {
d.logger.Debug("dispatch dedup: NewDMap failed, firing (fail-open)", zap.Error(err)) d.warnDedupDegraded("NewDMap failed", namespace, topic, err)
return true return true
} }
key := dispatchDedupKey(namespace, topic, data) key := dispatchDedupKey(namespace, topic, data)
@ -594,11 +625,39 @@ func (d *PubSubDispatcher) claimDispatch(ctx context.Context, namespace, topic s
// Any other (transient) error: fail-open and fire rather than risk a // Any other (transient) error: fail-open and fire rather than risk a
// dropped wake. Worst case is a duplicate, which is what #30 already // dropped wake. Worst case is a duplicate, which is what #30 already
// had — never worse. // had — never worse.
d.logger.Debug("dispatch dedup: claim errored, firing (fail-open)", d.warnDedupDegraded("claim Put errored", namespace, topic, err)
zap.String("topic", topic), zap.Error(err))
return true return true
} }
// warnDedupDegraded logs a WARN announcing that cross-node dispatch dedup is
// degraded (Olric unavailable), meaning every node that receives a publish
// will fire for it until the shared store recovers (bugboard #555).
//
// The warning is rate-limited: it is emitted only when at least
// degradedDedupWarnInterval has elapsed since the previous one, so a
// sustained outage does not flood the log. The timestamp check-and-update
// runs under degradedDedupMu; the log call itself happens outside the lock.
//
// reason is a short label for the failing step, namespace and topic identify
// the publish being dispatched, and err is the underlying Olric error.
func (d *PubSubDispatcher) warnDedupDegraded(reason, namespace, topic string, err error) {
	// Decide (and record) under the mutex whether this call gets to warn.
	due := func() bool {
		d.degradedDedupMu.Lock()
		defer d.degradedDedupMu.Unlock()

		now := time.Now()
		if now.Sub(d.degradedDedupLastWarn) < degradedDedupWarnInterval {
			return false
		}
		d.degradedDedupLastWarn = now
		return true
	}()
	if !due {
		return
	}

	d.logger.Warn("PubSub dispatch dedup degraded: Olric unavailable, "+
		"falling back to fire-on-every-node — cross-node duplicate pushes "+
		"possible until the shared store recovers",
		zap.String("reason", reason),
		zap.String("namespace", namespace),
		zap.String("topic", topic),
		zap.Duration("warn_interval", degradedDedupWarnInterval),
		zap.Error(err),
	)
}
// InvalidateCache is now a no-op — the dispatcher no longer caches lookups. // InvalidateCache is now a no-op — the dispatcher no longer caches lookups.
// Kept on the type so callers who used it still compile. // Kept on the type so callers who used it still compile.
func (d *PubSubDispatcher) InvalidateCache(ctx context.Context, namespace, topic string) {} func (d *PubSubDispatcher) InvalidateCache(ctx context.Context, namespace, topic string) {}

View File

@ -0,0 +1,108 @@
package triggers
import (
"sync"
"time"
)
// Bugboard #555 — messages:new trigger fires twice (duplicate push).
//
// Two distinct bugs produced duplicate dispatches:
//
// 1. Cross-node fail-open: claimDispatch (dispatcher.go) coordinates
// once-per-publish dispatch via Olric, but FAILS OPEN when Olric is
// unavailable/misconfigured. On a multi-node cluster every node that
// receives the gossip publish then fires the handler → N duplicate
// invocations (AnChat: exactly 2 on a 2-reachable-node cluster).
//
// 2. Single-node self-delivery: even on one node, gossipsub can deliver a
// locally-originated publish back to the same node's subscribe handler,
// and the only guard was the cross-node Olric claim — which is a no-op
// when Olric is down.
//
// localDedupCache fixes (2) and bounds the blast radius of (1): a single
// node never invokes the SAME publish twice, regardless of Olric health.
// It is a small bounded map with per-entry TTL, keyed by the SAME string
// dispatchDedupKey produces — (namespace, topic, sha256(payload)[:16]).
//
// IDENTICAL-PAYLOAD CAVEAT: the key folds the payload hash, NOT a stable
// message id (gossipsub's message-ID isn't plumbed through the subscribe
// handler, and parsing an app-specific id would couple the dispatcher to a
// tenant's JSON schema). So two byte-identical publishes within the TTL
// window collapse to one local invocation. Real payloads carry a unique id
// (messageId/seq), so this is not a practical concern; it is the same
// trade-off documented on dispatchDedupKey.
const (
	// localDedupTTL is how long a claimed (namespace, topic, payload) key is
	// remembered on this node. It must cover gossipsub self-delivery /
	// fan-out jitter without de-duplicating legitimately-repeated publishes
	// seconds apart. Kept in lockstep with dispatchDedupTTL.
	localDedupTTL = 30 * time.Second

	// localDedupMaxEntries is the hard cap on remembered keys, so a
	// high-throughput namespace can't grow the cache without bound. At the
	// cap, expired entries are swept; if the cache is still full, the claim
	// is let through unrecorded (fail-open — a rare duplicate is far better
	// than dropping a wake).
	localDedupMaxEntries = 4096
)

// localDedupCache is a size-bounded set of recently-dispatched keys for a
// single node, each with its own expiry time. All methods that touch entries
// take mu, so the cache is safe for concurrent use.
type localDedupCache struct {
	mu      sync.Mutex
	entries map[string]time.Time // key -> expiry instant
	ttl     time.Duration        // lifetime given to each newly recorded key
	maxSize int                  // maximum number of recorded keys
	now     func() time.Time     // clock source; replaced in tests
}

// newLocalDedupCache returns an empty cache configured with the package
// defaults (localDedupTTL, localDedupMaxEntries) and the wall clock.
func newLocalDedupCache() *localDedupCache {
	cache := new(localDedupCache)
	cache.entries = make(map[string]time.Time)
	cache.ttl = localDedupTTL
	cache.maxSize = localDedupMaxEntries
	cache.now = time.Now
	return cache
}

// claim reports whether THIS node may dispatch key now, recording the key
// when there is room for it.
//
// It returns false only when key is already recorded and its expiry is still
// in the future (a local duplicate — the caller should skip). In every other
// case it returns true (the caller should dispatch).
//
// Expired entries are swept only when the cache is at capacity. If the cache
// is still full after the sweep, claim fails open: it returns true without
// recording the key, so a repeat of that key will also be allowed through.
func (c *localDedupCache) claim(key string) bool {
	c.mu.Lock()
	defer c.mu.Unlock()

	current := c.now()
	if expiry, seen := c.entries[key]; seen && current.Before(expiry) {
		// Seen recently: local duplicate, skip.
		return false
	}

	// Unseen, or the previous entry has expired. Make room if needed.
	if len(c.entries) >= c.maxSize {
		c.sweepExpiredLocked(current)
		if len(c.entries) >= c.maxSize {
			// Still full of live entries: allow dispatch rather than drop.
			return true
		}
	}

	c.entries[key] = current.Add(c.ttl)
	return true
}

// sweepExpiredLocked deletes every entry whose expiry is at or before now.
// The caller must hold c.mu.
func (c *localDedupCache) sweepExpiredLocked(now time.Time) {
	for key, expiry := range c.entries {
		if now.Before(expiry) {
			continue // still live
		}
		delete(c.entries, key)
	}
}

View File

@ -0,0 +1,140 @@
package triggers
import (
"sync"
"sync/atomic"
"testing"
"time"
)
// Bugboard #555 — a SINGLE node must never invoke the same publish twice,
// independent of Olric health. The tests in this file pin the local dedup
// cache's claim / expiry / eviction behavior.

// TestLocalDedupCache_sameKeyClaimedOncePerWindow checks the core contract:
// the first claim of a key fires, and an immediate repeat is deduped.
func TestLocalDedupCache_sameKeyClaimedOncePerWindow(t *testing.T) {
	cache := newLocalDedupCache()
	key := dispatchDedupKey("ns", "messages:new", []byte(`{"id":1}`))

	first := cache.claim(key)
	if !first {
		t.Fatal("first claim of an unseen key must fire (return true)")
	}

	second := cache.claim(key)
	if second {
		t.Error("second claim within the TTL must be deduped (return false)")
	}
}
// TestLocalDedupCache_distinctKeysBothFire checks that two keys derived from
// different payloads on the same namespace/topic are claimed independently.
func TestLocalDedupCache_distinctKeysBothFire(t *testing.T) {
	cache := newLocalDedupCache()
	keyA := dispatchDedupKey("ns", "messages:new", []byte("A"))
	keyB := dispatchDedupKey("ns", "messages:new", []byte("B"))

	if fired := cache.claim(keyA); !fired {
		t.Error("distinct payload A must fire")
	}
	if fired := cache.claim(keyB); !fired {
		t.Error("distinct payload B must fire (different payload → different key)")
	}
}
// TestLocalDedupCache_expiredEntryFiresAgain checks TTL expiry using an
// injected clock (no sleeping): a key is deduped while its entry is live and
// fires again once the clock has moved past the TTL.
func TestLocalDedupCache_expiredEntryFiresAgain(t *testing.T) {
	clock := time.Unix(1_000_000, 0)
	cache := newLocalDedupCache()
	cache.now = func() time.Time { return clock }

	key := dispatchDedupKey("ns", "messages:new", []byte("x"))

	if fired := cache.claim(key); !fired {
		t.Fatal("first claim must fire")
	}
	if fired := cache.claim(key); fired {
		t.Fatal("immediate re-claim must be deduped")
	}

	// Move the clock past the TTL: the entry has expired, so the same key
	// must fire again (a legitimately-repeated publish seconds apart).
	clock = clock.Add(localDedupTTL + time.Second)
	if fired := cache.claim(key); !fired {
		t.Error("after TTL expiry the same key must fire again")
	}
}
// TestLocalDedupCache_evictsExpiredOnPressure fills a tiny cache to capacity,
// advances the injected clock past the TTL so every entry is expired, and
// then claims a fresh key. The sweep must reclaim space so the fresh key is
// recorded and the cache stays within maxSize.
func TestLocalDedupCache_evictsExpiredOnPressure(t *testing.T) {
	clock := time.Unix(2_000_000, 0)
	cache := &localDedupCache{
		entries: make(map[string]time.Time),
		ttl:     localDedupTTL,
		maxSize: 4, // tiny cap to exercise the sweep path deterministically
		now:     func() time.Time { return clock },
	}

	// Fill to capacity with soon-to-expire entries.
	for i := 0; i < cache.maxSize; i++ {
		fillKey := dispatchDedupKey("ns", "t", []byte{byte(i)})
		if fired := cache.claim(fillKey); !fired {
			t.Fatalf("fill claim %d must fire", i)
		}
	}
	if size := len(cache.entries); size != cache.maxSize {
		t.Fatalf("expected cache full at %d, got %d", cache.maxSize, size)
	}

	// Expire everything, then claim a new key under pressure.
	clock = clock.Add(localDedupTTL + time.Second)
	freshKey := dispatchDedupKey("ns", "t", []byte("fresh"))
	if fired := cache.claim(freshKey); !fired {
		t.Fatal("new key under pressure must fire")
	}

	_, recorded := cache.entries[freshKey]
	if !recorded {
		t.Error("new key must be recorded after expired entries were swept")
	}
	if size := len(cache.entries); size > cache.maxSize {
		t.Errorf("cache must not exceed maxSize after sweep; got %d", size)
	}
}
// TestLocalDedupCache_concurrentClaimsExactlyOneWins is a race guard: many
// goroutines claim the SAME key at once (as when gossipsub delivers one
// publish across handler goroutines) and exactly one of them must win. Run
// under -race to catch unsynchronized map access.
func TestLocalDedupCache_concurrentClaimsExactlyOneWins(t *testing.T) {
	const goroutines = 64

	cache := newLocalDedupCache()
	key := dispatchDedupKey("ns", "messages:new", []byte(`{"id":"race"}`))

	var (
		winners int64
		pending sync.WaitGroup
	)
	for worker := 0; worker < goroutines; worker++ {
		pending.Add(1)
		go func() {
			defer pending.Done()
			if !cache.claim(key) {
				return
			}
			atomic.AddInt64(&winners, 1)
		}()
	}
	pending.Wait()

	if total := atomic.LoadInt64(&winners); total != 1 {
		t.Errorf("exactly one concurrent claim of the same key must win; got %d", total)
	}
}
// TestLocalDedupCache_failsOpenWhenFullOfLiveEntries checks the capacity
// fallback: when every slot holds a still-live entry, claiming a new key must
// return true (fire) rather than drop a legitimate wake.
func TestLocalDedupCache_failsOpenWhenFullOfLiveEntries(t *testing.T) {
	clock := time.Unix(3_000_000, 0)
	cache := &localDedupCache{
		entries: make(map[string]time.Time),
		ttl:     localDedupTTL,
		maxSize: 2,
		now:     func() time.Time { return clock },
	}

	// Occupy both slots with entries that are still live (the clock never
	// advances in this test).
	for _, payload := range []string{"a", "b"} {
		cache.claim(dispatchDedupKey("ns", "t", []byte(payload)))
	}

	overflowKey := dispatchDedupKey("ns", "t", []byte("c"))
	if fired := cache.claim(overflowKey); !fired {
		t.Error("claim must fail-open (true) when the cache is full of live entries")
	}
}

View File

@ -237,6 +237,11 @@ type FunctionDefinition struct {
WSIdleTimeoutSec int `json:"ws_idle_timeout_sec,omitempty"` // 0 = no idle timeout WSIdleTimeoutSec int `json:"ws_idle_timeout_sec,omitempty"` // 0 = no idle timeout
WSMaxFrameBytes int `json:"ws_max_frame_bytes,omitempty"` // 0 = use default 256 KB WSMaxFrameBytes int `json:"ws_max_frame_bytes,omitempty"` // 0 = use default 256 KB
WSMaxInflightPerConn int `json:"ws_max_inflight_per_conn,omitempty"` // 0 = use default 64 WSMaxInflightPerConn int `json:"ws_max_inflight_per_conn,omitempty"` // 0 = use default 64
// RawHTTPResponse enables raw-HTTP-response mode (bugboard #835): the
// function may call set_http_response to emit a verbatim status/headers/
// body instead of the JSON/Ack-wrapped output. See pkg/serverless/raw_http.go.
RawHTTPResponse bool `json:"raw_http_response,omitempty"`
} }
// DBTriggerConfig defines a database trigger configuration. // DBTriggerConfig defines a database trigger configuration.
@ -270,6 +275,11 @@ type Function struct {
WSIdleTimeoutSec int `json:"ws_idle_timeout_sec,omitempty"` WSIdleTimeoutSec int `json:"ws_idle_timeout_sec,omitempty"`
WSMaxFrameBytes int `json:"ws_max_frame_bytes,omitempty"` WSMaxFrameBytes int `json:"ws_max_frame_bytes,omitempty"`
WSMaxInflightPerConn int `json:"ws_max_inflight_per_conn,omitempty"` WSMaxInflightPerConn int `json:"ws_max_inflight_per_conn,omitempty"`
// RawHTTPResponse — bugboard #835. When true, the function may emit a
// verbatim HTTP response via set_http_response instead of the
// JSON/Ack-wrapped output. See pkg/serverless/raw_http.go.
RawHTTPResponse bool `json:"raw_http_response,omitempty"`
} }
// InvocationContext provides context for a function invocation. // InvocationContext provides context for a function invocation.
@ -308,6 +318,14 @@ type InvocationContext struct {
// could create by publishing topics that match its own wildcard // could create by publishing topics that match its own wildcard
// trigger (bugboard #93 follow-up). // trigger (bugboard #93 follow-up).
TriggerDepth int `json:"trigger_depth,omitempty"` TriggerDepth int `json:"trigger_depth,omitempty"`
// RawHTTP carries a verbatim HTTP response set by a RawHTTPResponse
// function (bugboard #835). The engine populates this from the
// per-invocation collector after Execute returns; the Invoker surfaces
// it on InvokeResponse so the HTTP handler can replay it. nil/unset for
// normal functions and functions that didn't call set_http_response.
// Not serialized — internal plumbing only.
RawHTTP *RawHTTPResult `json:"-"`
} }
// InvocationResult represents the result of a function invocation. // InvocationResult represents the result of a function invocation.
@ -555,6 +573,28 @@ type HostServices interface {
// in OnClose unless they want to dynamically unsubscribe. // in OnClose unless they want to dynamically unsubscribe.
WSPubSubUnbridge(ctx context.Context, clientID, topic string) error WSPubSubUnbridge(ctx context.Context, clientID, topic string) error
// SetHTTPResponse records a verbatim HTTP response (status, headers, body)
// for a RawHTTPResponse function (bugboard #835). The HTTP invoke handler
// replays it byte-for-byte instead of the JSON/Ack-wrapped output, so a
// function can transparently proxy an upstream RPC. Returns an error when
// the function is NOT deployed with raw_http_response, or when the status /
// header count / body size fail validation. headers may be nil.
SetHTTPResponse(ctx context.Context, status int, headers map[string]string, body []byte) error
// EphemeralStateSet records WS-subscribe-tracked ephemeral state owned by
// the current invocation's WS client (bugboard #710) and publishes a "set"
// event on the topic so subscribers observe it. The state auto-clears (with
// a synthetic "clear" event) when the owning WS client disconnects, and
// also expires after ttlMs (clamped to a max; <=0 uses a default). Returns
// an error when there is no WS client in context, on empty topic/key, on an
// oversized payload, or when the client's per-connection key cap is hit.
EphemeralStateSet(ctx context.Context, topic, key string, payload []byte, ttlMs int64) error
// EphemeralStateClear removes ephemeral state the current WS client owns
// and publishes a "clear" event. Idempotent: clearing a missing or
// non-owned key is a no-op. Returns an error only when there is no WS
// client in context or when the topic or key is empty.
EphemeralStateClear(ctx context.Context, topic, key string) error
// WebSocket operations (only valid in WS context) // WebSocket operations (only valid in WS context)
WSSend(ctx context.Context, clientID string, data []byte) error WSSend(ctx context.Context, clientID string, data []byte) error
WSBroadcast(ctx context.Context, topic string, data []byte) error WSBroadcast(ctx context.Context, topic string, data []byte) error

View File

@ -23,6 +23,14 @@ type WSManager struct {
subscriptions map[string]map[string]struct{} subscriptions map[string]map[string]struct{}
subscriptionsMu sync.RWMutex subscriptionsMu sync.RWMutex
// disconnectHooks run (synchronously) on Unregister for each client,
// AFTER the connection + subscriptions are torn down. Used by the
// ephemeral-state store (bugboard #710) to auto-clear a client's owned
// state on disconnect. Both the stateless and persistent WS handlers
// call Unregister, so a single hook covers both paths.
disconnectHooks []func(clientID string)
disconnectHooksMu sync.RWMutex
logger *zap.Logger logger *zap.Logger
} }
@ -102,6 +110,20 @@ func (m *WSManager) Register(clientID string, conn WebSocketConn) {
) )
} }
// AddDisconnectHook appends hook to the list of callbacks that Unregister
// invokes synchronously, once per unregistered client, after that client's
// connection and subscriptions have been torn down. It exists so that
// WS-subscribe-tracked ephemeral state can be auto-cleared on disconnect
// (bugboard #710). A nil hook is ignored. Hooks run inline on the WS read
// loop's teardown path, so they must be cheap and non-blocking; register
// them once at gateway init.
func (m *WSManager) AddDisconnectHook(hook func(clientID string)) {
	if hook != nil {
		// Only take the write lock when there is actually something to add.
		m.disconnectHooksMu.Lock()
		defer m.disconnectHooksMu.Unlock()
		m.disconnectHooks = append(m.disconnectHooks, hook)
	}
}
// Unregister removes a WebSocket connection and its subscriptions. // Unregister removes a WebSocket connection and its subscriptions.
func (m *WSManager) Unregister(clientID string) { func (m *WSManager) Unregister(clientID string) {
m.connectionsMu.Lock() m.connectionsMu.Lock()
@ -130,6 +152,14 @@ func (m *WSManager) Unregister(clientID string) {
// Close the connection // Close the connection
_ = conn.conn.Close() _ = conn.conn.Close()
// Fire disconnect hooks (ephemeral-state auto-clear, bugboard #710).
m.disconnectHooksMu.RLock()
hooks := m.disconnectHooks
m.disconnectHooksMu.RUnlock()
for _, hook := range hooks {
hook(clientID)
}
m.logger.Debug("Unregistered WebSocket connection", m.logger.Debug("Unregistered WebSocket connection",
zap.String("client_id", clientID), zap.String("client_id", clientID),
zap.Int("remaining_connections", m.GetConnectionCount()), zap.Int("remaining_connections", m.GetConnectionCount()),