feat(serverless): add raw http response mode and secrets encryption

- Add `raw_http_response` configuration to functions to allow verbatim HTTP responses
- Implement cluster-wide secrets encryption key generation and distribution for serverless functions
- Update documentation with UnifiedPush support for ntfy on Android/GrapheneOS
2026-06-16 22:54:12 +00:00 · 2026-06-09 13:01:02 +03:00 · 2026-06-09 13:01:02 +03:00 · f41242538e
commit f41242538e
parent aa04ab5f50
54 changed files with 3310 additions and 53 deletions
--- a/core/docs/PUSH_NOTIFICATIONS.md
+++ b/core/docs/PUSH_NOTIFICATIONS.md
@ -214,6 +214,43 @@ your client computes locally from `(namespace, userId, topic_secret)`.

 For `ntfy` with `topic_mode=path`, the token is `ns/<namespace>/<userId>`.

+### UnifiedPush (Android / GrapheneOS, no Google Play Services)
+
+ntfy is a [UnifiedPush](https://unifiedpush.org) distributor, so Android
+devices — including de-Googled **GrapheneOS** — can receive push **without
+Firebase / Google Play Services**. The flow:
+
+1. The device runs a UnifiedPush **distributor** (the ntfy Android app, or an
+   embedded distributor library) pointed at your push host
+   (`https://push.<your-zone>`).
+2. The app registers with the distributor and is handed an **endpoint URL**,
+   e.g. `https://push.<your-zone>/upXXXXXXXX`.
+3. Register that endpoint as a push device:
+
+   ```http
+   POST /v1/push/devices
+   {
+     "device_id": "<unique per-device ID>",
+     "provider":  "ntfy",
+     "token":     "https://push.<your-zone>/upXXXXXXXX",   // the full endpoint
+     "platform":  "android"
+   }
+   ```
+
+The gateway POSTs to the endpoint **verbatim** (per the UnifiedPush spec), so
+you don't have to deconstruct it. As a safety measure the endpoint's
+scheme+host **must match your configured ntfy push host** — a device token can
+only ever publish to your own push server, never an arbitrary host.
+
+You may instead register just the bare **topic** (the endpoint's last path
+segment) as the token — both forms work; use whichever your UnifiedPush library
+makes convenient.
+
+**GrapheneOS notes:** works under both "No Google Play" and "Sandboxed Google
+Play" profiles. The distributor holds the persistent connection (not your app),
+so battery impact is the distributor's; high-priority messages
+(`priority: "high"`) wake the app from Doze.
+
 ---

 ## Step 6 — Send pushes
--- a/core/migrations/029_raw_http_response.sql
+++ b/core/migrations/029_raw_http_response.sql
@ -0,0 +1,15 @@
+-- =============================================================================
+-- 029_raw_http_response.sql
+--
+-- Raw-HTTP-response serverless function mode — bugboard #835.
+--
+-- When raw_http_response is true, the function may call the set_http_response
+-- host function to emit a verbatim HTTP response (status + headers + body)
+-- instead of the JSON/Ack-wrapped output. This lets a namespace app proxy an
+-- upstream RPC (Helius / Alchemy) transparently. See pkg/serverless/raw_http.go.
+--
+-- Default false → backward compatible: existing functions keep returning the
+-- JSON/Ack-wrapped output unchanged.
+-- =============================================================================
+
+ALTER TABLE functions ADD COLUMN raw_http_response BOOLEAN DEFAULT FALSE;
--- a/core/pkg/cli/functions/helpers.go
+++ b/core/pkg/cli/functions/helpers.go
@ -32,6 +32,11 @@ type FunctionConfig struct {
 	WSIdleTimeoutSec     int  `yaml:"ws_idle_timeout_sec"`
 	WSMaxFrameBytes      int  `yaml:"ws_max_frame_bytes"`
 	WSMaxInflightPerConn int  `yaml:"ws_max_inflight_per_conn"`
+
+	// RawHTTPResponse enables raw-HTTP-response mode (bugboard #835) — the
+	// function may call set_http_response to emit a verbatim HTTP response
+	// (status/headers/body) instead of the JSON/Ack-wrapped output.
+	RawHTTPResponse bool `yaml:"raw_http_response"`
 }

 // RetryConfig holds retry settings.
@ -226,6 +231,9 @@ func uploadWASMFunction(wasmPath string, cfg *FunctionConfig) (map[string]interf
 	if cfg.WSMaxInflightPerConn > 0 {
 		metaObj["ws_max_inflight_per_conn"] = cfg.WSMaxInflightPerConn
 	}
+	if cfg.RawHTTPResponse {
+		metaObj["raw_http_response"] = true
+	}
 	if len(metaObj) > 0 {
 		metadata, _ := json.Marshal(metaObj)
 		writer.WriteField("metadata", string(metadata))
--- a/core/pkg/cli/functions/helpers_test.go
+++ b/core/pkg/cli/functions/helpers_test.go
@ -0,0 +1,53 @@
+package functions
+
+import (
+	"os"
+	"path/filepath"
+	"testing"
+)
+
+// writeFunctionYAML creates a new temporary directory, stores body in it as
+// function.yaml (mode 0600), and returns the directory path. A write error
+// fails the calling test immediately.
+func writeFunctionYAML(t *testing.T, body string) string {
+	t.Helper()
+
+	tmpDir := t.TempDir()
+	yamlPath := filepath.Join(tmpDir, "function.yaml")
+	writeErr := os.WriteFile(yamlPath, []byte(body), 0o600)
+	if writeErr != nil {
+		t.Fatalf("write function.yaml: %v", writeErr)
+	}
+	return tmpDir
+}
+
+func TestLoadConfig_RawHTTPResponse_true(t *testing.T) {
+	dir := writeFunctionYAML(t, "name: rpc-proxy\nraw_http_response: true\n")
+
+	cfg, err := LoadConfig(dir)
+	if err != nil {
+		t.Fatalf("LoadConfig: %v", err)
+	}
+	if !cfg.RawHTTPResponse {
+		t.Error("RawHTTPResponse = false, want true")
+	}
+}
+
+func TestLoadConfig_RawHTTPResponse_defaultsFalse(t *testing.T) {
+	dir := writeFunctionYAML(t, "name: plain-fn\n")
+
+	cfg, err := LoadConfig(dir)
+	if err != nil {
+		t.Fatalf("LoadConfig: %v", err)
+	}
+	if cfg.RawHTTPResponse {
+		t.Error("RawHTTPResponse = true, want false (omitted in yaml)")
+	}
+}
+
+func TestLoadConfig_RawHTTPResponse_explicitFalse(t *testing.T) {
+	dir := writeFunctionYAML(t, "name: plain-fn\nraw_http_response: false\n")
+
+	cfg, err := LoadConfig(dir)
+	if err != nil {
+		t.Fatalf("LoadConfig: %v", err)
+	}
+	if cfg.RawHTTPResponse {
+		t.Error("RawHTTPResponse = true, want false")
+	}
+}
--- a/core/pkg/cli/production/install/orchestrator.go
+++ b/core/pkg/cli/production/install/orchestrator.go
@ -477,6 +477,14 @@ func (o *Orchestrator) saveSecretsFromJoinResponse(resp *joinhandlers.JoinRespon
 		}
 	}

+	// Write serverless secrets encryption key (bugboard #837) — identical on
+	// every node so namespace function secrets decrypt cluster-wide.
+	if resp.SecretsEncryptionKey != "" {
+		if err := os.WriteFile(filepath.Join(secretsDir, "secrets-encryption-key"), []byte(resp.SecretsEncryptionKey), 0600); err != nil {
+			return fmt.Errorf("failed to write secrets-encryption-key: %w", err)
+		}
+	}
+
 	// Write IPFS Cluster trusted peer IDs
 	if len(resp.IPFSClusterPeerIDs) > 0 {
 		content := strings.Join(resp.IPFSClusterPeerIDs, "\n") + "\n"
--- a/core/pkg/environments/production/config.go
+++ b/core/pkg/environments/production/config.go
@ -200,6 +200,18 @@ func (cg *ConfigGenerator) GenerateNodeConfig(peerAddresses []string, vpsIP stri
 	data.Environment = cg.Environment
 	data.OperatorWallet = cg.OperatorWallet

+	// Serverless function secrets encryption key (bugboard #837). Read the
+	// persisted key (generated in Phase3 / received via join) so it is
+	// rendered into node.yaml under http_gateway. If the file is missing the
+	// key is left empty and omitted from the rendered config — get_secret then
+	// stays disabled until the operator provisions the key. We deliberately do
+	// NOT generate here: generation/distribution is owned by SecretGenerator
+	// and the join flow so every node in a cluster shares one key.
+	secretsKeyPath := filepath.Join(cg.oramaDir, "secrets", "secrets-encryption-key")
+	if keyBytes, err := os.ReadFile(secretsKeyPath); err == nil {
+		data.SecretsEncryptionKey = strings.TrimSpace(string(keyBytes))
+	}
+
 	return templates.RenderNodeConfig(data)
 }

@ -471,6 +483,55 @@ func (sg *SecretGenerator) EnsureAPIKeyHMACSecret() (string, error) {
 	return secret, nil
 }

+// EnsureSecretsEncryptionKey gets or generates the AES-256 key used to
+// encrypt serverless function secrets at rest (the function_secrets table).
+// The key is a 32-byte random value stored as 64 hex characters.
+//
+// It MUST be identical on every namespace-gateway node in a cluster and
+// stable across restarts — otherwise secrets encrypted by one process can't
+// be decrypted by another (bugboard #837). Like api-key-hmac-secret, joining
+// nodes receive this value through the join flow rather than generating their
+// own; this method only generates on the genesis node (or returns the
+// existing key if a joining node already wrote it to disk).
+//
+// An existing file is reused only when its trimmed content hex-decodes to
+// exactly 32 bytes. Anything else — wrong length or non-hex characters — is
+// treated as corrupt and overwritten with a freshly generated key.
+//
+// Returns the hex-encoded key, or an error if the secrets directory cannot be
+// prepared, randomness cannot be read, or the key file cannot be written or
+// have its permissions set.
+func (sg *SecretGenerator) EnsureSecretsEncryptionKey() (string, error) {
+	secretPath := filepath.Join(sg.oramaDir, "secrets", "secrets-encryption-key")
+	secretDir := filepath.Dir(secretPath)
+
+	if err := os.MkdirAll(secretDir, 0700); err != nil {
+		return "", fmt.Errorf("failed to create secrets directory: %w", err)
+	}
+	// MkdirAll does not change the mode of a directory that already exists,
+	// so tighten it explicitly.
+	if err := os.Chmod(secretDir, 0700); err != nil {
+		return "", fmt.Errorf("failed to set secrets directory permissions: %w", err)
+	}
+
+	// Reuse the persisted key only if it is a well-formed AES-256 key. A
+	// length check alone would accept 64 arbitrary characters and hand back
+	// a value that is not valid hex.
+	if data, err := os.ReadFile(secretPath); err == nil {
+		key := strings.TrimSpace(string(data))
+		if raw, decodeErr := hex.DecodeString(key); decodeErr == nil && len(raw) == 32 {
+			if err := ensureSecretFilePermissions(secretPath); err != nil {
+				return "", err
+			}
+			return key, nil
+		}
+	}
+
+	// Generate new key (32 bytes = 64 hex chars)
+	keyBytes := make([]byte, 32)
+	if _, err := rand.Read(keyBytes); err != nil {
+		return "", fmt.Errorf("failed to generate secrets encryption key: %w", err)
+	}
+	key := hex.EncodeToString(keyBytes)
+
+	if err := os.WriteFile(secretPath, []byte(key), 0600); err != nil {
+		return "", fmt.Errorf("failed to save secrets encryption key: %w", err)
+	}
+	// WriteFile keeps the mode of a file that already existed (e.g. the
+	// corrupt one being replaced), so enforce 0600 afterwards.
+	if err := ensureSecretFilePermissions(secretPath); err != nil {
+		return "", err
+	}
+
+	return key, nil
+}
+
 func ensureSecretFilePermissions(secretPath string) error {
 	if err := os.Chmod(secretPath, 0600); err != nil {
 		return fmt.Errorf("failed to set permissions on %s: %w", secretPath, err)
--- a/core/pkg/environments/production/orchestrator.go
+++ b/core/pkg/environments/production/orchestrator.go
@ -593,6 +593,12 @@ func (ps *ProductionSetup) Phase3GenerateSecrets() error {
 	}
 	ps.logf("  ✓ API key HMAC secret ensured")

+	// Serverless function secrets encryption key (bugboard #837)
+	if _, err := ps.secretGenerator.EnsureSecretsEncryptionKey(); err != nil {
+		return fmt.Errorf("failed to ensure secrets encryption key: %w", err)
+	}
+	ps.logf("  ✓ Secrets encryption key ensured")
+
 	// Node identity (unified architecture)
 	peerID, err := ps.secretGenerator.EnsureNodeIdentity()
 	if err != nil {
--- a/core/pkg/environments/production/secrets_encryption_key_test.go
+++ b/core/pkg/environments/production/secrets_encryption_key_test.go
@ -0,0 +1,80 @@
+package production
+
+import (
+	"encoding/hex"
+	"os"
+	"path/filepath"
+	"strings"
+	"testing"
+)
+
+// TestEnsureSecretsEncryptionKey_generatesAndPersists starts from an empty
+// oramaDir and checks that the returned key is 64 hex characters decoding to
+// 32 bytes, and that the same value is stored at
+// secrets/secrets-encryption-key.
+func TestEnsureSecretsEncryptionKey_generatesAndPersists(t *testing.T) {
+	oramaDir := t.TempDir()
+	gen := NewSecretGenerator(oramaDir)
+
+	key, err := gen.EnsureSecretsEncryptionKey()
+	if err != nil {
+		t.Fatalf("EnsureSecretsEncryptionKey failed: %v", err)
+	}
+	if n := len(key); n != 64 {
+		t.Fatalf("expected 64 hex chars, got %d (%q)", n, key)
+	}
+	decoded, decodeErr := hex.DecodeString(key)
+	if decodeErr != nil || len(decoded) != 32 {
+		t.Fatalf("key is not 32 bytes hex: err=%v len=%d", decodeErr, len(decoded))
+	}
+
+	// The key must also be on disk at the expected location.
+	keyFile := filepath.Join(oramaDir, "secrets", "secrets-encryption-key")
+	onDisk, readErr := os.ReadFile(keyFile)
+	if readErr != nil {
+		t.Fatalf("reading persisted key failed: %v", readErr)
+	}
+	if persisted := strings.TrimSpace(string(onDisk)); persisted != key {
+		t.Errorf("persisted key %q != returned key %q", persisted, key)
+	}
+}
+
+// TestEnsureSecretsEncryptionKey_idempotent calls EnsureSecretsEncryptionKey
+// twice on the same directory and requires both calls to return the same
+// key — the stability property behind bugboard #837.
+func TestEnsureSecretsEncryptionKey_idempotent(t *testing.T) {
+	gen := NewSecretGenerator(t.TempDir())
+
+	initial, initialErr := gen.EnsureSecretsEncryptionKey()
+	if initialErr != nil {
+		t.Fatalf("first call failed: %v", initialErr)
+	}
+
+	repeat, repeatErr := gen.EnsureSecretsEncryptionKey()
+	if repeatErr != nil {
+		t.Fatalf("second call failed: %v", repeatErr)
+	}
+
+	if initial == repeat {
+		return
+	}
+	t.Errorf("key changed between calls: %q != %q", initial, repeat)
+}
+
+// TestEnsureSecretsEncryptionKey_regeneratesInvalid seeds the key file with a
+// value of the wrong length and checks that the call returns a 64-character
+// key instead of the seeded one.
+func TestEnsureSecretsEncryptionKey_regeneratesInvalid(t *testing.T) {
+	oramaDir := t.TempDir()
+
+	// Pre-create the secrets directory and plant a malformed key in it.
+	secretsPath := filepath.Join(oramaDir, "secrets")
+	if mkErr := os.MkdirAll(secretsPath, 0700); mkErr != nil {
+		t.Fatalf("mkdir failed: %v", mkErr)
+	}
+	badKeyFile := filepath.Join(secretsPath, "secrets-encryption-key")
+	if wrErr := os.WriteFile(badKeyFile, []byte("too-short"), 0600); wrErr != nil {
+		t.Fatalf("write failed: %v", wrErr)
+	}
+
+	gen := NewSecretGenerator(oramaDir)
+	regenerated, err := gen.EnsureSecretsEncryptionKey()
+	if err != nil {
+		t.Fatalf("EnsureSecretsEncryptionKey failed: %v", err)
+	}
+	if n := len(regenerated); n != 64 {
+		t.Errorf("expected regenerated 64-char key, got %d (%q)", n, regenerated)
+	}
+}
--- a/core/pkg/environments/templates/node.yaml
+++ b/core/pkg/environments/templates/node.yaml
@ -88,6 +88,12 @@ http_gateway:
  ipfs_cluster_api_url: "http://localhost:{{.ClusterAPIPort}}"
  ipfs_api_url: "http://localhost:{{.IPFSAPIPort}}"
  ipfs_timeout: "60s"
+{{- if .SecretsEncryptionKey}}
+  # Serverless function secrets encryption key (AES-256, hex). Must be
+  # identical on every namespace-gateway node and stable across restarts
+  # (bugboard #837). Sourced from ~/.orama/secrets/secrets-encryption-key.
+  secrets_encryption_key: "{{.SecretsEncryptionKey}}"
+{{- end}}
  
  # Routes for internal service reverse proxy (kept for backwards compatibility but not used by full gateway)
  routes: {}
--- a/core/pkg/environments/templates/render.go
+++ b/core/pkg/environments/templates/render.go
@ -46,6 +46,15 @@ type NodeConfigData struct {
 	SSHUser        string // SSH user for remote management
 	Environment    string // Environment name (devnet, testnet, etc.)
 	OperatorWallet string // Operator wallet address
+
+	// SecretsEncryptionKey is the AES-256 key (hex, 64 chars) used to encrypt
+	// serverless function secrets at rest. Rendered under http_gateway in
+	// node.yaml. Sourced from ~/.orama/secrets/secrets-encryption-key — must
+	// be identical across all namespace-gateway nodes in a cluster and stable
+	// across restarts (bugboard #837). Empty → key omitted from the rendered
+	// config (the gateway then reads the secret file directly / get_secret
+	// stays disabled until the key is configured).
+	SecretsEncryptionKey string
 }

 // GatewayConfigData holds parameters for gateway.yaml rendering
--- a/core/pkg/environments/templates/render_test.go
+++ b/core/pkg/environments/templates/render_test.go
@ -41,6 +41,32 @@ func TestRenderNodeConfig(t *testing.T) {
 	}
 }

+// TestRenderNodeConfig_secretsEncryptionKey covers both outcomes for the
+// secrets key: a non-empty SecretsEncryptionKey must appear as a quoted
+// secrets_encryption_key line, and an empty one must leave no such line.
+func TestRenderNodeConfig_secretsEncryptionKey(t *testing.T) {
+	const key = "0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef"
+
+	// Key set: the exact quoted line must be present in the output.
+	populated := NodeConfigData{
+		NodeID:               "node1",
+		SecretsEncryptionKey: key,
+	}
+	withKey, err := RenderNodeConfig(populated)
+	if err != nil {
+		t.Fatalf("RenderNodeConfig failed: %v", err)
+	}
+	want := `secrets_encryption_key: "` + key + `"`
+	if found := strings.Contains(withKey, want); !found {
+		t.Errorf("rendered node config missing secrets key line %q\n---\n%s", want, withKey)
+	}
+
+	// Key unset: no secrets_encryption_key line may be rendered at all.
+	bare := NodeConfigData{NodeID: "node1"}
+	withoutKey, err := RenderNodeConfig(bare)
+	if err != nil {
+		t.Fatalf("RenderNodeConfig failed: %v", err)
+	}
+	if leaked := strings.Contains(withoutKey, "secrets_encryption_key"); leaked {
+		t.Errorf("empty key should omit secrets_encryption_key line, got:\n%s", withoutKey)
+	}
+}
+
 func TestRenderGatewayConfig(t *testing.T) {
 	bootstrapMultiaddr := "/ip4/127.0.0.1/tcp/4001/p2p/Qm1234567890"
 	data := GatewayConfigData{
--- a/core/pkg/gateway/config.go
+++ b/core/pkg/gateway/config.go
@ -51,6 +51,14 @@ type Config struct {
 	// Loaded from ~/.orama/secrets/api-key-hmac-secret.
 	APIKeyHMACSecret string

+	// SecretsEncryptionKey is the AES-256 key (32 bytes, hex-encoded → 64
+	// hex chars) used to encrypt serverless function secrets at rest in the
+	// function_secrets table. It MUST be identical on every namespace-gateway
+	// node in a cluster and stable across restarts — otherwise secrets
+	// encrypted by one process cannot be decrypted by another (bugboard #837).
+	// Loaded from ~/.orama/secrets/secrets-encryption-key.
+	SecretsEncryptionKey string
+
 	// WebRTC configuration (set when namespace has WebRTC enabled).
 	//
 	// WebRTCEnabled is RETAINED for back-compat with operator YAML and
--- a/core/pkg/gateway/dependencies.go
+++ b/core/pkg/gateway/dependencies.go
@ -469,9 +469,17 @@ func initializeServerless(logger *logging.ColoredLogger, cfg *Config, deps *Depe
 	engineCfg.MaxTimeoutSeconds = 60
 	engineCfg.ModuleCacheSize = 100

-	// Create secrets manager for serverless functions (AES-256-GCM encrypted)
+	// Create secrets manager for serverless functions (AES-256-GCM encrypted).
+	//
+	// The encryption key comes from the gateway Config (loaded from
+	// ~/.orama/secrets/secrets-encryption-key), NOT from engineCfg — engineCfg
+	// never has the key set, so passing it always produced a per-process
+	// ephemeral key and made get_secret return undecryptable values
+	// (bugboard #837). allowEphemeral=false: a missing/invalid key fails
+	// loudly here and disables get_secret rather than silently corrupting
+	// secrets.
 	var secretsMgr serverless.SecretsManager
-	if smImpl, secretsErr := hostfunctions.NewDBSecretsManager(deps.ORMClient, engineCfg.SecretsEncryptionKey, logger.Logger); secretsErr != nil {
+	if smImpl, secretsErr := hostfunctions.NewDBSecretsManager(deps.ORMClient, cfg.SecretsEncryptionKey, false, logger.Logger); secretsErr != nil {
 		logger.ComponentWarn(logging.ComponentGeneral, "Failed to initialize secrets manager; get_secret will be unavailable",
 			zap.Error(secretsErr))
 	} else {
--- a/core/pkg/gateway/handlers/join/handler.go
+++ b/core/pkg/gateway/handlers/join/handler.go
@ -39,6 +39,9 @@ type JoinResponse struct {
 	APIKeyHMACSecret string `json:"api_key_hmac_secret,omitempty"`
 	RQLitePassword      string `json:"rqlite_password,omitempty"`
 	OlricEncryptionKey  string `json:"olric_encryption_key,omitempty"`
+	// Serverless secrets encryption key (bugboard #837) — must be identical on
+	// every node so namespace function secrets decrypt cluster-wide.
+	SecretsEncryptionKey string `json:"secrets_encryption_key,omitempty"`

 	// Cluster join info (all using WG IPs)
 	RQLiteJoinAddress  string   `json:"rqlite_join_address"`
@ -200,6 +203,13 @@ func (h *Handler) HandleJoin(w http.ResponseWriter, r *http.Request) {
 		olricEncryptionKey = strings.TrimSpace(string(data))
 	}

+	// Read serverless secrets encryption key (optional — may not exist on
+	// older clusters; bugboard #837)
+	secretsEncryptionKey := ""
+	if data, err := os.ReadFile(h.oramaDir + "/secrets/secrets-encryption-key"); err == nil {
+		secretsEncryptionKey = strings.TrimSpace(string(data))
+	}
+
 	// 7. Get this node's WG IP (needed before peer list to check self-inclusion)
 	myWGIP, err := h.getMyWGIP()
 	if err != nil {
@ -271,6 +281,7 @@ func (h *Handler) HandleJoin(w http.ResponseWriter, r *http.Request) {
 		APIKeyHMACSecret:   apiKeyHMACSecret,
 		RQLitePassword:     rqlitePassword,
 		OlricEncryptionKey: olricEncryptionKey,
+		SecretsEncryptionKey: secretsEncryptionKey,
 		RQLiteJoinAddress:  fmt.Sprintf("%s:7001", myWGIP),
 		IPFSPeer:           ipfsPeer,
 		IPFSClusterPeer:    ipfsClusterPeer,
--- a/core/pkg/gateway/handlers/push/config_handler.go
+++ b/core/pkg/gateway/handlers/push/config_handler.go
@ -17,7 +17,6 @@ import (
 	"encoding/json"
 	"errors"
 	"net/http"
-	"strings"
 	"time"

 	"github.com/DeBrosOfficial/network/pkg/push"
@ -136,13 +135,13 @@ func (h *Handlers) PutConfigHandler(w http.ResponseWriter, r *http.Request) {
 		return
 	}

-	// Validate URL fields look reasonable. We don't do hostname resolution
-	// here (slow, flaky); just reject obviously-wrong schemes.
+	// Reject a base URL that targets an internal/reserved host — a tenant must
+	// not be able to turn the gateway's push sender into an SSRF proxy (cloud
+	// metadata, WireGuard mesh, loopback). This is the config-SET path, so the
+	// DNS-resolving check is fine here; the hot send path never runs it.
 	if body.NtfyBaseURL != nil && *body.NtfyBaseURL != "" {
-		if !strings.HasPrefix(*body.NtfyBaseURL, "http://") &&
-			!strings.HasPrefix(*body.NtfyBaseURL, "https://") {
-			writeError(w, http.StatusBadRequest,
-				"ntfy_base_url must start with http:// or https://")
+		if err := push.CheckBaseURLResolvable(r.Context(), *body.NtfyBaseURL); err != nil {
+			writeError(w, http.StatusBadRequest, "ntfy_base_url rejected: "+err.Error())
 			return
 		}
 	}
--- a/core/pkg/gateway/handlers/push/resolve_caller_test.go
+++ b/core/pkg/gateway/handlers/push/resolve_caller_test.go
@ -0,0 +1,63 @@
+package push
+
+import (
+	"context"
+	"net/http"
+	"net/http/httptest"
+	"testing"
+
+	authsvc "github.com/DeBrosOfficial/network/pkg/gateway/auth"
+	"github.com/DeBrosOfficial/network/pkg/gateway/ctxkeys"
+)
+
+// Bugboard #548 — a push device must be keyed on the stable identity (rootId)
+// when the app provides one, not the wallet credential that authenticated the
+// session. resolveCallerUserID prefers the `root_id` custom claim and falls
+// back to the JWT subject so single-credential apps keep working.
+
+// reqWithClaims builds a GET "/" test request. When claims is non-nil it is
+// stored in the request context under ctxkeys.JWT; when nil the context is
+// left without a JWT entry.
+func reqWithClaims(t *testing.T, claims *authsvc.JWTClaims) *http.Request {
+	t.Helper()
+
+	req := httptest.NewRequest(http.MethodGet, "/", nil)
+	if claims == nil {
+		return req.WithContext(req.Context())
+	}
+	withJWT := context.WithValue(req.Context(), ctxkeys.JWT, claims)
+	return req.WithContext(withJWT)
+}
+
+// TestResolveCallerUserID_prefersRootIDClaim checks that a non-empty root_id
+// custom claim is returned instead of the JWT subject.
+func TestResolveCallerUserID_prefersRootIDClaim(t *testing.T) {
+	claims := &authsvc.JWTClaims{
+		Sub:    "0xWALLET",
+		Custom: map[string]string{rootIDClaim: "root-uuid-123"},
+	}
+
+	got := resolveCallerUserID(reqWithClaims(t, claims))
+	if got != "root-uuid-123" {
+		t.Errorf("want rootId from claim, got %q", got)
+	}
+}
+
+// TestResolveCallerUserID_fallsBackToSubject checks that claims without any
+// custom entries resolve to the JWT subject (back-compat for
+// single-credential apps).
+func TestResolveCallerUserID_fallsBackToSubject(t *testing.T) {
+	claims := &authsvc.JWTClaims{Sub: "0xWALLET"}
+
+	got := resolveCallerUserID(reqWithClaims(t, claims))
+	if got != "0xWALLET" {
+		t.Errorf("want wallet subject fallback, got %q", got)
+	}
+}
+
+// TestResolveCallerUserID_emptyRootIDFallsBack checks that a root_id claim
+// that is present but empty does not collapse the identity to "": the JWT
+// subject is returned instead.
+func TestResolveCallerUserID_emptyRootIDFallsBack(t *testing.T) {
+	claims := &authsvc.JWTClaims{
+		Sub:    "0xWALLET",
+		Custom: map[string]string{rootIDClaim: ""},
+	}
+
+	got := resolveCallerUserID(reqWithClaims(t, claims))
+	if got != "0xWALLET" {
+		t.Errorf("want wallet fallback on empty root_id, got %q", got)
+	}
+}
+
+// TestResolveCallerUserID_noJWTReturnsEmpty checks that a request whose
+// context carries no JWT claims (an API-key-only request) resolves to the
+// empty string.
+func TestResolveCallerUserID_noJWTReturnsEmpty(t *testing.T) {
+	apiKeyOnly := reqWithClaims(t, nil)
+
+	got := resolveCallerUserID(apiKeyOnly)
+	if got != "" {
+		t.Errorf("want empty for API-key-only request, got %q", got)
+	}
+}
--- a/core/pkg/gateway/handlers/push/types.go
+++ b/core/pkg/gateway/handlers/push/types.go
@ -141,11 +141,27 @@ func resolveNamespace(r *http.Request) string {
 	return ""
 }

-// resolveCallerUserID extracts the JWT subject (typically the wallet) of
-// the caller, or empty if the request was authenticated by API key only.
+// rootIDClaim is the custom JWT claim an app may set to carry the stable
+// identity (rootId) that a device should be keyed on, independent of which
+// wallet credential authenticated the session. See bugboard #548.
+const rootIDClaim = "root_id"
+
+// resolveCallerUserID extracts the identity a push device should be keyed on.
+//
+// In a multi-credential app (anchat), the JWT subject is the *wallet* — a
+// credential, not the identity. A single user (rootId) with N linked wallets
+// would otherwise register N device rows and receive N duplicate pushes
+// (bugboard #548). When the app includes a stable `root_id` custom claim, we
+// key on that; otherwise we fall back to the subject (wallet) so single-
+// credential apps and older tokens keep working unchanged.
+//
+// Returns empty if the request was authenticated by API key only (no JWT).
 func resolveCallerUserID(r *http.Request) string {
 	if v := r.Context().Value(ctxkeys.JWT); v != nil {
 		if claims, ok := v.(*auth.JWTClaims); ok && claims != nil {
+			if rootID, ok := claims.Custom[rootIDClaim]; ok && rootID != "" {
+				return rootID
+			}
 			return claims.Sub
 		}
 	}
--- a/core/pkg/gateway/handlers/serverless/invoke_handler.go
+++ b/core/pkg/gateway/handlers/serverless/invoke_handler.go
@ -145,6 +145,27 @@ func (h *ServerlessHandlers) InvokeFunction(w http.ResponseWriter, r *http.Reque
 	w.Header().Set("X-Request-ID", resp.RequestID)
 	w.Header().Set("X-Duration-Ms", strconv.FormatInt(resp.DurationMS, 10))

+	// Raw-HTTP-response mode (bugboard #835): when a function deployed with
+	// raw_http_response actually set a response via set_http_response, replay
+	// it verbatim (status + headers + body) and skip the sniff/wrap path. If
+	// the function set nothing, RawHTTP is nil and we fall through to the
+	// normal behavior unchanged.
+	if resp.RawHTTP != nil {
+		for k, v := range resp.RawHTTP.Headers {
+			// A tenant function must not overwrite gateway-owned trace/auth
+			// headers or framing-control (hop-by-hop) headers via its raw
+			// response — that would let it forge request IDs, leak/spoof
+			// internal-auth headers, or corrupt response framing.
+			if isReservedResponseHeader(k) {
+				continue
+			}
+			w.Header().Set(k, v)
+		}
+		w.WriteHeader(resp.RawHTTP.Status)
+		w.Write(resp.RawHTTP.Body)
+		return
+	}
+
 	// Try to detect if output is JSON
 	if len(resp.Output) > 0 && (resp.Output[0] == '{' || resp.Output[0] == '[') {
 		w.Header().Set("Content-Type", "application/json")
@ -256,3 +277,32 @@ func (h *ServerlessHandlers) ListVersions(w http.ResponseWriter, r *http.Request
 		"count":    len(versions),
 	})
 }
+
+// reservedResponseHeaders is the exact-match set of response header names
+// (lower-case) that a raw-HTTP-response tenant function (bugboard #835) may
+// not set or overwrite: the gateway's own X-Request-ID / X-Duration-Ms plus
+// hop-by-hop and framing-control headers. Headers starting with X-Internal-
+// are handled by a prefix check in isReservedResponseHeader, not listed here.
+var reservedResponseHeaders = map[string]struct{}{
+	// Gateway-owned trace headers.
+	"x-request-id":  {},
+	"x-duration-ms": {},
+
+	// Framing-control headers.
+	"content-length":    {},
+	"transfer-encoding": {},
+
+	// Hop-by-hop headers.
+	"connection":          {},
+	"keep-alive":          {},
+	"proxy-authenticate":  {},
+	"proxy-authorization": {},
+	"te":                  {},
+	"trailer":             {},
+	"upgrade":             {},
+}
+
+// isReservedResponseHeader reports whether key names a header the gateway
+// keeps for itself in raw-HTTP-response mode. The key is trimmed of
+// surrounding whitespace and lower-cased before matching, so the check is
+// case-insensitive. A key is reserved if it starts with "x-internal-" or is
+// listed in reservedResponseHeaders.
+func isReservedResponseHeader(key string) bool {
+	normalized := strings.ToLower(strings.TrimSpace(key))
+
+	// Any internal-auth header the gateway uses for inter-service trust.
+	if strings.HasPrefix(normalized, "x-internal-") {
+		return true
+	}
+
+	_, reserved := reservedResponseHeaders[normalized]
+	return reserved
+}
--- a/core/pkg/gateway/handlers/serverless/raw_http_headers_test.go
+++ b/core/pkg/gateway/handlers/serverless/raw_http_headers_test.go
@ -0,0 +1,31 @@
+package serverless
+
+import "testing"
+
+// Bugboard #835 hardening (flagged by code + security review): a raw-HTTP
+// tenant function must not be able to set/overwrite gateway-owned trace/auth
+// headers or hop-by-hop framing headers.
+
+// TestIsReservedResponseHeader feeds isReservedResponseHeader two lists: names
+// that must be reported as reserved (in mixed case, with surrounding
+// whitespace, and with the X-Internal- prefix) and ordinary response headers
+// that must be left to the tenant.
+func TestIsReservedResponseHeader(t *testing.T) {
+	mustBeReserved := []string{
+		"X-Request-ID", "x-request-id", "X-Duration-Ms",
+		"Content-Length", "Transfer-Encoding", "Connection", "Keep-Alive",
+		"Proxy-Authenticate", "Proxy-Authorization", "TE", "Trailer", "Upgrade",
+		"X-Internal-Auth", "x-internal-anything", "  X-Request-Id  ",
+	}
+	for i := range mustBeReserved {
+		name := mustBeReserved[i]
+		if isReservedResponseHeader(name) {
+			continue
+		}
+		t.Errorf("isReservedResponseHeader(%q) = false; want true (must be protected)", name)
+	}
+
+	tenantMaySet := []string{
+		"Content-Type", "Cache-Control", "X-Custom", "ETag",
+		"Access-Control-Allow-Origin", "Location", "Retry-After",
+	}
+	for i := range tenantMaySet {
+		name := tenantMaySet[i]
+		if !isReservedResponseHeader(name) {
+			continue
+		}
+		t.Errorf("isReservedResponseHeader(%q) = true; want false (tenant may set it)", name)
+	}
+}
--- a/core/pkg/node/gateway.go
+++ b/core/pkg/node/gateway.go
@ -58,6 +58,15 @@ func (n *Node) startHTTPGateway(ctx context.Context) error {
 		rqlitePassword = strings.TrimSpace(string(secretBytes))
 	}

+	// Read the serverless secrets encryption key (bugboard #837). Must be the
+	// SAME value on every namespace-gateway node so a secret encrypted by one
+	// process decrypts on another; an empty value makes get_secret fail loudly
+	// (the manager refuses an ephemeral key in production).
+	secretsEncryptionKey := ""
+	if secretBytes, err := os.ReadFile(filepath.Join(oramaDir, "secrets", "secrets-encryption-key")); err == nil {
+		secretsEncryptionKey = strings.TrimSpace(string(secretBytes))
+	}
+
 	gwCfg := &gateway.Config{
 		ListenAddr:           n.config.HTTPGateway.ListenAddr,
 		ClientNamespace:      n.config.HTTPGateway.ClientNamespace,
@ -75,6 +84,7 @@ func (n *Node) startHTTPGateway(ctx context.Context) error {
 		RQLitePassword:       rqlitePassword,
 		ClusterSecret:        clusterSecret,
 		APIKeyHMACSecret:     apiKeyHMACSecret,
+		SecretsEncryptionKey: secretsEncryptionKey,
 		WebRTCEnabled:        n.config.HTTPGateway.WebRTC.Enabled,
 		SFUPort:              n.config.HTTPGateway.WebRTC.SFUPort,
 		TURNDomain:           n.config.HTTPGateway.WebRTC.TURNDomain,
--- a/core/pkg/push/manager.go
+++ b/core/pkg/push/manager.go
@ -296,7 +296,17 @@ func (m *Manager) buildDispatcher(ctx context.Context, namespace string) (*PushD
 			// (DELETE) — there's no "set this field to empty to clear"
 			// half-state, by design.
 			if nc.NtfyBaseURL != "" {
-				eff.NtfyBaseURL = nc.NtfyBaseURL
+				// Defense-in-depth: a base URL stored before the SSRF guard
+				// existed (or via any path that skipped it) must not point at an
+				// internal/reserved literal IP. Drop the override and fall back
+				// to the gateway default if it does. Literal-only (no DNS, no
+				// syntax re-validation) so this stays safe on the hot build path.
+				if IsInternalBaseURL(nc.NtfyBaseURL) {
+					m.logger.Warn("push: ignoring namespace ntfy_base_url override (internal address)",
+						zap.String("namespace", namespace), zap.String("base_url", nc.NtfyBaseURL))
+				} else {
+					eff.NtfyBaseURL = nc.NtfyBaseURL
+				}
 			}
 			if nc.NtfyAuthToken != "" {
 				eff.NtfyAuthToken = nc.NtfyAuthToken
--- a/core/pkg/push/providers/ntfy/credentials.go
+++ b/core/pkg/push/providers/ntfy/credentials.go
@ -16,10 +16,11 @@ package ntfy
 // migration window, with the new credentials store taking precedence.

 import (
+	"context"
 	"encoding/json"
 	"fmt"
-	"strings"

+	"github.com/DeBrosOfficial/network/pkg/push"
 	"github.com/DeBrosOfficial/network/pkg/push/credentials"
 )

@ -87,7 +88,17 @@ func (Validator) Validate(raw []byte) error {
 	if err := json.Unmarshal(raw, &c); err != nil {
 		return fmt.Errorf("ntfy credentials: invalid JSON: %w", err)
 	}
-	return validateCredentials(c)
+	if err := validateCredentials(c); err != nil {
+		return err
+	}
+	// Validate is the config-SET path (the hot build path uses ParseCredentials,
+	// which skips DNS), so the resolving SSRF check is safe here: reject a
+	// base_url whose host resolves to an internal/reserved address. Fail-open on
+	// resolution error — see push.CheckBaseURLResolvable.
+	if err := push.CheckBaseURLResolvable(context.Background(), c.BaseURL); err != nil {
+		return fmt.Errorf("ntfy credentials: %w", err)
+	}
+	return nil
 }

 // Redact returns a JSON-safe view that never echoes the auth token or
@ -127,10 +138,12 @@ func ParseCredentials(raw []byte) (Credentials, error) {
 // validateCredentials is the shared validator used by both Validate and
 // ParseCredentials.
 func validateCredentials(c Credentials) error {
-	if c.BaseURL != "" {
-		if !strings.HasPrefix(c.BaseURL, "http://") && !strings.HasPrefix(c.BaseURL, "https://") {
-			return fmt.Errorf("ntfy credentials: base_url must start with http:// or https:// (got %q)", c.BaseURL)
-		}
+	// Literal-IP SSRF guard + scheme check. Runs on BOTH the set and the hot
+	// build path (no DNS), so a stored internal-literal base_url is also
+	// rejected when the dispatcher is (re)built. The DNS-resolving check lives
+	// in Validate (set path only).
+	if err := push.CheckBaseURLSyntax(c.BaseURL); err != nil {
+		return fmt.Errorf("ntfy credentials: %w", err)
 	}
 	if c.TopicMode != "" {
 		switch c.TopicMode {
--- a/core/pkg/push/providers/ntfy/credentials_test.go
+++ b/core/pkg/push/providers/ntfy/credentials_test.go
@ -26,7 +26,10 @@ func TestValidator_RejectsBadBaseURL(t *testing.T) {
 }

 func TestValidator_AcceptsHttpAndHttps(t *testing.T) {
-	for _, base := range []string{"http://push.local:8080", "https://push.example.com"} {
+	// Literal public (documentation-range) IPs so the test is deterministic and
+	// never hits real DNS — Validate now does a set-time SSRF resolve for
+	// hostname base URLs.
+	for _, base := range []string{"http://203.0.113.10:8080", "https://203.0.113.10"} {
 		body, _ := json.Marshal(Credentials{BaseURL: base})
 		if err := NewValidator().Validate(body); err != nil {
 			t.Errorf("base_url=%q rejected: %v", base, err)
@ -34,6 +37,21 @@ func TestValidator_AcceptsHttpAndHttps(t *testing.T) {
 	}
 }

+// TestValidator_RejectsInternalBaseURL pins the SSRF guard on the config-set
+// path: a tenant must not be able to point the push base URL at an internal /
+// reserved address. Every case is a literal IP, so no DNS is involved.
+func TestValidator_RejectsInternalBaseURL(t *testing.T) {
+	internalBases := []string{
+		"http://169.254.169.254", // cloud metadata
+		"http://127.0.0.1:8090",  // loopback (the operator's local ntfy)
+		"http://10.0.0.5",        // WireGuard mesh
+	}
+	for _, base := range internalBases {
+		body, _ := json.Marshal(Credentials{BaseURL: base})
+		err := NewValidator().Validate(body)
+		if err != nil {
+			continue // rejected, as required
+		}
+		t.Errorf("internal base_url %q must be rejected (SSRF)", base)
+	}
+}
+
 func TestValidator_RejectsBadTopicMode(t *testing.T) {
 	if err := NewValidator().Validate([]byte(`{"topic_mode":"random"}`)); err == nil {
 		t.Error("expected rejection of unknown topic_mode")
--- a/core/pkg/push/providers/ntfy/ntfy.go
+++ b/core/pkg/push/providers/ntfy/ntfy.go
@ -74,15 +74,10 @@ func (p *Provider) Send(ctx context.Context, msg push.PushMessage) error {
 		return fmt.Errorf("ntfy: base URL not configured")
 	}

-	// URL-escape each path segment of the device token. ntfy topics can be
-	// hierarchical (e.g. "ns/myapp/user-1") and we want to preserve those
-	// '/' separators while escaping any other special characters that
-	// could let a malicious token escape the topic path.
-	parts := strings.Split(msg.DeviceToken, "/")
-	for i, p := range parts {
-		parts[i] = url.PathEscape(p)
+	endpointURL, err := p.resolveEndpoint(msg.DeviceToken)
+	if err != nil {
+		return err
 	}
-	endpointURL := p.baseURL + "/" + strings.Join(parts, "/")

 	req, err := http.NewRequestWithContext(ctx, http.MethodPost, endpointURL, strings.NewReader(msg.Body))
 	if err != nil {
@ -130,3 +125,58 @@ func (p *Provider) Send(ctx context.Context, msg push.PushMessage) error {
 	_, _ = io.Copy(io.Discard, io.LimitReader(resp.Body, 4096))
 	return nil
 }
+
+// resolveEndpoint turns a device token into the URL Send publishes to.
+//
+// Two token shapes are accepted:
+//
+//   - A bare ntfy topic, optionally hierarchical (e.g. "ns/myapp/user-1"). It
+//     is appended to the configured base URL with every '/'-separated segment
+//     path-escaped, so a crafted token can't break out of the topic path.
+//   - An absolute http(s) URL — the UnifiedPush endpoint form handed to the
+//     client by the distributor (e.g. "https://push.example.com/up<random>").
+//     Its scheme and host must equal the configured base URL's
+//     (case-insensitively); anything else is rejected so a device-supplied
+//     token can only ever target our own push host. Only the URL's path is
+//     kept: it is treated as the topic and rebuilt through the same
+//     per-segment escaping, so any query string or fragment is discarded.
+//
+// An error is returned when the token or the base URL fails to parse, when the
+// scheme/host don't match, or when a URL token has no path beyond "/".
+func (p *Provider) resolveEndpoint(token string) (string, error) {
+	topic := token
+	if isAbsoluteHTTPURL(token) {
+		endpoint, err := url.Parse(token)
+		if err != nil {
+			return "", fmt.Errorf("ntfy: invalid endpoint url: %w", err)
+		}
+		configured, err := url.Parse(p.baseURL)
+		if err != nil {
+			return "", fmt.Errorf("ntfy: invalid base url %q: %w", p.baseURL, err)
+		}
+		// Compare the parsed authority, not the raw string, so userinfo
+		// tricks ("host@attacker") are judged by the host actually dialed.
+		sameOrigin := strings.EqualFold(endpoint.Scheme, configured.Scheme) &&
+			strings.EqualFold(endpoint.Host, configured.Host)
+		if !sameOrigin {
+			return "", fmt.Errorf("ntfy: endpoint host %q does not match configured push host %q", endpoint.Host, configured.Host)
+		}
+		// Keep the URL form on the same publish surface as a bare topic:
+		// path only, no query/fragment.
+		topic = strings.TrimPrefix(endpoint.Path, "/")
+		if topic == "" {
+			return "", fmt.Errorf("ntfy: endpoint url %q has no topic path", token)
+		}
+	}
+
+	// Escape segment by segment so the '/' hierarchy survives.
+	segments := strings.Split(topic, "/")
+	escaped := make([]string, 0, len(segments))
+	for _, seg := range segments {
+		escaped = append(escaped, url.PathEscape(seg))
+	}
+	return p.baseURL + "/" + strings.Join(escaped, "/"), nil
+}
+
+// isAbsoluteHTTPURL reports whether s begins with "http://" or "https://"
+// (compared case-insensitively) — i.e. it looks like a UnifiedPush endpoint
+// URL rather than a bare ntfy topic.
+func isAbsoluteHTTPURL(s string) bool {
+	normalized := strings.ToLower(s)
+	for _, scheme := range []string{"http://", "https://"} {
+		if strings.HasPrefix(normalized, scheme) {
+			return true
+		}
+	}
+	return false
+}
--- a/core/pkg/push/providers/ntfy/ntfy_test.go
+++ b/core/pkg/push/providers/ntfy/ntfy_test.go
@ -7,6 +7,7 @@ import (
 	"io"
 	"net/http"
 	"net/http/httptest"
+	"net/url"
 	"strings"
 	"testing"
 	"time"
@ -183,6 +184,108 @@ func TestSend_no_baseURL_returns_error(t *testing.T) {
 	}
 }

+// feat-32: an Android/GrapheneOS UnifiedPush device registers the full endpoint
+// URL its distributor hands it. UnifiedPush requires the app server to POST to
+// that endpoint verbatim, and we must do so ONLY when the host matches our
+// configured push server (never an arbitrary host → no SSRF).
+
+// TestSend_unifiedPush_endpoint_published checks that a full endpoint URL on
+// the configured push host is published to its path with the body intact.
+func TestSend_unifiedPush_endpoint_published(t *testing.T) {
+	type capture struct{ path, body string }
+	var got capture
+	handler := func(w http.ResponseWriter, r *http.Request) {
+		payload, _ := io.ReadAll(r.Body)
+		got = capture{path: r.URL.Path, body: string(payload)}
+		w.WriteHeader(http.StatusOK)
+	}
+	srv := httptest.NewServer(http.HandlerFunc(handler))
+	defer srv.Close()
+
+	// The distributor hands the client a full endpoint on the SAME (push) host.
+	msg := push.PushMessage{DeviceToken: srv.URL + "/upAbc123", Body: "payload"}
+	provider := New(Config{BaseURL: srv.URL}, nil)
+	if err := provider.Send(context.Background(), msg); err != nil {
+		t.Fatalf("Send: %v", err)
+	}
+
+	if got.path != "/upAbc123" {
+		t.Errorf("UnifiedPush endpoint must publish to its topic path; got %q", got.path)
+	}
+	if got.body != "payload" {
+		t.Errorf("body not delivered; got %q", got.body)
+	}
+}
+
+// TestSend_unifiedPush_endpoint_confined_to_topic checks that a URL token gets
+// no more reach than a bare topic: its path becomes the topic and any query
+// string is dropped, so it can't gain arbitrary path/query control on the
+// push host.
+func TestSend_unifiedPush_endpoint_confined_to_topic(t *testing.T) {
+	var seen struct{ path, query string }
+	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		seen.path, seen.query = r.URL.Path, r.URL.RawQuery
+		w.WriteHeader(http.StatusOK)
+	}))
+	defer srv.Close()
+
+	provider := New(Config{BaseURL: srv.URL}, nil)
+	msg := push.PushMessage{DeviceToken: srv.URL + "/uptopic?admin=1&x=y", Body: "x"}
+	if err := provider.Send(context.Background(), msg); err != nil {
+		t.Fatalf("Send: %v", err)
+	}
+
+	if seen.path != "/uptopic" {
+		t.Errorf("path must be the topic only; got %q", seen.path)
+	}
+	if seen.query != "" {
+		t.Errorf("query string must be dropped (no arbitrary query on push host); got %q", seen.query)
+	}
+}
+
+// TestSend_unifiedPush_endpoint_rejects_userinfo_bypass covers the classic
+// SSRF guard bypass of smuggling the trusted host into the userinfo part.
+// url.Parse resolves the authority to the attacker host, so the token must be
+// rejected and nothing may be sent.
+func TestSend_unifiedPush_endpoint_rejects_userinfo_bypass(t *testing.T) {
+	requested := false
+	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		requested = true
+		w.WriteHeader(http.StatusOK)
+	}))
+	defer srv.Close()
+
+	// base host = srv host; token tries "<srvhost>@attacker.example.com".
+	trusted, _ := url.Parse(srv.URL)
+	smuggled := trusted.Scheme + "://" + trusted.Host + "@attacker.example.com/x"
+
+	provider := New(Config{BaseURL: srv.URL}, nil)
+	err := provider.Send(context.Background(), push.PushMessage{DeviceToken: smuggled, Body: "x"})
+	if err == nil {
+		t.Fatal("expected rejection of a userinfo-smuggled host")
+	}
+	if requested {
+		t.Error("no request must be sent for a userinfo-bypass token")
+	}
+}
+
+// TestSend_unifiedPush_endpoint_rejects_foreign_host checks that a device
+// token pointing at a DIFFERENT host is refused before any request is made —
+// a device token must never become an SSRF vector.
+func TestSend_unifiedPush_endpoint_rejects_foreign_host(t *testing.T) {
+	requested := false
+	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		requested = true
+		w.WriteHeader(http.StatusOK)
+	}))
+	defer srv.Close()
+
+	foreign := push.PushMessage{
+		DeviceToken: "https://attacker.example.com/steal",
+		Body:        "x",
+	}
+	err := New(Config{BaseURL: srv.URL}, nil).Send(context.Background(), foreign)
+	if err == nil {
+		t.Fatal("expected an error for an endpoint whose host doesn't match the push host")
+	}
+	if requested {
+		t.Error("no request must be sent when the endpoint host doesn't match")
+	}
+	if !strings.Contains(err.Error(), "does not match") {
+		t.Errorf("error should explain the host mismatch; got %v", err)
+	}
+}
+
 func TestName(t *testing.T) {
 	p := New(Config{BaseURL: "http://x"}, nil)
 	if p.Name() != "ntfy" {
--- a/core/pkg/push/url_guard.go
+++ b/core/pkg/push/url_guard.go
@ -0,0 +1,193 @@
+package push
+
+import (
+	"bytes"
+	"context"
+	"fmt"
+	"net"
+	"net/url"
+	"strings"
+	"time"
+)
+
+// url_guard.go — SSRF guard for TENANT-supplied push base URLs.
+//
+// A tenant can override the ntfy base URL the gateway POSTs to (BYO-ntfy is a
+// legitimate use case). Without a guard, a tenant could point it at an internal
+// address — cloud metadata (169.254.169.254), the WireGuard mesh (10.0.0.x),
+// loopback — turning the gateway's push sender into an SSRF proxy. These checks
+// reject internal/reserved targets while still allowing real external hosts.
+//
+// IMPORTANT: apply these ONLY to tenant-supplied base URLs (the per-namespace
+// override). The operator's gateway default (e.g. 127.0.0.1:8090, the local
+// ntfy) is trusted and must NOT pass through here — it would be (correctly)
+// rejected as loopback.
+
+// baseURLDNSTimeout bounds the hostname-resolution step in CheckBaseURLResolvable.
+const baseURLDNSTimeout = 5 * time.Second
+
+// lookupIP resolves a host to its IPs. A package var so tests can substitute a
+// deterministic resolver instead of touching real DNS.
+var lookupIP = func(ctx context.Context, host string) ([]net.IP, error) {
+	addrs, err := net.DefaultResolver.LookupIPAddr(ctx, host)
+	if err != nil {
+		return nil, err
+	}
+	ips := make([]net.IP, len(addrs))
+	for i, a := range addrs {
+		ips[i] = a.IP
+	}
+	return ips, nil
+}
+
+// literalHostIP parses host (as returned by url.URL.Hostname) as a literal IP
+// address and returns nil when it is not one.
+//
+// Any zone suffix ("fe80::1%eth0") is stripped first. net.ParseIP does not
+// accept a zone, so without this a scoped literal such as
+// "http://[fe80::1%25eth0]" would be misclassified as a DNS name and slip
+// past every literal-IP check in this file.
+func literalHostIP(host string) net.IP {
+	if i := strings.IndexByte(host, '%'); i >= 0 {
+		host = host[:i]
+	}
+	return net.ParseIP(host)
+}
+
+// CheckBaseURLSyntax validates a tenant base URL's scheme and rejects a host
+// that is a LITERAL internal/reserved IP (including a zone-scoped IPv6
+// literal). It does NOT resolve hostnames, so it is safe to call on hot paths
+// (e.g. per-send dispatcher construction). An empty base URL is allowed — it
+// means "use the gateway default".
+//
+// It returns an error when the URL does not parse, the scheme is not http or
+// https, the host is missing, the host is a reserved/internal literal IP, or
+// the host is a non-standard numeric IPv4 encoding.
+func CheckBaseURLSyntax(baseURL string) error {
+	if baseURL == "" {
+		return nil
+	}
+	u, err := url.Parse(baseURL)
+	if err != nil {
+		return fmt.Errorf("base_url: invalid URL: %w", err)
+	}
+	if u.Scheme != "http" && u.Scheme != "https" {
+		return fmt.Errorf("base_url: must start with http:// or https:// (got scheme %q)", u.Scheme)
+	}
+	host := u.Hostname()
+	if host == "" {
+		return fmt.Errorf("base_url: missing host")
+	}
+	if ip := literalHostIP(host); ip != nil {
+		if isReservedIP(ip) {
+			return fmt.Errorf("base_url: host %s is a reserved/internal address and is not allowed", host)
+		}
+		return nil
+	}
+	// net.ParseIP only accepts canonical dotted-decimal / standard IPv6, but the
+	// OS resolver + net.Dial ALSO accept decimal ("2130706433"), hex
+	// ("0x7f000001") and octal ("0177.0.0.1") IPv4 encodings — a literal-check
+	// bypass to internal addresses. Reject these non-standard numeric hosts
+	// outright (no legitimate push host is all-numeric or 0x-hex).
+	if looksLikeNumericHost(host) {
+		return fmt.Errorf("base_url: host %q is a non-standard numeric/IP encoding and is not allowed", host)
+	}
+	return nil
+}
+
+// CheckBaseURLResolvable runs CheckBaseURLSyntax AND, when the host is a name
+// rather than a literal IP, resolves it (bounded) and rejects if ANY resolved
+// address is internal/reserved — blocking a tenant from pointing a domain at an
+// internal host. It performs DNS, so call it ONLY at config-set time (the PUT
+// handlers), never on the hot send path.
+//
+// Resolution failure FAILS OPEN (allowed): an unresolvable host reaches nothing
+// (delivery would fail anyway), and rejecting it would break a legitimate host
+// that's momentarily unresolvable at config time. The hard floor is
+// CheckBaseURLSyntax's literal-IP block, which applies on every code path.
+//
+// Residual: as a set-time check it does not defend against DNS rebinding (the
+// host re-pointing to an internal IP AFTER it was accepted). Closing that would
+// require a send-time IP check, which is complicated here by the operator's
+// loopback default ntfy.
+func CheckBaseURLResolvable(ctx context.Context, baseURL string) error {
+	if err := CheckBaseURLSyntax(baseURL); err != nil {
+		return err
+	}
+	if baseURL == "" {
+		return nil
+	}
+	u, _ := url.Parse(baseURL) // already validated by CheckBaseURLSyntax
+	host := u.Hostname()
+	if literalHostIP(host) != nil {
+		return nil // literal IP (zoned or not) already vetted by CheckBaseURLSyntax
+	}
+
+	rctx, cancel := context.WithTimeout(ctx, baseURLDNSTimeout)
+	defer cancel()
+	ips, err := lookupIP(rctx, host)
+	if err != nil || len(ips) == 0 {
+		return nil // fail open on resolution failure (see doc)
+	}
+	for _, ip := range ips {
+		if isReservedIP(ip) {
+			return fmt.Errorf("base_url: host %q resolves to reserved/internal address %s and is not allowed", host, ip)
+		}
+	}
+	return nil
+}
+
+// IsInternalBaseURL reports whether baseURL parses to a host that is a LITERAL
+// internal/reserved IP (including a zone-scoped IPv6 literal) or a
+// non-standard numeric IPv4 encoding. Malformed URLs and hostname URLs return
+// false — this is the no-false-positive guard for hot paths (e.g. dispatcher
+// build), where the goal is only to drop an internal-address override, not to
+// re-validate syntax or do DNS (the set-path handlers cover those).
+func IsInternalBaseURL(baseURL string) bool {
+	u, err := url.Parse(baseURL)
+	if err != nil {
+		return false
+	}
+	host := u.Hostname()
+	if ip := literalHostIP(host); ip != nil {
+		return isReservedIP(ip)
+	}
+	// Non-standard numeric encodings (decimal/hex/octal) that net.ParseIP misses
+	// but net.Dial resolves to an IP — treat as internal so the build-path guard
+	// matches what the dialer would actually reach.
+	return looksLikeNumericHost(host)
+}
+
+// isReservedIP reports whether ip is in a range a tenant must never be able to
+// reach via a push base URL: loopback, link-local (incl. 169.254.169.254 cloud
+// metadata), RFC1918 private, ULA, unspecified, multicast, 100.64/10 CGNAT, and
+// the NAT64 well-known prefix 64:ff9b::/96. A nil ip is treated as reserved.
+func isReservedIP(ip net.IP) bool {
+	if ip == nil {
+		return true // unparseable → treat as unsafe
+	}
+	if ip4 := ip.To4(); ip4 != nil {
+		// 100.64.0.0/10 — carrier-grade NAT (not covered by IsPrivate). The
+		// second-octet band [64,127] is the /10.
+		if ip4[0] == 100 && ip4[1] >= 64 && ip4[1] <= 127 {
+			return true
+		}
+	} else if ip16 := ip.To16(); ip16 != nil {
+		// NAT64 well-known prefix 64:ff9b::/96 (RFC 6052) embeds an IPv4 address
+		// a NAT64 gateway would translate — so it can reach internal v4.
+		if bytes.Equal(ip16[:12], []byte{0x00, 0x64, 0xff, 0x9b, 0, 0, 0, 0, 0, 0, 0, 0}) {
+			return true
+		}
+	}
+	return ip.IsLoopback() ||
+		ip.IsLinkLocalUnicast() ||
+		ip.IsLinkLocalMulticast() ||
+		ip.IsInterfaceLocalMulticast() ||
+		ip.IsMulticast() ||
+		ip.IsPrivate() || // 10/8, 172.16/12, 192.168/16, fc00::/7
+		ip.IsUnspecified()
+}
+
+// looksLikeNumericHost reports whether host is a non-standard numeric IPv4
+// encoding — hex ("0x7f000001", "0x7f.0.0.1"), decimal ("2130706433"), or octal
+// ("0177.0.0.1") — that net.ParseIP rejects but the OS resolver and net.Dial
+// accept (resolving to a real, possibly internal, IPv4). Such hosts are never a
+// legitimate push server name, so callers reject them rather than let them slip
+// past the literal-IP guard. Hosts containing any letter (other than a leading
+// "0x") are treated as ordinary DNS names and return false.
+func looksLikeNumericHost(host string) bool {
+	if host == "" {
+		return false
+	}
+	if strings.HasPrefix(strings.ToLower(host), "0x") {
+		return true // hex literal
+	}
+	// All-numeric (optionally dotted) host that net.ParseIP already failed to
+	// accept: a decimal or octal IPv4 encoding (or a malformed all-numeric
+	// dotted form). Either way, not a real hostname.
+	for _, r := range host {
+		if r != '.' && (r < '0' || r > '9') {
+			return false
+		}
+	}
+	return true
+}
--- a/core/pkg/push/url_guard_test.go
+++ b/core/pkg/push/url_guard_test.go
@ -0,0 +1,160 @@
+package push
+
+import (
+	"context"
+	"errors"
+	"net"
+	"testing"
+)
+
+// SSRF guard for tenant push base URLs. These pin: literal internal/reserved IPs
+// are rejected, the cloud-metadata IP is rejected, legit external hosts pass,
+// and a hostname that RESOLVES to an internal address is rejected (the DNS
+// vector) while a public-resolving host passes.
+
+// TestCheckBaseURLSyntax is the table for the no-DNS check: scheme and host
+// validation plus rejection of literal internal/reserved IPs and non-standard
+// numeric IPv4 encodings.
+func TestCheckBaseURLSyntax(t *testing.T) {
+	type testCase struct {
+		rawURL  string
+		wantErr bool
+	}
+	table := []testCase{
+		{"", false},                               // empty = use default
+		{"https://push.example.com", false},       // public host
+		{"http://push.example.com:8090", false},   // public host + port
+		{"https://1.1.1.1", false},                // public literal IP
+		{"https://[2606:4700:4700::1111]", false}, // public v6
+		{"ftp://push.example.com", true},          // bad scheme
+		{"notaurl", true},                         // no scheme/host
+		{"http://", true},                         // missing host
+		{"http://169.254.169.254", true},          // cloud metadata (link-local)
+		{"http://127.0.0.1", true},                // loopback
+		{"http://127.0.0.1:8090", true},           // loopback + port
+		{"http://10.0.0.5", true},                 // RFC1918 (WireGuard mesh)
+		{"http://192.168.1.1", true},              // RFC1918
+		{"http://172.16.0.1", true},               // RFC1918
+		{"http://100.64.0.1", true},               // CGNAT
+		{"http://0.0.0.0", true},                  // unspecified
+		{"http://[::1]", true},                    // v6 loopback
+		{"http://[fd00::1]", true},                // v6 ULA
+		{"http://[64:ff9b::a00:5]", true},         // NAT64-embedded 10.0.0.5
+		{"http://0x7f000001", true},               // hex-encoded 127.0.0.1
+		{"http://2130706433", true},               // decimal-encoded 127.0.0.1
+		{"http://0177.0.0.1", true},               // octal-encoded 127.0.0.1
+	}
+	for _, tc := range table {
+		err := CheckBaseURLSyntax(tc.rawURL)
+		switch {
+		case tc.wantErr && err == nil:
+			t.Errorf("CheckBaseURLSyntax(%q) = nil; want error", tc.rawURL)
+		case !tc.wantErr && err != nil:
+			t.Errorf("CheckBaseURLSyntax(%q) = %v; want nil", tc.rawURL, err)
+		}
+	}
+}
+
+// TestIsReservedIP checks the range classifier directly: every reserved
+// sample must be flagged, every public sample must pass, and a nil IP is
+// treated as unsafe.
+func TestIsReservedIP(t *testing.T) {
+	samples := map[bool][]string{
+		true: {
+			"127.0.0.1", "169.254.169.254", "10.0.0.1", "172.16.5.5", "192.168.0.1",
+			"100.64.0.1", "100.100.100.200", "0.0.0.0", "224.0.0.1", "::1", "fe80::1",
+			"fd00::1", "ff02::1",
+			"64:ff9b::a00:1",     // NAT64-embedded 10.0.0.1
+			"64:ff9b::a9fe:a9fe", // NAT64-embedded 169.254.169.254 (metadata)
+		},
+		false: {"1.1.1.1", "8.8.8.8", "203.0.113.10", "2606:4700:4700::1111"},
+	}
+	for _, addr := range samples[true] {
+		if isReservedIP(net.ParseIP(addr)) {
+			continue
+		}
+		t.Errorf("isReservedIP(%s) = false; want true (reserved)", addr)
+	}
+	for _, addr := range samples[false] {
+		if !isReservedIP(net.ParseIP(addr)) {
+			continue
+		}
+		t.Errorf("isReservedIP(%s) = true; want false (public)", addr)
+	}
+	if !isReservedIP(nil) {
+		t.Error("isReservedIP(nil) must be true (unparseable → unsafe)")
+	}
+}
+
+// TestIsInternalBaseURL checks the hot-path predicate: true only for literal
+// internal IPs and non-standard numeric encodings; false for hostnames, public
+// literals and malformed input (which must NOT be dropped).
+func TestIsInternalBaseURL(t *testing.T) {
+	wantInternal := []string{
+		"http://10.0.0.5", "http://169.254.169.254",
+		"https://127.0.0.1:8090", "http://[::1]", "http://192.168.1.1",
+		"http://[64:ff9b::a00:5]", // NAT64
+		"http://0x7f000001",       // hex-encoded loopback
+		"http://2130706433",       // decimal-encoded loopback
+		"http://0177.0.0.1",       // octal-encoded loopback
+	}
+	wantNotInternal := []string{
+		"https://push.example.com", // hostname → false (the set path resolves it)
+		"https://1.1.1.1",          // public literal IP
+		"ns-A-url",                 // malformed placeholder → must NOT be dropped
+		"v1", "", "not a url",
+	}
+	for _, raw := range wantInternal {
+		if got := IsInternalBaseURL(raw); !got {
+			t.Errorf("IsInternalBaseURL(%q) = false; want true (internal literal IP)", raw)
+		}
+	}
+	for _, raw := range wantNotInternal {
+		if got := IsInternalBaseURL(raw); got {
+			t.Errorf("IsInternalBaseURL(%q) = true; want false", raw)
+		}
+	}
+}
+
+// TestCheckBaseURLResolvable exercises the DNS-resolving check with lookupIP
+// swapped for deterministic stubs, so no real DNS is touched. The original
+// resolver is restored when the test returns.
+func TestCheckBaseURLResolvable(t *testing.T) {
+	saved := lookupIP
+	defer func() { lookupIP = saved }()
+
+	// resolveTo installs a stub resolver answering every host with addrs.
+	resolveTo := func(addrs ...string) {
+		lookupIP = func(_ context.Context, _ string) ([]net.IP, error) {
+			ips := make([]net.IP, 0, len(addrs))
+			for _, a := range addrs {
+				ips = append(ips, net.ParseIP(a))
+			}
+			return ips, nil
+		}
+	}
+	bg := context.Background()
+
+	t.Run("hostname resolving to internal is rejected", func(t *testing.T) {
+		resolveTo("10.0.0.7") // points at the mesh
+		if err := CheckBaseURLResolvable(bg, "https://evil.example.com"); err == nil {
+			t.Fatal("expected rejection of a host resolving to an internal address")
+		}
+	})
+
+	t.Run("hostname resolving to public is allowed", func(t *testing.T) {
+		resolveTo("203.0.113.50")
+		if err := CheckBaseURLResolvable(bg, "https://push.example.com"); err != nil {
+			t.Fatalf("public-resolving host should pass: %v", err)
+		}
+	})
+
+	t.Run("any internal IP among results is rejected", func(t *testing.T) {
+		resolveTo("203.0.113.50", "127.0.0.1")
+		if err := CheckBaseURLResolvable(bg, "https://mixed.example.com"); err == nil {
+			t.Fatal("a host resolving to ANY internal address must be rejected")
+		}
+	})
+
+	t.Run("resolution failure is allowed (fail open)", func(t *testing.T) {
+		lookupIP = func(_ context.Context, _ string) ([]net.IP, error) {
+			return nil, errors.New("nxdomain")
+		}
+		if err := CheckBaseURLResolvable(bg, "https://unresolvable.example.com"); err != nil {
+			t.Fatalf("an unresolvable host should fail open (be allowed); got %v", err)
+		}
+	})
+
+	t.Run("literal internal IP rejected without DNS", func(t *testing.T) {
+		lookupIP = func(_ context.Context, _ string) ([]net.IP, error) {
+			t.Fatal("DNS must not be consulted for a literal IP host")
+			return nil, nil
+		}
+		if err := CheckBaseURLResolvable(bg, "http://169.254.169.254"); err == nil {
+			t.Fatal("literal metadata IP must be rejected")
+		}
+	})
+
+	t.Run("empty is allowed", func(t *testing.T) {
+		if err := CheckBaseURLResolvable(bg, ""); err != nil {
+			t.Fatalf("empty base_url should pass: %v", err)
+		}
+	})
+}
--- a/core/pkg/serverless/engine.go
+++ b/core/pkg/serverless/engine.go
@ -2,6 +2,7 @@ package serverless

 import (
 	"context"
+	cryptorand "crypto/rand"
 	"errors"
 	"fmt"
 	"time"
@ -318,6 +319,15 @@ func (e *Engine) Execute(ctx context.Context, fn *Function, input []byte, invCtx
 	// gates invocation frequency, not per-invocation host-call volume).
 	execCtx = WithPublishCounter(execCtx)

+	// Raw-HTTP-response mode (bugboard #835). Only RawHTTPResponse functions
+	// get a collector attached — set_http_response is a validated no-op for
+	// every other function (no collector → host call returns an error). The
+	// collector rides execCtx so concurrent invocations never cross-write,
+	// matching the publish-counter / log-buffer per-call model.
+	if fn.RawHTTPResponse {
+		execCtx = WithRawHTTPCollector(execCtx)
+	}
+
 	// Get compiled module (from cache or compile)
 	module, err := e.getOrCompileModule(execCtx, fn.WASMCID)
 	if err != nil {
@ -346,6 +356,14 @@ func (e *Engine) Execute(ctx context.Context, fn *Function, input []byte, invCtx
 		return nil, &ExecutionError{FunctionName: fn.Name, RequestID: invCtx.RequestID, Cause: err}
 	}

+	// Surface any verbatim HTTP response the function set (bugboard #835)
+	// onto invCtx so the Invoker → HTTP handler can replay it. Only
+	// RawHTTPResponse functions have a collector attached; TakeRawHTTPResponse
+	// returns (_, false) otherwise.
+	if res, ok := TakeRawHTTPResponse(execCtx); ok {
+		invCtx.RawHTTP = &res
+	}
+
 	e.logInvocation(ctx, fn, invCtx, logBuf, startTime, len(output), InvocationStatusSuccess, nil)
 	e.logSlowInvocation(invCtx, startTime, ratelimitDoneAt, moduleLoadedAt, executeDoneAt, "success", nil)
 	return output, nil
@ -547,7 +565,13 @@ func (e *Engine) InstantiatePersistent(ctx context.Context, fn *Function, invCtx
 		// into real clocks via the documented wazero hook — same effect as
 		// the runtime would get on a normal Go process.
 		WithSysWalltime().
-		WithSysNanotime()
+		WithSysNanotime().
+		// Bugboard #120 — same class as #27. Without WithRandSource, wazero's
+		// default RNG is deterministic (zero seed), so TinyGo crypto/rand.Read
+		// returns identical bytes on every fresh instance — constant codes /
+		// nonces / tokens. Wire in the host CSPRNG. Same fix at
+		// execution/executor.go for the stateless path.
+		WithRandSource(cryptorand.Reader)

 	instance, err := e.runtime.InstantiateModule(ctx, compiled, moduleConfig)
 	if err != nil {
@ -742,6 +766,7 @@ func (e *Engine) registerHostModule(ctx context.Context) error {
 			NewFunctionBuilder().WithFunc(e.hCacheIncrBy).Export("cache_incr_by").
 			NewFunctionBuilder().WithFunc(e.hHTTPFetch).Export("http_fetch").
 			NewFunctionBuilder().WithFunc(e.hAnyoneFetch).Export("anyone_fetch").
+			NewFunctionBuilder().WithFunc(e.hSetHTTPResponse).Export("set_http_response").
 			NewFunctionBuilder().WithFunc(e.hPubSubPublish).Export("pubsub_publish").
 			NewFunctionBuilder().WithFunc(e.hPubSubPublishBatch).Export("pubsub_publish_batch").
 			NewFunctionBuilder().WithFunc(e.hPushSend).Export("push_send").
@ -751,6 +776,8 @@ func (e *Engine) registerHostModule(ctx context.Context) error {
 			NewFunctionBuilder().WithFunc(e.hWSPubSubUnbridge).Export("ws_pubsub_unbridge").
 			NewFunctionBuilder().WithFunc(e.hWSSend).Export("ws_send").
 			NewFunctionBuilder().WithFunc(e.hWSBroadcast).Export("ws_broadcast").
+			NewFunctionBuilder().WithFunc(e.hEphemeralStateSet).Export("ephemeral_state_set").
+			NewFunctionBuilder().WithFunc(e.hEphemeralStateClear).Export("ephemeral_state_clear").
 			NewFunctionBuilder().WithFunc(e.hFunctionInvoke).Export("function_invoke").
 			NewFunctionBuilder().WithFunc(e.hFunctionInvokeAsync).Export("function_invoke_async").
 			NewFunctionBuilder().WithFunc(e.hLogInfo).Export("log_info").
@ -948,6 +975,40 @@ func (e *Engine) hHTTPFetch(ctx context.Context, mod api.Module, methodPtr, meth
 	return e.executor.WriteToGuest(ctx, mod, resp)
 }

+// hSetHTTPResponse is the guest-facing (WASM) entry point for
+// SetHTTPResponse — bugboard #835 raw-HTTP-response mode.
+//
+// ABI: set_http_response(status i32, headersJSONPtr, headersJSONLen,
+// bodyPtr, bodyLen uint32) -> uint32. headersJSON (when non-empty) is a JSON
+// object of string→string; a zero length skips the read and passes nil
+// headers / nil body. Returns 1 on success, 0 on failure (function not
+// deployed with raw_http_response, bad status, oversized headers/body, or a
+// guest-memory read error).
+func (e *Engine) hSetHTTPResponse(ctx context.Context, mod api.Module,
+	status, headersPtr, headersLen, bodyPtr, bodyLen uint32) uint32 {
+	// Headers are decoded first; a decode failure is logged and aborts the call.
+	var headers map[string]string
+	if headersLen != 0 {
+		err := e.executor.UnmarshalJSONFromGuest(mod, headersPtr, headersLen, &headers)
+		if err != nil {
+			e.logger.Warn("set_http_response: failed to unmarshal headers", zap.Error(err))
+			return 0
+		}
+	}
+
+	var body []byte
+	if bodyLen != 0 {
+		raw, ok := e.executor.ReadFromGuest(mod, bodyPtr, bodyLen)
+		if !ok {
+			return 0
+		}
+		body = raw
+	}
+
+	err := e.hostServices.SetHTTPResponse(ctx, int(status), headers, body)
+	if err == nil {
+		return 1
+	}
+	e.logger.Warn("host function set_http_response failed", zap.Error(err))
+	return 0
+}
+
 // hAnyoneFetch is the WASM-callable wrapper for AnyoneFetch — feat-11.
 // Identical ABI to hHTTPFetch (method, url, headers JSON, body), routes
 // through the Anyone SOCKS5 proxy. Returns packed (ptr<<32 | len) to the
@ -1291,6 +1352,67 @@ func (e *Engine) hWSBroadcast(ctx context.Context, mod api.Module,
 	return 1
 }

+// hEphemeralStateSet is the guest-facing (WASM) entry point for
+// EphemeralStateSet — bugboard #710 WS-subscribe-tracked ephemeral state.
+//
+// ABI: ephemeral_state_set(topicPtr, topicLen, keyPtr, keyLen, payloadPtr,
+// payloadLen uint32, ttlMs int64) -> uint32. A zero payloadLen skips the read
+// and passes a nil payload. Returns 1 on success, 0 on failure (no WS client
+// in context, empty topic/key, oversized payload, per-client key cap, or a
+// guest-memory read error).
+func (e *Engine) hEphemeralStateSet(ctx context.Context, mod api.Module,
+	topicPtr, topicLen, keyPtr, keyLen, payloadPtr, payloadLen uint32, ttlMs int64) uint32 {
+	topicBytes, ok := e.executor.ReadFromGuest(mod, topicPtr, topicLen)
+	if !ok {
+		return 0
+	}
+	keyBytes, ok := e.executor.ReadFromGuest(mod, keyPtr, keyLen)
+	if !ok {
+		return 0
+	}
+
+	var payload []byte
+	if payloadLen != 0 {
+		data, ok := e.executor.ReadFromGuest(mod, payloadPtr, payloadLen)
+		if !ok {
+			return 0
+		}
+		payload = data
+	}
+
+	topic, key := string(topicBytes), string(keyBytes)
+	err := e.hostServices.EphemeralStateSet(ctx, topic, key, payload, ttlMs)
+	if err == nil {
+		return 1
+	}
+	e.logger.Warn("host function ephemeral_state_set failed",
+		zap.String("topic", topic),
+		zap.String("key", key),
+		zap.Error(err))
+	return 0
+}
+
+// hEphemeralStateClear is the guest-facing (WASM) entry point for
+// EphemeralStateClear.
+//
+// ABI: ephemeral_state_clear(topicPtr, topicLen, keyPtr, keyLen uint32) ->
+// uint32. Returns 1 on success (including idempotent clears of a missing key),
+// 0 on failure (no WS client in context, empty topic/key, or a guest-memory
+// read error).
+func (e *Engine) hEphemeralStateClear(ctx context.Context, mod api.Module,
+	topicPtr, topicLen, keyPtr, keyLen uint32) uint32 {
+	topicBytes, ok := e.executor.ReadFromGuest(mod, topicPtr, topicLen)
+	if !ok {
+		return 0
+	}
+	keyBytes, ok := e.executor.ReadFromGuest(mod, keyPtr, keyLen)
+	if !ok {
+		return 0
+	}
+
+	topic, key := string(topicBytes), string(keyBytes)
+	err := e.hostServices.EphemeralStateClear(ctx, topic, key)
+	if err == nil {
+		return 1
+	}
+	e.logger.Warn("host function ephemeral_state_clear failed",
+		zap.String("topic", topic),
+		zap.String("key", key),
+		zap.Error(err))
+	return 0
+}
+
 // hPushSend is the WASM-callable wrapper for PushSend.
 // Inputs:
 //   userIDPtr/userIDLen — UTF-8 user ID to push to (within the function's
--- a/core/pkg/serverless/ephemeral_disconnect_test.go
+++ b/core/pkg/serverless/ephemeral_disconnect_test.go
@ -0,0 +1,52 @@
+package serverless
+
+import (
+	"context"
+	"testing"
+
+	"go.uber.org/zap"
+)
+
+// fakeWSConn is a do-nothing WebSocketConn stand-in, sufficient for driving
+// the WSManager register/unregister lifecycle in tests.
+type fakeWSConn struct{}
+
+// WriteMessage discards the message and reports success.
+func (fakeWSConn) WriteMessage(int, []byte) error {
+	return nil
+}
+
+// ReadMessage returns zero values and no error.
+func (fakeWSConn) ReadMessage() (int, []byte, error) {
+	return 0, nil, nil
+}
+
+// Close reports success without doing anything.
+func (fakeWSConn) Close() error {
+	return nil
+}
+
+// TestWSManager_DisconnectHookClearsEphemeralState verifies the wiring that
+// makes Feature #710's auto-clear work: a disconnect hook registered on the
+// WSManager fires on Unregister, clearing the disconnecting client's ephemeral
+// state. Both the stateless and persistent WS handlers call Unregister, so
+// this single hook covers both paths.
+func TestWSManager_DisconnectHookClearsEphemeralState(t *testing.T) {
+	logger := zap.NewNop()
+	wsm := NewWSManager(logger)
+	pub := &capturePublisher{}
+	store := NewEphemeralStore(pub.publish)
+
+	// Wire the hook exactly as NewHostFunctions does.
+	wsm.AddDisconnectHook(func(clientID string) {
+		store.ClearClient(context.Background(), clientID)
+	})
+
+	clientID := "client-A"
+	wsm.Register(clientID, fakeWSConn{})
+
+	if err := store.Set(context.Background(), "ns1", clientID, "t", "k", []byte("p"), 0); err != nil {
+		t.Fatalf("Set: %v", err)
+	}
+	if store.keyCountForTest() != 1 {
+		t.Fatalf("expected 1 key before disconnect, got %d", store.keyCountForTest())
+	}
+
+	// Disconnect → hook fires → state cleared + synthetic clear published.
+	wsm.Unregister(clientID)
+
+	if store.keyCountForTest() != 0 {
+		t.Errorf("disconnect hook did not clear ephemeral state, count=%d", store.keyCountForTest())
+	}
+	if pub.countKind(EphemeralEventClear) != 1 {
+		t.Errorf("expected 1 synthetic clear on disconnect, got %d", pub.countKind(EphemeralEventClear))
+	}
+}
--- a/core/pkg/serverless/ephemeral_state.go
+++ b/core/pkg/serverless/ephemeral_state.go
@ -0,0 +1,352 @@
+package serverless
+
+import (
+	"context"
+	"encoding/json"
+	"fmt"
+	"sync"
+	"time"
+)
+
+// WS-subscribe-tracked ephemeral state primitive (bugboard #710).
+//
+// A serverless function can publish short-lived per-subscriber state (typing
+// indicators, "online" flags, cursor positions, …) keyed by (topic, key) and
+// have the gateway AUTO-CLEAR that state the moment the owning WebSocket
+// client disconnects — publishing a synthetic clear event so every subscriber
+// sees the state vanish with zero cron lag. State also expires on a TTL as a
+// backstop.
+//
+// Ownership model: each set is tagged with the CURRENT invocation's WS client
+// ID (the same source GetWSClientID reads). On disconnect the store iterates
+// that client's owned (topic,key) entries, publishes a clear event for each,
+// and drops them. A client's disconnect never touches another client's state.
+
+const (
+	// ephemeralMaxKeysPerClient is the most (topic,key) entries one WS client
+	// may own simultaneously. It limits both per-client memory and how many
+	// synthetic clears a single disconnect can fan out.
+	ephemeralMaxKeysPerClient = 256
+
+	// ephemeralMaxPayloadBytes is the largest payload accepted by Set
+	// (16 KiB) — ample for presence/typing/cursor metadata while keeping
+	// gateway memory bounded.
+	ephemeralMaxPayloadBytes = 16 * 1024
+
+	// ephemeralMaxTTL is the ceiling applied to any requested TTL. Disconnect
+	// cleanup is the primary path and TTL only a backstop, so long lifetimes
+	// serve no purpose.
+	ephemeralMaxTTL = 30 * time.Minute
+
+	// ephemeralDefaultTTL is used whenever the caller supplies ttlMs <= 0.
+	ephemeralDefaultTTL = 60 * time.Second
+
+	// ephemeralSweepInterval is the period of the backstop sweeper. It only
+	// matters for entries whose owner is still connected after the TTL has
+	// elapsed; disconnects are handled immediately by the hook.
+	ephemeralSweepInterval = 10 * time.Second
+)
+
+// EphemeralEventKind discriminates the synthetic events published on a topic.
+// It is serialized as the "kind" field of EphemeralEvent.
+type EphemeralEventKind string
+
+const (
+	// EphemeralEventSet is published when a value is stored or overwritten.
+	EphemeralEventSet   EphemeralEventKind = "set"
+	// EphemeralEventClear is published when a value is removed, whether by
+	// an explicit clear, the owner's disconnect, or TTL expiry.
+	EphemeralEventClear EphemeralEventKind = "clear"
+)
+
+// EphemeralEvent is the wire shape published on the topic when ephemeral state
+// is set, cleared, or auto-cleared on disconnect/expiry. Subscribers key off
+// Kind + Key to update their local view. Payload is only populated for "set".
+//
+// The topic itself is not part of the event body; it is passed to the
+// publisher alongside the marshaled event.
+type EphemeralEvent struct {
+	Type     string             `json:"__ephemeral"` // always "state"
+	Kind     EphemeralEventKind `json:"kind"`        // set | clear
+	Key      string             `json:"key"`         // app-chosen key
+	ClientID string             `json:"client_id"`   // owning WS client
+	// Payload is the opaque app-chosen blob (may be JSON, protobuf, or
+	// arbitrary bytes), present only for "set". encoding/json base64-encodes
+	// a []byte on the wire, so subscribers base64-decode "payload" to recover
+	// the original bytes — mirroring how pubsub_publish_batch carries data.
+	// Because of omitempty, a "set" with an empty payload omits the field.
+	Payload []byte `json:"payload,omitempty"`
+	Reason  string `json:"reason,omitempty"` // clear only: explicit|disconnect|expired
+}
+
+// ephemeralPublisher publishes data on a (namespace, topic). Abstracted so the
+// store can publish synthetic events without depending on the concrete pubsub
+// adapter type — and so tests can capture published events.
+//
+// The store passes the namespace and topic exactly as the caller supplied
+// them to Set/Clear; it never prefixes or rewrites the topic. How (or whether)
+// the namespace is applied is left to the implementation.
+// NOTE(review): the production wiring appears to ignore the namespace argument
+// and rely on the adapter to namespace internally — confirm against the host
+// wiring before depending on it here.
+type ephemeralPublisher func(ctx context.Context, namespace, topic string, data []byte) error
+
+// ephemeralEntry is one stored value plus its expiry and the metadata needed
+// to publish a clear event for it.
+//
+// namespace/topic/key duplicate the map key so an entry can be published
+// after it has been removed from the store; clientID is the current owner;
+// payload is the store's private copy of the caller's bytes; expiresAt is the
+// instant after which the sweeper removes the entry.
+type ephemeralEntry struct {
+	namespace string
+	topic     string
+	key       string
+	clientID  string
+	payload   []byte
+	expiresAt time.Time
+}
+
+// ephemeralStateKey identifies a stored value across namespaces/topics.
+// It is a comparable struct of three strings so it can be used directly as a
+// map key. The owning client is deliberately NOT part of the key: at most one
+// client owns a given (namespace, topic, key) at a time.
+type ephemeralStateKey struct {
+	namespace string
+	topic     string
+	key       string
+}
+
+// EphemeralStore holds WS-subscribe-tracked ephemeral state with auto-clear on
+// disconnect (bugboard #710). Safe for concurrent use: both maps are guarded
+// by mu, and events are published only after mu has been released.
+type EphemeralStore struct {
+	// publish delivers synthetic set/clear events; nil disables publishing.
+	publish ephemeralPublisher
+
+	mu sync.Mutex
+	// values keyed by (ns, topic, key).
+	values map[ephemeralStateKey]*ephemeralEntry
+	// owned maps a clientID to the set of state keys it owns, so a
+	// disconnect finds that client's entries with a single lookup and does
+	// work proportional to the number of keys it owns (not the whole store).
+	owned map[string]map[ephemeralStateKey]struct{}
+
+	// sweeper lifecycle.
+	stopOnce sync.Once     // makes StopSweeper safe to call repeatedly
+	stopCh   chan struct{} // closed to tell the sweeper goroutine to exit
+	now      func() time.Time // injectable clock for tests
+}
+
+// NewEphemeralStore builds an empty store that emits its synthetic events via
+// publish. A nil publish is permitted: state is still tracked but no events
+// are emitted (handy in tests). The sweeper is NOT started here; call
+// StartSweeper separately. The clock defaults to time.Now.
+func NewEphemeralStore(publish ephemeralPublisher) *EphemeralStore {
+	store := new(EphemeralStore)
+	store.publish = publish
+	store.values = make(map[ephemeralStateKey]*ephemeralEntry)
+	store.owned = make(map[string]map[ephemeralStateKey]struct{})
+	store.stopCh = make(chan struct{})
+	store.now = time.Now
+	return store
+}
+
+// Set records an ephemeral value owned by clientID and publishes a "set" event
+// on the topic so subscribers observe it. Returns an error on validation
+// failure (empty client/topic/key, oversized payload, per-client cap reached).
+func (s *EphemeralStore) Set(ctx context.Context, namespace, clientID, topic, key string, payload []byte, ttlMs int64) error {
+	if clientID == "" {
+		return fmt.Errorf("ephemeral_state_set: requires a WebSocket client (no ws_client_id in invocation context)")
+	}
+	if topic == "" || key == "" {
+		return fmt.Errorf("ephemeral_state_set: topic and key are required")
+	}
+	if len(payload) > ephemeralMaxPayloadBytes {
+		return fmt.Errorf("ephemeral_state_set: payload too large (%d > %d bytes)", len(payload), ephemeralMaxPayloadBytes)
+	}
+
+	ttl := time.Duration(ttlMs) * time.Millisecond
+	if ttl <= 0 {
+		ttl = ephemeralDefaultTTL
+	}
+	if ttl > ephemeralMaxTTL {
+		ttl = ephemeralMaxTTL
+	}
+
+	sk := ephemeralStateKey{namespace: namespace, topic: topic, key: key}
+	payloadCopy := make([]byte, len(payload))
+	copy(payloadCopy, payload)
+
+	s.mu.Lock()
+	ownedSet := s.owned[clientID]
+	// Enforce the per-client cap only for NEW keys this client doesn't already
+	// own — overwriting an existing key must always be allowed.
+	if _, alreadyOwned := s.values[sk]; !alreadyOwned || s.values[sk].clientID != clientID {
+		if len(ownedSet) >= ephemeralMaxKeysPerClient {
+			s.mu.Unlock()
+			return fmt.Errorf("ephemeral_state_set: client %s exceeded max %d ephemeral keys", clientID, ephemeralMaxKeysPerClient)
+		}
+	}
+
+	// If a different client owned this exact (ns,topic,key), transfer ownership
+	// — drop it from the previous owner's set so its disconnect won't clear
+	// state it no longer owns.
+	if prev, ok := s.values[sk]; ok && prev.clientID != clientID {
+		if prevSet := s.owned[prev.clientID]; prevSet != nil {
+			delete(prevSet, sk)
+			if len(prevSet) == 0 {
+				delete(s.owned, prev.clientID)
+			}
+		}
+	}
+
+	s.values[sk] = &ephemeralEntry{
+		namespace: namespace,
+		topic:     topic,
+		key:       key,
+		clientID:  clientID,
+		payload:   payloadCopy,
+		expiresAt: s.now().Add(ttl),
+	}
+	if ownedSet == nil {
+		ownedSet = make(map[ephemeralStateKey]struct{})
+		s.owned[clientID] = ownedSet
+	}
+	ownedSet[sk] = struct{}{}
+	s.mu.Unlock()
+
+	evt := EphemeralEvent{
+		Type:     "state",
+		Kind:     EphemeralEventSet,
+		Key:      key,
+		ClientID: clientID,
+		Payload:  payloadCopy,
+	}
+	return s.publishEvent(ctx, namespace, topic, evt)
+}
+
+// Clear deletes a value owned by clientID and announces it with a "clear"
+// event whose reason is "explicit". When the key is absent, or belongs to a
+// different client, nothing is removed or published and nil is returned, so
+// repeated clears are harmless. An empty clientID, topic, or key is rejected
+// with an error.
+func (s *EphemeralStore) Clear(ctx context.Context, namespace, clientID, topic, key string) error {
+	switch {
+	case clientID == "":
+		return fmt.Errorf("ephemeral_state_clear: requires a WebSocket client (no ws_client_id in invocation context)")
+	case topic == "" || key == "":
+		return fmt.Errorf("ephemeral_state_clear: topic and key are required")
+	}
+
+	sk := ephemeralStateKey{namespace: namespace, topic: topic, key: key}
+
+	s.mu.Lock()
+	entry, found := s.values[sk]
+	ownedByCaller := found && entry.clientID == clientID
+	if ownedByCaller {
+		s.removeLocked(sk, entry)
+	}
+	s.mu.Unlock()
+
+	if !ownedByCaller {
+		// Missing, or someone else's key: idempotent no-op.
+		return nil
+	}
+
+	evt := EphemeralEvent{
+		Type:     "state",
+		Kind:     EphemeralEventClear,
+		Key:      key,
+		ClientID: clientID,
+		Reason:   "explicit",
+	}
+	return s.publishEvent(ctx, namespace, topic, evt)
+}
+
+// ClearClient removes every entry owned by clientID and publishes a clear
+// event for each (reason "disconnect"). Called from the WS disconnect hook —
+// the primary, zero-lag cleanup path. Safe to call for an unknown client.
+func (s *EphemeralStore) ClearClient(ctx context.Context, clientID string) {
+	s.clearClientWithReason(ctx, clientID, "disconnect")
+}
+
+// clearClientWithReason removes every entry owned by clientID under the lock,
+// then publishes a clear event (carrying reason) for each removed entry once
+// the lock has been released. Publish errors are ignored: cleanup is
+// best-effort and the state is already gone.
+func (s *EphemeralStore) clearClientWithReason(ctx context.Context, clientID, reason string) {
+	s.mu.Lock()
+	keys := s.owned[clientID]
+	// Detach the ownership record first; deleting an absent key is a no-op,
+	// and the local `keys` reference stays valid for the loop below.
+	delete(s.owned, clientID)
+
+	removed := make([]*ephemeralEntry, 0, len(keys))
+	for sk := range keys {
+		entry, found := s.values[sk]
+		if !found {
+			continue
+		}
+		delete(s.values, sk)
+		removed = append(removed, entry)
+	}
+	s.mu.Unlock()
+
+	for _, entry := range removed {
+		evt := EphemeralEvent{
+			Type:     "state",
+			Kind:     EphemeralEventClear,
+			Key:      entry.key,
+			ClientID: clientID,
+			Reason:   reason,
+		}
+		_ = s.publishEvent(ctx, entry.namespace, entry.topic, evt)
+	}
+}
+
+// removeLocked deletes sk from values and from its owner's key set, pruning
+// the owner's set entirely once it becomes empty. The caller must hold s.mu.
+func (s *EphemeralStore) removeLocked(sk ephemeralStateKey, entry *ephemeralEntry) {
+	delete(s.values, sk)
+
+	owner := entry.clientID
+	keys := s.owned[owner]
+	if keys == nil {
+		return
+	}
+	delete(keys, sk)
+	if len(keys) == 0 {
+		delete(s.owned, owner)
+	}
+}
+
+// publishEvent JSON-encodes evt and hands it to the configured publisher for
+// (namespace, topic). With no publisher configured it does nothing and
+// returns nil. Marshal and publish failures are wrapped and returned.
+func (s *EphemeralStore) publishEvent(ctx context.Context, namespace, topic string, evt EphemeralEvent) error {
+	publishFn := s.publish
+	if publishFn == nil {
+		return nil
+	}
+
+	encoded, marshalErr := json.Marshal(evt)
+	if marshalErr != nil {
+		return fmt.Errorf("ephemeral state: marshal event: %w", marshalErr)
+	}
+
+	if pubErr := publishFn(ctx, namespace, topic, encoded); pubErr != nil {
+		return fmt.Errorf("ephemeral state: publish %s event: %w", evt.Kind, pubErr)
+	}
+	return nil
+}
+
+// StartSweeper spawns the background goroutine that sweeps expired entries
+// every ephemeralSweepInterval. There is no guard against double starts —
+// call it exactly once per store. The goroutine exits when StopSweeper closes
+// the stop channel.
+func (s *EphemeralStore) StartSweeper() {
+	go func() {
+		tick := time.NewTicker(ephemeralSweepInterval)
+		defer tick.Stop()
+
+		for {
+			select {
+			case <-tick.C:
+				// The sweep is not tied to any request, so it runs
+				// with a background context.
+				s.sweepExpired(context.Background())
+			case <-s.stopCh:
+				return
+			}
+		}
+	}()
+}
+
+// StopSweeper signals the sweeper goroutine to exit by closing the stop
+// channel. The close is wrapped in a sync.Once, so repeated calls are safe.
+func (s *EphemeralStore) StopSweeper() {
+	s.stopOnce.Do(func() {
+		close(s.stopCh)
+	})
+}
+
+// sweepExpired drops every entry whose expiry lies strictly before the
+// current clock reading, then publishes a clear event with reason "expired"
+// for each one after releasing the lock. Publish errors are ignored.
+func (s *EphemeralStore) sweepExpired(ctx context.Context) {
+	cutoff := s.now()
+
+	var expired []*ephemeralEntry
+	s.mu.Lock()
+	for sk, entry := range s.values {
+		if !entry.expiresAt.Before(cutoff) {
+			continue
+		}
+		// Deleting from a map while ranging over it is permitted in Go.
+		s.removeLocked(sk, entry)
+		expired = append(expired, entry)
+	}
+	s.mu.Unlock()
+
+	for _, entry := range expired {
+		evt := EphemeralEvent{
+			Type:     "state",
+			Kind:     EphemeralEventClear,
+			Key:      entry.key,
+			ClientID: entry.clientID,
+			Reason:   "expired",
+		}
+		_ = s.publishEvent(ctx, entry.namespace, entry.topic, evt)
+	}
+}
+
+// keyCountForTest reports how many values are currently stored. Intended for
+// tests only; it takes the store lock for the read.
+func (s *EphemeralStore) keyCountForTest() int {
+	s.mu.Lock()
+	count := len(s.values)
+	s.mu.Unlock()
+	return count
+}
--- a/core/pkg/serverless/ephemeral_state_test.go
+++ b/core/pkg/serverless/ephemeral_state_test.go
@ -0,0 +1,295 @@
+package serverless
+
+import (
+	"context"
+	"encoding/json"
+	"fmt"
+	"sync"
+	"testing"
+	"time"
+)
+
+// capturePublisher records every published event for assertions. Its publish
+// method matches the ephemeralPublisher signature, so tests pass pub.publish
+// to the store. mu guards events so the recorder can be used from multiple
+// goroutines.
+type capturePublisher struct {
+	mu     sync.Mutex
+	events []capturedEvent
+}
+
+// capturedEvent is one recorded publish call: the namespace and topic it was
+// addressed to, plus the decoded event body.
+type capturedEvent struct {
+	namespace string
+	topic     string
+	event     EphemeralEvent
+}
+
+// publish decodes data as an EphemeralEvent and appends it, together with its
+// destination, to the recorded events. A payload that is not a valid event is
+// returned as an error and not recorded.
+func (c *capturePublisher) publish(_ context.Context, namespace, topic string, data []byte) error {
+	var decoded EphemeralEvent
+	if err := json.Unmarshal(data, &decoded); err != nil {
+		return err
+	}
+	rec := capturedEvent{namespace: namespace, topic: topic, event: decoded}
+
+	c.mu.Lock()
+	defer c.mu.Unlock()
+	c.events = append(c.events, rec)
+	return nil
+}
+
+// snapshot returns a copy of the events recorded so far, in publish order, so
+// callers can inspect them without holding the lock.
+func (c *capturePublisher) snapshot() []capturedEvent {
+	c.mu.Lock()
+	dup := make([]capturedEvent, len(c.events))
+	copy(dup, c.events)
+	c.mu.Unlock()
+	return dup
+}
+
+// countKind reports how many recorded events have the given kind.
+func (c *capturePublisher) countKind(kind EphemeralEventKind) int {
+	c.mu.Lock()
+	defer c.mu.Unlock()
+
+	total := 0
+	for i := range c.events {
+		if c.events[i].event.Kind != kind {
+			continue
+		}
+		total++
+	}
+	return total
+}
+
+// newTestStore builds a store for tests. The sweeper is not started, so tests
+// drive expiry explicitly through sweepExpired.
+func newTestStore(pub ephemeralPublisher) *EphemeralStore {
+	return NewEphemeralStore(pub)
+}
+
+// TestEphemeralStore_SetThenClear covers the explicit lifecycle: Set stores a
+// value and publishes a "set" carrying the payload verbatim; Clear removes it
+// and publishes a "clear" with reason "explicit".
+func TestEphemeralStore_SetThenClear(t *testing.T) {
+	pub := &capturePublisher{}
+	s := newTestStore(pub.publish)
+	ctx := context.Background()
+
+	if err := s.Set(ctx, "ns1", "client-A", "typing:room1", "k1", []byte(`{"typing":true}`), 0); err != nil {
+		t.Fatalf("Set: %v", err)
+	}
+	if s.keyCountForTest() != 1 {
+		t.Fatalf("expected 1 stored key, got %d", s.keyCountForTest())
+	}
+
+	if err := s.Clear(ctx, "ns1", "client-A", "typing:room1", "k1"); err != nil {
+		t.Fatalf("Clear: %v", err)
+	}
+	if s.keyCountForTest() != 0 {
+		t.Errorf("expected 0 stored keys after clear, got %d", s.keyCountForTest())
+	}
+
+	if got := pub.countKind(EphemeralEventSet); got != 1 {
+		t.Errorf("set events = %d, want 1", got)
+	}
+	if got := pub.countKind(EphemeralEventClear); got != 1 {
+		t.Errorf("clear events = %d, want 1", got)
+	}
+
+	// The count checks above are non-fatal, so guard the indexing below:
+	// a missing event must fail the test cleanly instead of panicking.
+	evts := pub.snapshot()
+	if len(evts) != 2 {
+		t.Fatalf("published events = %d, want 2 (set then clear)", len(evts))
+	}
+	// The set event must carry the payload verbatim.
+	if string(evts[0].event.Payload) != `{"typing":true}` {
+		t.Errorf("set payload = %q, want the original JSON", evts[0].event.Payload)
+	}
+	if evts[1].event.Reason != "explicit" {
+		t.Errorf("clear reason = %q, want explicit", evts[1].event.Reason)
+	}
+}
+
+// TestEphemeralStore_SetThenDisconnect checks that ClearClient drops every
+// key the client owns, across topics, and publishes one "disconnect" clear
+// per key.
+func TestEphemeralStore_SetThenDisconnect(t *testing.T) {
+	ctx := context.Background()
+	pub := &capturePublisher{}
+	s := newTestStore(pub.publish)
+
+	errA := s.Set(ctx, "ns1", "client-A", "topicX", "kA", []byte("p1"), 0)
+	if errA != nil {
+		t.Fatalf("Set kA: %v", errA)
+	}
+	errB := s.Set(ctx, "ns1", "client-A", "topicY", "kB", []byte("p2"), 0)
+	if errB != nil {
+		t.Fatalf("Set kB: %v", errB)
+	}
+
+	s.ClearClient(ctx, "client-A")
+
+	if remaining := s.keyCountForTest(); remaining != 0 {
+		t.Errorf("expected all state dropped on disconnect, got %d", remaining)
+	}
+	// Exactly one synthetic clear per owned key.
+	if clears := pub.countKind(EphemeralEventClear); clears != 2 {
+		t.Errorf("disconnect clear events = %d, want 2", clears)
+	}
+	// Every clear must be attributed to the disconnect.
+	for _, rec := range pub.snapshot() {
+		if rec.event.Kind != EphemeralEventClear {
+			continue
+		}
+		if rec.event.Reason != "disconnect" {
+			t.Errorf("clear reason = %q, want disconnect", rec.event.Reason)
+		}
+	}
+}
+
+// TestEphemeralStore_TTLExpiry drives the sweeper by hand with an injected
+// clock: an entry survives a sweep before its TTL and is removed, with an
+// "expired" clear published, by a sweep after it.
+func TestEphemeralStore_TTLExpiry(t *testing.T) {
+	ctx := context.Background()
+	pub := &capturePublisher{}
+	s := newTestStore(pub.publish)
+
+	// Pin the clock so expiry is fully deterministic.
+	start := time.Now()
+	s.now = func() time.Time { return start }
+
+	if err := s.Set(ctx, "ns1", "client-A", "topicX", "kA", []byte("p"), 1000); err != nil {
+		t.Fatalf("Set: %v", err)
+	}
+
+	// Sweeping before the TTL elapses must leave the entry alone.
+	s.sweepExpired(ctx)
+	if n := s.keyCountForTest(); n != 1 {
+		t.Fatalf("entry expired too early, count=%d", n)
+	}
+
+	// Jump past the 1s TTL; the next sweep must remove it.
+	later := start.Add(2 * time.Second)
+	s.now = func() time.Time { return later }
+	s.sweepExpired(ctx)
+	if n := s.keyCountForTest(); n != 0 {
+		t.Errorf("entry not swept after TTL, count=%d", n)
+	}
+
+	// The sweep must have announced the removal with reason=expired.
+	expiredSeen := false
+	for _, rec := range pub.snapshot() {
+		if rec.event.Kind != EphemeralEventClear || rec.event.Reason != "expired" {
+			continue
+		}
+		expiredSeen = true
+	}
+	if !expiredSeen {
+		t.Error("expected a clear event with reason=expired")
+	}
+}
+
+// TestEphemeralStore_TTLClampedToMax verifies that a requested TTL above
+// ephemeralMaxTTL is clamped to exactly ephemeralMaxTTL. The clock is frozen,
+// so the stored expiry must EQUAL base+max: an upper-bound-only check would
+// also pass if the store wrongly fell back to the (shorter) default TTL.
+func TestEphemeralStore_TTLClampedToMax(t *testing.T) {
+	pub := &capturePublisher{}
+	s := newTestStore(pub.publish)
+	base := time.Now()
+	s.now = func() time.Time { return base }
+
+	// Request a TTL far beyond the max; it must be clamped.
+	huge := (ephemeralMaxTTL + time.Hour).Milliseconds()
+	if err := s.Set(context.Background(), "ns1", "c", "t", "k", []byte("p"), huge); err != nil {
+		t.Fatalf("Set: %v", err)
+	}
+	s.mu.Lock()
+	entry := s.values[ephemeralStateKey{namespace: "ns1", topic: "t", key: "k"}]
+	s.mu.Unlock()
+	if entry == nil {
+		t.Fatal("entry missing")
+	}
+	maxExpiry := base.Add(ephemeralMaxTTL)
+	if !entry.expiresAt.Equal(maxExpiry) {
+		t.Errorf("TTL not clamped to max: expiresAt %v, want exactly %v", entry.expiresAt, maxExpiry)
+	}
+}
+
+// TestEphemeralStore_PerClientCapEnforced fills a client up to the per-client
+// key cap, then checks that a further NEW key is rejected (and not stored)
+// while overwriting an existing key is still accepted.
+func TestEphemeralStore_PerClientCapEnforced(t *testing.T) {
+	ctx := context.Background()
+	pub := &capturePublisher{}
+	s := newTestStore(pub.publish)
+
+	for i := 0; i < ephemeralMaxKeysPerClient; i++ {
+		key := fmt.Sprintf("k%d", i)
+		if err := s.Set(ctx, "ns1", "client-A", "t", key, []byte("p"), 0); err != nil {
+			t.Fatalf("Set #%d: %v", i, err)
+		}
+	}
+
+	// One key past the cap must be refused.
+	if err := s.Set(ctx, "ns1", "client-A", "t", "overflow", []byte("p"), 0); err == nil {
+		t.Fatal("expected per-client cap error")
+	}
+	if stored := s.keyCountForTest(); stored != ephemeralMaxKeysPerClient {
+		t.Errorf("stored keys = %d, want %d (overflow must not be stored)", stored, ephemeralMaxKeysPerClient)
+	}
+
+	// Rewriting a key the client already owns is allowed even at the cap.
+	if err := s.Set(ctx, "ns1", "client-A", "t", "k0", []byte("updated"), 0); err != nil {
+		t.Errorf("overwrite at cap rejected: %v", err)
+	}
+}
+
+// TestEphemeralStore_ClientIsolation checks that one client can neither
+// disconnect-clear nor explicitly clear state owned by another client.
+func TestEphemeralStore_ClientIsolation(t *testing.T) {
+	ctx := context.Background()
+	pub := &capturePublisher{}
+	s := newTestStore(pub.publish)
+
+	if err := s.Set(ctx, "ns1", "client-A", "t", "kA", []byte("a"), 0); err != nil {
+		t.Fatalf("Set A: %v", err)
+	}
+	if err := s.Set(ctx, "ns1", "client-B", "t", "kB", []byte("b"), 0); err != nil {
+		t.Fatalf("Set B: %v", err)
+	}
+
+	// A's disconnect must leave B's entry in place.
+	s.ClearClient(ctx, "client-A")
+	if n := s.keyCountForTest(); n != 1 {
+		t.Fatalf("expected B's single key to survive A's disconnect, got %d", n)
+	}
+	bKey := ephemeralStateKey{namespace: "ns1", topic: "t", key: "kB"}
+	s.mu.Lock()
+	_, bSurvives := s.values[bKey]
+	s.mu.Unlock()
+	if !bSurvives {
+		t.Error("client-B's state was wrongly cleared by client-A's disconnect")
+	}
+
+	// An explicit clear by a non-owner is a silent no-op.
+	if err := s.Clear(ctx, "ns1", "client-A", "t", "kB"); err != nil {
+		t.Fatalf("cross-client Clear should be a no-op, got err: %v", err)
+	}
+	if n := s.keyCountForTest(); n != 1 {
+		t.Error("client-A managed to clear client-B's key")
+	}
+}
+
+// TestEphemeralStore_SetValidation checks that Set rejects each invalid input:
+// empty client ID, empty topic, empty key, and a payload one byte over the
+// limit. A nil publisher is used because no event should ever be emitted.
+func TestEphemeralStore_SetValidation(t *testing.T) {
+	s := newTestStore(nil)
+	ctx := context.Background()
+
+	cases := []struct {
+		clientID, topic, key string
+		payload              []byte
+		failMsg              string
+	}{
+		{"", "t", "k", nil, "expected error for empty client ID"},
+		{"c", "", "k", nil, "expected error for empty topic"},
+		{"c", "t", "", nil, "expected error for empty key"},
+		{"c", "t", "k", make([]byte, ephemeralMaxPayloadBytes+1), "expected error for oversized payload"},
+	}
+	for _, tc := range cases {
+		if err := s.Set(ctx, "ns1", tc.clientID, tc.topic, tc.key, tc.payload, 0); err == nil {
+			t.Error(tc.failMsg)
+		}
+	}
+}
+
+// TestEphemeralStore_ClearClientUnknownIsNoOp checks that clearing a client
+// the store has never seen neither panics nor publishes anything.
+func TestEphemeralStore_ClearClientUnknownIsNoOp(t *testing.T) {
+	pub := &capturePublisher{}
+	s := newTestStore(pub.publish)
+
+	s.ClearClient(context.Background(), "nobody")
+
+	if published := pub.snapshot(); len(published) != 0 {
+		t.Error("ClearClient on unknown client should publish nothing")
+	}
+}
+
+// TestEphemeralStore_OwnershipTransfer checks that when a second client
+// overwrites a (topic,key), ownership moves with it: the first client's
+// disconnect leaves the value alone and the second client's removes it.
+func TestEphemeralStore_OwnershipTransfer(t *testing.T) {
+	ctx := context.Background()
+	pub := &capturePublisher{}
+	s := newTestStore(pub.publish)
+
+	// A writes first; B then overwrites the very same (topic,key).
+	if err := s.Set(ctx, "ns1", "client-A", "t", "shared", []byte("a"), 0); err != nil {
+		t.Fatalf("Set A: %v", err)
+	}
+	if err := s.Set(ctx, "ns1", "client-B", "t", "shared", []byte("b"), 0); err != nil {
+		t.Fatalf("Set B: %v", err)
+	}
+
+	// The previous owner disconnecting must not touch B's value.
+	s.ClearClient(ctx, "client-A")
+	if n := s.keyCountForTest(); n != 1 {
+		t.Errorf("ownership transfer failed: key dropped on prior owner's disconnect, count=%d", n)
+	}
+
+	// The current owner disconnecting removes it.
+	s.ClearClient(ctx, "client-B")
+	if n := s.keyCountForTest(); n != 0 {
+		t.Errorf("new owner's disconnect did not clear, count=%d", n)
+	}
+}
--- a/core/pkg/serverless/execution/executor.go
+++ b/core/pkg/serverless/execution/executor.go
@ -3,6 +3,7 @@ package execution
 import (
 	"bytes"
 	"context"
+	cryptorand "crypto/rand"
 	"encoding/json"
 	"fmt"

@ -80,7 +81,15 @@ func (e *Executor) ExecuteModule(ctx context.Context, compiled wazero.CompiledMo
 		// invocation that uses time.Now() (receipts, audit rows, cursor cmp).
 		// Same fix applied at engine.go for the persistent-WS path.
 		WithSysWalltime().
-		WithSysNanotime()
+		WithSysNanotime().
+		// Bugboard #120 — same class as #27. Without WithRandSource, wazero
+		// uses a deterministic zero-seed RNG, so TinyGo's crypto/rand.Read
+		// returns IDENTICAL bytes on every fresh instance (and every
+		// invocation is a fresh instance). That makes any unguessable ID /
+		// code / nonce / token constant. Wire in the host CSPRNG so
+		// crypto/rand (and auto-seeded math/rand) work. Same fix at
+		// engine.go for the persistent-WS path.
+		WithRandSource(cryptorand.Reader)

 	// Acquire concurrency slot
 	if e.sem != nil {
--- a/core/pkg/serverless/execution/randsource_test.go
+++ b/core/pkg/serverless/execution/randsource_test.go
@ -0,0 +1,181 @@
+package execution
+
+import (
+	"context"
+	cryptorand "crypto/rand"
+	"encoding/binary"
+	"testing"
+
+	"github.com/tetratelabs/wazero"
+	"github.com/tetratelabs/wazero/imports/wasi_snapshot_preview1"
+)
+
+// Bugboard #120 — wazero defaults to a DETERMINISTIC (zero-seed) RNG source.
+// TinyGo wasm's crypto/rand.Read calls WASI random_get, so without
+// .WithRandSource(crypto/rand.Reader) every fresh instance gets the IDENTICAL
+// "random" byte sequence. Each serverless invocation is a fresh instance, so
+// any unguessable code / nonce / token a function generates is constant (the
+// observed "8LRJ2S on every rotate" symptom).
+//
+// The fix is .WithRandSource(cryptorand.Reader) on BOTH wazero moduleConfig
+// builders — executor.go (stateless) and engine.go (persistent WS). This test
+// pins the executor's config path: instantiate the SAME config twice and assert
+// the two instances produce DIFFERENT random bytes.
+//
+// If a future refactor drops .WithRandSource(), the positive test fails with a
+// clear message; the negative control documents why the fix is necessary.
+
+// randProbeWasm is a hand-assembled WASM module that imports
+// wasi_snapshot_preview1.random_get and calls it from _start, writing 8 random
+// bytes to memory[0:8].
+//
+// Every section below carries a hand-computed body-size byte. If any section
+// is edited, recompute its size (and the enclosing sizes) or the module will
+// fail to compile.
+//
+//	(module
+//	  (type $random_get (func (param i32 i32) (result i32)))
+//	  (type $start (func))
+//	  (import "wasi_snapshot_preview1" "random_get"
+//	    (func $random_get (type 0)))
+//	  (memory (export "memory") 1)
+//	  (func $_start (type 1)
+//	    i32.const 0         ;; buf = 0
+//	    i32.const 8         ;; buf_len = 8
+//	    call $random_get
+//	    drop)
+//	  (export "_start" (func $_start)))
+var randProbeWasm = []byte{
+	// Magic + version
+	0x00, 0x61, 0x73, 0x6d, 0x01, 0x00, 0x00, 0x00,
+
+	// Type section (id=1) — body=10 bytes
+	0x01,
+	0x0a,
+	0x02,                   // 2 types
+	0x60, 0x02, 0x7f, 0x7f, // type 0: func(i32, i32)
+	0x01, 0x7f, // -> (i32)
+	0x60, 0x00, 0x00, // type 1: func() -> ()
+
+	// Import section (id=2) — body=0x25 (37 bytes)
+	0x02,
+	0x25,
+	0x01, // 1 import
+	0x16, // module name "wasi_snapshot_preview1" length=22
+	0x77, 0x61, 0x73, 0x69, 0x5f, 0x73, 0x6e, 0x61, 0x70, 0x73, 0x68, 0x6f, 0x74, 0x5f, 0x70, 0x72, 0x65, 0x76, 0x69, 0x65, 0x77, 0x31,
+	0x0a, // fn name "random_get" length=10
+	0x72, 0x61, 0x6e, 0x64, 0x6f, 0x6d, 0x5f, 0x67, 0x65, 0x74,
+	0x00, 0x00, // kind=func, type idx=0
+
+	// Function section (id=3) — body=2 bytes
+	0x03,
+	0x02,
+	0x01, // 1 function
+	0x01, // type idx 1 (for _start)
+
+	// Memory section (id=5) — body=3 bytes
+	0x05,
+	0x03,
+	0x01,       // 1 memory
+	0x00, 0x01, // limits: flags=0 (no max), min=1 page
+
+	// Export section (id=7) — body=19 bytes (0x13)
+	0x07,
+	0x13,
+	0x02,                                     // 2 exports
+	0x06, 0x6d, 0x65, 0x6d, 0x6f, 0x72, 0x79, // "memory"
+	0x02, 0x00, // kind=memory, idx=0
+	0x06, 0x5f, 0x73, 0x74, 0x61, 0x72, 0x74, // "_start"
+	0x00, 0x01, // kind=func, idx=1 (after the 1 import)
+
+	// Code section (id=10) — body=11 bytes (0x0b)
+	0x0a,
+	0x0b,
+	0x01,       // 1 function body
+	0x09,       // body size = 9
+	0x00,       // 0 local groups
+	0x41, 0x00, // i32.const 0  (buf)
+	0x41, 0x08, // i32.const 8  (buf_len)
+	0x10, 0x00, // call func 0 (the imported random_get)
+	0x1a, // drop (errno return)
+	0x0b, // end
+}
+
+// readProbeRandom instantiates the compiled probe module once under cfg,
+// reads the 8 bytes at memory[0:8], and returns them decoded as a
+// little-endian uint64. The instance is closed before returning. Any
+// instantiation or memory-read failure aborts the calling test.
+func readProbeRandom(t *testing.T, runtime wazero.Runtime, compiled wazero.CompiledModule, cfg wazero.ModuleConfig) uint64 {
+	t.Helper()
+	ctx := context.Background()
+
+	instance, err := runtime.InstantiateModule(ctx, compiled, cfg)
+	if err != nil {
+		t.Fatalf("instantiate probe module: %v", err)
+	}
+	defer instance.Close(ctx)
+
+	probe, readOK := instance.Memory().Read(0, 8)
+	if !readOK {
+		t.Fatal("could not read 8 bytes from probe memory at offset 0")
+	}
+	return binary.LittleEndian.Uint64(probe)
+}
+
+// TestModuleConfig_randSourceIsRealNotDeterministic is the positive test for
+// bug #120: with WithRandSource(crypto/rand.Reader) on the module config, two
+// fresh instances of the same module must observe different random bytes.
+func TestModuleConfig_randSourceIsRealNotDeterministic(t *testing.T) {
+	ctx := context.Background()
+	runtime := wazero.NewRuntime(ctx)
+	defer runtime.Close(ctx)
+
+	if _, err := wasi_snapshot_preview1.Instantiate(ctx, runtime); err != nil {
+		t.Fatalf("instantiate WASI: %v", err)
+	}
+	compiled, err := runtime.CompileModule(ctx, randProbeWasm)
+	if err != nil {
+		t.Fatalf("compile probe wasm: %v (hex assembly likely off; recompute section sizes)", err)
+	}
+	defer compiled.Close(ctx)
+
+	// Same shape as the executor.go moduleConfig: anonymous instance, host
+	// clocks, and the host CSPRNG as the random source.
+	buildCfg := func() wazero.ModuleConfig {
+		cfg := wazero.NewModuleConfig().WithName("").WithArgs("probe")
+		cfg = cfg.WithSysWalltime().WithSysNanotime()
+		return cfg.WithRandSource(cryptorand.Reader)
+	}
+
+	first := readProbeRandom(t, runtime, compiled, buildCfg())
+	second := readProbeRandom(t, runtime, compiled, buildCfg())
+	if first == second {
+		t.Errorf("BUG #120 REGRESSION: two fresh instances produced IDENTICAL random "+
+			"bytes (%#016x) — crypto/rand is deterministic. Did the "+
+			".WithRandSource(cryptorand.Reader) call get dropped from moduleConfig "+
+			"in executor.go or engine.go?", first)
+	}
+}
+
+// TestModuleConfig_randWithoutFix_demoDeterministic is the negative control
+// for bug #120. It never fails on a behavior difference: it either confirms
+// the default RNG is deterministic (pass) or reports via t.Skipf that the
+// upstream default has changed.
+func TestModuleConfig_randWithoutFix_demoDeterministic(t *testing.T) {
+	// Negative control: WITHOUT .WithRandSource(), confirm wazero's default RNG
+	// is deterministic (identical bytes across fresh instances). This pins the
+	// *cause*. If wazero ever defaults to a real entropy source, this test
+	// is SKIPPED with an explanatory message (it does not fail) — making the
+	// change visible in test output instead of silently invalidating the
+	// fix's necessity.
+	ctx := context.Background()
+	runtime := wazero.NewRuntime(ctx)
+	defer runtime.Close(ctx)
+
+	if _, err := wasi_snapshot_preview1.Instantiate(ctx, runtime); err != nil {
+		t.Fatalf("instantiate WASI: %v", err)
+	}
+	compiled, err := runtime.CompileModule(ctx, randProbeWasm)
+	if err != nil {
+		t.Fatalf("compile probe wasm: %v", err)
+	}
+	defer compiled.Close(ctx)
+
+	// Default config: no WithRandSource, so wazero's built-in source is used.
+	newDefault := func() wazero.ModuleConfig {
+		return wazero.NewModuleConfig().WithName("").WithArgs("probe")
+	}
+	a := readProbeRandom(t, runtime, compiled, newDefault())
+	b := readProbeRandom(t, runtime, compiled, newDefault())
+	if a != b {
+		t.Skipf("wazero default RandSource now differs across instances (%#016x vs %#016x) — "+
+			"if real-by-default upstream, the bug-#120 fix may be redundant; review", a, b)
+	}
+	// Determinism confirmed → fix is meaningful.
+}
--- a/core/pkg/serverless/hostfuncs_test.go
+++ b/core/pkg/serverless/hostfuncs_test.go
@ -134,6 +134,18 @@ func (m *mockHostServices) WSPubSubUnbridge(ctx context.Context, clientID, topic
 	return nil
 }

+// SetHTTPResponse delegates to the package-level SetRawHTTPResponse with the
+// same arguments, so the mock shares that function's behavior rather than
+// stubbing it out.
+func (m *mockHostServices) SetHTTPResponse(ctx context.Context, status int, headers map[string]string, body []byte) error {
+	return SetRawHTTPResponse(ctx, status, headers, body)
+}
+
+// EphemeralStateSet is a no-op mock: it ignores its arguments and always
+// reports success.
+func (m *mockHostServices) EphemeralStateSet(ctx context.Context, topic, key string, payload []byte, ttlMs int64) error {
+	return nil
+}
+
+// EphemeralStateClear is a no-op mock: it ignores its arguments and always
+// reports success.
+func (m *mockHostServices) EphemeralStateClear(ctx context.Context, topic, key string) error {
+	return nil
+}
+
 // WSSend is a no-op mock: it discards the data and always reports success.
 func (m *mockHostServices) WSSend(ctx context.Context, clientID string, data []byte) error {
 	return nil
 }
--- a/core/pkg/serverless/hostfunctions/host_services.go
+++ b/core/pkg/serverless/hostfunctions/host_services.go
@ -1,6 +1,7 @@
 package hostfunctions

 import (
+	"context"
 	"net/http"
 	"time"

@ -57,7 +58,7 @@ func NewHostFunctions(
 		anyoneHTTPClient.Timeout = httpTimeout
 	}

-	return &HostFunctions{
+	hf := &HostFunctions{
 		db:               db,
 		cacheClient:      cacheClient,
 		storage:          storage,
@ -77,4 +78,28 @@ func NewHostFunctions(
 		logs:             make([]serverless.LogEntry, 0),
 		asyncInvokeSem:   make(chan struct{}, asyncInvokeMaxInFlight),
 	}
+
+	// Ephemeral-state store (bugboard #710). Publishes synthetic set/clear
+	// events through the same pubsub adapter the pubsub_publish host fn uses,
+	// and registers a WS disconnect hook so a client's owned state auto-clears
+	// the instant its WebSocket drops — zero cron lag. Only wired when a
+	// concrete WSManager is present (the disconnect hook + sweeper need it);
+	// otherwise ephemeral_state_set returns an error.
+	if wsm, ok := wsManager.(*serverless.WSManager); ok && wsm != nil {
+		var publish func(ctx context.Context, namespace, topic string, data []byte) error
+		if pubsubAdapter != nil {
+			publish = func(ctx context.Context, _ string, topic string, data []byte) error {
+				// The adapter namespaces internally (same as PubSubPublish), so
+				// the namespace arg is informational only here.
+				return pubsubAdapter.Publish(ctx, topic, data)
+			}
+		}
+		hf.ephemeralStore = serverless.NewEphemeralStore(publish)
+		wsm.AddDisconnectHook(func(clientID string) {
+			hf.ephemeralStore.ClearClient(context.Background(), clientID)
+		})
+		hf.ephemeralStore.StartSweeper()
+	}
+
+	return hf
 }
--- a/core/pkg/serverless/hostfunctions/http.go
+++ b/core/pkg/serverless/hostfunctions/http.go
@ -17,6 +17,18 @@ func (h *HostFunctions) HTTPFetch(ctx context.Context, method, url string, heade
 	return h.doFetch(ctx, "http_fetch", h.httpClient, method, url, headers, body)
 }

+// SetHTTPResponse records a verbatim HTTP response for a RawHTTPResponse
+// function (bugboard #835). It delegates to the per-invocation collector
+// attached on ctx by the engine; the HTTP invoke handler replays the result
+// byte-for-byte. Validation (raw mode enabled, status range, header-count and
+// body-size caps) lives in serverless.SetRawHTTPResponse.
+func (h *HostFunctions) SetHTTPResponse(ctx context.Context, status int, headers map[string]string, body []byte) error {
+	if err := serverless.SetRawHTTPResponse(ctx, status, headers, body); err != nil {
+		return &serverless.HostFunctionError{Function: "set_http_response", Cause: err} // wrap so the failure names the host call
+	}
+	return nil
+}
+
 // AnyoneFetch makes an outbound HTTP request routed through the Anyone
 // (ANyONe protocol) SOCKS5 proxy, so the third-party endpoint sees an
 // Anyone exit IP instead of the gateway IP and the gateway can't
--- a/core/pkg/serverless/hostfunctions/pubsub.go
+++ b/core/pkg/serverless/hostfunctions/pubsub.go
@ -186,6 +186,40 @@ func dedupBatchByTopic(msgs []pubsub.TopicMessage) []pubsub.TopicMessage {
 	return out
 }

+// EphemeralStateSet records WS-subscribe-tracked ephemeral state for the
+// current invocation's WS client and publishes a "set" event (bugboard #710).
+// The owning client ID and namespace are taken from the invocation context, not
+// from guest arguments, so the function cannot spoof them. Auto-clears on the client's WS disconnect.
+func (h *HostFunctions) EphemeralStateSet(ctx context.Context, topic, key string, payload []byte, ttlMs int64) error {
+	if h.ephemeralStore == nil { // no store was wired for this HostFunctions
+		return &serverless.HostFunctionError{Function: "ephemeral_state_set", Cause: fmt.Errorf("ephemeral state not available on this gateway")}
+	}
+	cur := h.currentInvocationContext(ctx)
+	if cur == nil {
+		return &serverless.HostFunctionError{Function: "ephemeral_state_set", Cause: fmt.Errorf("no invocation context")}
+	}
+	if err := h.ephemeralStore.Set(ctx, cur.Namespace, cur.WSClientID, topic, key, payload, ttlMs); err != nil { // NOTE(review): WSClientID is presumably empty for non-WS invocations — confirm the store rejects that
+		return &serverless.HostFunctionError{Function: "ephemeral_state_set", Cause: err}
+	}
+	return nil
+}
+
+// EphemeralStateClear removes ephemeral state the current WS client owns and
+// publishes a "clear" event (bugboard #710). Intended to be idempotent (that depends on the store's Clear).
+func (h *HostFunctions) EphemeralStateClear(ctx context.Context, topic, key string) error {
+	if h.ephemeralStore == nil { // no store was wired for this HostFunctions
+		return &serverless.HostFunctionError{Function: "ephemeral_state_clear", Cause: fmt.Errorf("ephemeral state not available on this gateway")}
+	}
+	cur := h.currentInvocationContext(ctx)
+	if cur == nil {
+		return &serverless.HostFunctionError{Function: "ephemeral_state_clear", Cause: fmt.Errorf("no invocation context")}
+	}
+	if err := h.ephemeralStore.Clear(ctx, cur.Namespace, cur.WSClientID, topic, key); err != nil { // owner and namespace come from the invocation context, not from the guest
+		return &serverless.HostFunctionError{Function: "ephemeral_state_clear", Cause: err}
+	}
+	return nil
+}
+
 // WSSend sends data to a specific WebSocket client.
 func (h *HostFunctions) WSSend(ctx context.Context, clientID string, data []byte) error {
 	if h.wsManager == nil {
--- a/core/pkg/serverless/hostfunctions/secrets.go
+++ b/core/pkg/serverless/hostfunctions/secrets.go
@ -14,6 +14,9 @@ import (
 	"go.uber.org/zap"
 )

+// secretsKeyBytes is the required length of the AES-256 encryption key.
+const secretsKeyBytes = 32
+
 // DBSecretsManager implements SecretsManager using the database.
 type DBSecretsManager struct {
 	db            rqlite.Client
@ -25,21 +28,34 @@ type DBSecretsManager struct {
 var _ serverless.SecretsManager = (*DBSecretsManager)(nil)

 // NewDBSecretsManager creates a secrets manager backed by the database.
-func NewDBSecretsManager(db rqlite.Client, encryptionKeyHex string, logger *zap.Logger) (*DBSecretsManager, error) {
+//
+// encryptionKeyHex must be a 32-byte AES-256 key, hex-encoded (64 chars).
+//
+// When encryptionKeyHex is empty the behaviour depends on allowEphemeral:
+//   - allowEphemeral=false (production): returns an error. A misconfigured
+//     node must fail loudly rather than silently generate a per-process
+//     ephemeral key. With an ephemeral key, secrets encrypted by one
+//     process cannot be decrypted by another (or after a restart), which
+//     makes get_secret return garbage/errors (bugboard #837).
+//   - allowEphemeral=true (tests/dev): generates a random per-process key
+//     and logs a warning. Secrets will not persist across restarts.
+func NewDBSecretsManager(db rqlite.Client, encryptionKeyHex string, allowEphemeral bool, logger *zap.Logger) (*DBSecretsManager, error) {
 	var key []byte
 	if encryptionKeyHex != "" {
 		var err error
 		key, err = hex.DecodeString(encryptionKeyHex)
-		if err != nil || len(key) != 32 {
-			return nil, fmt.Errorf("invalid encryption key: must be 32 bytes hex-encoded")
+		if err != nil || len(key) != secretsKeyBytes {
+			return nil, fmt.Errorf("invalid secrets encryption key: must be %d bytes hex-encoded (%d hex chars)", secretsKeyBytes, secretsKeyBytes*2)
 		}
-	} else {
-		// Generate a random key if none provided
-		key = make([]byte, 32)
+	} else if allowEphemeral {
+		// Generate a random per-process key (dev/test only).
+		key = make([]byte, secretsKeyBytes)
 		if _, err := rand.Read(key); err != nil {
-			return nil, fmt.Errorf("failed to generate encryption key: %w", err)
+			return nil, fmt.Errorf("failed to generate ephemeral secrets encryption key: %w", err)
 		}
-		logger.Warn("Generated random secrets encryption key - secrets will not persist across restarts")
+		logger.Warn("Generated random ephemeral secrets encryption key - secrets will NOT persist across restarts (dev/test only)")
+	} else {
+		return nil, fmt.Errorf("secrets encryption key is required: set secrets_encryption_key (see %s/secrets/secrets-encryption-key); without it secrets cannot be decrypted across processes or restarts (bugboard #837)", "~/.orama")
 	}

 	return &DBSecretsManager{
--- a/core/pkg/serverless/hostfunctions/secrets_test.go
+++ b/core/pkg/serverless/hostfunctions/secrets_test.go
@ -0,0 +1,199 @@
+package hostfunctions
+
+import (
+	"context"
+	"database/sql"
+	"errors"
+	"strings"
+	"testing"
+
+	"github.com/DeBrosOfficial/network/pkg/rqlite"
+	"github.com/DeBrosOfficial/network/pkg/serverless"
+	"go.uber.org/zap"
+)
+
+// fakeSecretsDB is an in-memory rqlite.Client stub that implements only the
+// Exec/Query paths used by DBSecretsManager (INSERT...ON CONFLICT upsert and
+// SELECT by namespace+name). Storing the encrypted blob in a map lets us
+// round-trip a Set through a Get — the core of the bugboard #837 regression.
+type fakeSecretsDB struct {
+	rqlite.Client // embedded interface is left nil: any method other than Exec/Query panics if called
+	store map[string][]byte // key: namespace\x00name -> encrypted_value
+}
+
+func newFakeSecretsDB() *fakeSecretsDB {
+	return &fakeSecretsDB{store: map[string][]byte{}}
+}
+
+func storeKey(namespace, name string) string {
+	return namespace + "\x00" + name
+}
+
+// Exec handles the upsert; any other statement is accepted and ignored. args order
+// matches secrets.go Set(): (id, namespace, name, encrypted_value, created_at, updated_at).
+func (f *fakeSecretsDB) Exec(ctx context.Context, query string, args ...any) (sql.Result, error) {
+	if strings.Contains(query, "INSERT INTO function_secrets") {
+		namespace, _ := args[1].(string) // unchecked indices: panics if fewer than 4 args are passed
+		name, _ := args[2].(string)
+		enc, _ := args[3].([]byte) // a non-[]byte value silently becomes nil
+		cp := make([]byte, len(enc)) // keep a private copy of the ciphertext
+		copy(cp, enc)
+		f.store[storeKey(namespace, name)] = cp
+		return fakeResult{rows: 1}, nil
+	}
+	return fakeResult{}, nil // zero rows affected for everything else
+}
+
+// Query handles the SELECT encrypted_value ... WHERE namespace=? AND name=?; a miss leaves dest untouched and returns nil.
+func (f *fakeSecretsDB) Query(ctx context.Context, dest any, query string, args ...any) error {
+	if !strings.Contains(query, "SELECT encrypted_value") {
+		return errors.New("unexpected query")
+	}
+	namespace, _ := args[0].(string) // unchecked indices: panics if fewer than 2 args are passed
+	name, _ := args[1].(string)
+	rows, ok := dest.(*[]struct { // must be identical to the caller's anonymous row type, struct tag included
+		EncryptedValue []byte `db:"encrypted_value"`
+	})
+	if !ok {
+		return errors.New("unexpected dest type")
+	}
+	if enc, found := f.store[storeKey(namespace, name)]; found {
+		*rows = append(*rows, struct {
+			EncryptedValue []byte `db:"encrypted_value"`
+		}{EncryptedValue: enc})
+	}
+	return nil
+}
+
+type fakeResult struct{ rows int64 }
+
+func (r fakeResult) LastInsertId() (int64, error) { return 0, nil }
+func (r fakeResult) RowsAffected() (int64, error) { return r.rows, nil }
+
+// validKey is a 32-byte AES-256 key, hex-encoded (64 chars).
+const validKey = "0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef"
+
+// otherKey is a different valid 32-byte key.
+const otherKey = "fedcba9876543210fedcba9876543210fedcba9876543210fedcba9876543210"
+
+// TestDBSecretsManager_SetGetRoundTrip_sameKey proves the fix: a secret
+// encrypted with a fixed key is decryptable by a SEPARATE manager constructed
+// with the SAME key (simulating another process / a restart).
+func TestDBSecretsManager_SetGetRoundTrip_sameKey(t *testing.T) {
+	db := newFakeSecretsDB()
+	logger := zap.NewNop()
+	ctx := context.Background()
+
+	writer, err := NewDBSecretsManager(db, validKey, false, logger)
+	if err != nil {
+		t.Fatalf("NewDBSecretsManager (writer) failed: %v", err)
+	}
+	if err := writer.Set(ctx, "ns1", "API_TOKEN", "s3cr3t-value"); err != nil {
+		t.Fatalf("Set failed: %v", err)
+	}
+
+	// A fresh manager with the SAME key (different process / post-restart).
+	reader, err := NewDBSecretsManager(db, validKey, false, logger)
+	if err != nil {
+		t.Fatalf("NewDBSecretsManager (reader) failed: %v", err)
+	}
+	got, err := reader.Get(ctx, "ns1", "API_TOKEN")
+	if err != nil {
+		t.Fatalf("Get failed: %v", err)
+	}
+	if got != "s3cr3t-value" {
+		t.Errorf("Get returned %q, want %q", got, "s3cr3t-value")
+	}
+}
+
+// TestDBSecretsManager_GetWithDifferentKey_fails proves the bug it guards
+// against: a manager with a DIFFERENT key cannot decrypt — exactly what
+// happened when each process generated its own ephemeral key (bugboard #837).
+func TestDBSecretsManager_GetWithDifferentKey_fails(t *testing.T) {
+	db := newFakeSecretsDB()
+	logger := zap.NewNop()
+	ctx := context.Background()
+
+	writer, err := NewDBSecretsManager(db, validKey, false, logger)
+	if err != nil {
+		t.Fatalf("NewDBSecretsManager (writer) failed: %v", err)
+	}
+	if err := writer.Set(ctx, "ns1", "API_TOKEN", "s3cr3t-value"); err != nil {
+		t.Fatalf("Set failed: %v", err)
+	}
+
+	reader, err := NewDBSecretsManager(db, otherKey, false, logger)
+	if err != nil {
+		t.Fatalf("NewDBSecretsManager (reader) failed: %v", err)
+	}
+	if _, err := reader.Get(ctx, "ns1", "API_TOKEN"); err == nil {
+		t.Fatal("expected decryption to fail with a different key, got nil error")
+	}
+}
+
+// TestDBSecretsManager_emptyKey_isLoud verifies the production constructor
+// refuses to start with an empty key (allowEphemeral=false) instead of
+// silently generating an undecryptable ephemeral key.
+func TestDBSecretsManager_emptyKey_isLoud(t *testing.T) {
+	db := newFakeSecretsDB()
+	_, err := NewDBSecretsManager(db, "", false, zap.NewNop())
+	if err == nil {
+		t.Fatal("expected error for empty key with allowEphemeral=false, got nil")
+	}
+	if !strings.Contains(err.Error(), "secrets encryption key is required") {
+		t.Errorf("unexpected error message: %v", err)
+	}
+}
+
+// TestDBSecretsManager_emptyKey_ephemeralAllowed verifies tests/dev can still
+// opt into a per-process ephemeral key.
+func TestDBSecretsManager_emptyKey_ephemeralAllowed(t *testing.T) {
+	db := newFakeSecretsDB()
+	mgr, err := NewDBSecretsManager(db, "", true, zap.NewNop())
+	if err != nil {
+		t.Fatalf("expected ephemeral key to be allowed, got error: %v", err)
+	}
+	// Ephemeral key still round-trips within the same process.
+	ctx := context.Background()
+	if err := mgr.Set(ctx, "ns1", "K", "v"); err != nil {
+		t.Fatalf("Set failed: %v", err)
+	}
+	got, err := mgr.Get(ctx, "ns1", "K")
+	if err != nil {
+		t.Fatalf("Get failed: %v", err)
+	}
+	if got != "v" {
+		t.Errorf("Get returned %q, want %q", got, "v")
+	}
+}
+
+// TestDBSecretsManager_invalidKey_rejected covers malformed keys (wrong
+// length, non-hex) at the boundary.
+func TestDBSecretsManager_invalidKey_rejected(t *testing.T) {
+	db := newFakeSecretsDB()
+	cases := map[string]string{
+		"too short":   "abcd",
+		"odd hex":     "abc",
+		"not hex":     strings.Repeat("zz", 32),
+		"wrong bytes": "0123456789abcdef", // 8 bytes, not 32
+	}
+	for name, key := range cases {
+		t.Run(name, func(t *testing.T) {
+			if _, err := NewDBSecretsManager(db, key, false, zap.NewNop()); err == nil {
+				t.Fatalf("expected error for invalid key %q, got nil", key)
+			}
+		})
+	}
+}
+
+// TestDBSecretsManager_Get_notFound verifies the not-found sentinel survives.
+func TestDBSecretsManager_Get_notFound(t *testing.T) {
+	db := newFakeSecretsDB()
+	mgr, err := NewDBSecretsManager(db, validKey, false, zap.NewNop())
+	if err != nil {
+		t.Fatalf("NewDBSecretsManager failed: %v", err)
+	}
+	if _, err := mgr.Get(context.Background(), "ns1", "missing"); !errors.Is(err, serverless.ErrSecretNotFound) {
+		t.Errorf("expected ErrSecretNotFound, got %v", err)
+	}
+}
--- a/core/pkg/serverless/hostfunctions/types.go
+++ b/core/pkg/serverless/hostfunctions/types.go
@ -97,6 +97,13 @@ type HostFunctions struct {
 	triggerDispatcher     *triggers.PubSubDispatcher
 	triggerDispatcherLock sync.RWMutex

+	// ephemeralStore backs ephemeral_state_set / ephemeral_state_clear
+	// (bugboard #710). Constructed in NewHostFunctions when a WS manager is
+	// present; nil otherwise (host fns then return an error). The store
+	// registers a disconnect hook on the WS manager so a client's owned state
+	// auto-clears the instant its WebSocket disconnects.
+	ephemeralStore *serverless.EphemeralStore
+
 	// Current invocation context (set per-execution)
 	invCtx     *serverless.InvocationContext
 	invCtxLock sync.RWMutex
--- a/core/pkg/serverless/invoke.go
+++ b/core/pkg/serverless/invoke.go
@ -75,6 +75,13 @@ type InvokeResponse struct {
 	Error      string           `json:"error,omitempty"`
 	DurationMS int64            `json:"duration_ms"`
 	Retries    int              `json:"retries,omitempty"`
+
+	// RawHTTP carries a verbatim HTTP response set by a RawHTTPResponse
+	// function via set_http_response (bugboard #835). nil for normal
+	// functions and for raw functions that never called set_http_response —
+	// the HTTP handler falls back to the standard JSON/Ack path in that case.
+	// Not serialized; consumed directly by the HTTP invoke handler.
+	RawHTTP *RawHTTPResult `json:"-"`
 }

 // Invoke executes a function with automatic retry logic.
@ -169,6 +176,8 @@ func (i *Invoker) Invoke(ctx context.Context, req *InvokeRequest) (*InvokeRespon
 	}

 	response.Status = InvocationStatusSuccess
+	// Surface any verbatim HTTP response the function set (bugboard #835).
+	response.RawHTTP = invCtx.RawHTTP
 	return response, nil
 }

--- a/core/pkg/serverless/mocks_test.go
+++ b/core/pkg/serverless/mocks_test.go
@ -247,6 +247,18 @@ func (m *MockHostServices) WSPubSubUnbridge(ctx context.Context, clientID, topic
 	return nil
 }

+func (m *MockHostServices) SetHTTPResponse(ctx context.Context, status int, headers map[string]string, body []byte) error {
+	return SetRawHTTPResponse(ctx, status, headers, body)
+}
+
+func (m *MockHostServices) EphemeralStateSet(ctx context.Context, topic, key string, payload []byte, ttlMs int64) error {
+	return nil
+}
+
+func (m *MockHostServices) EphemeralStateClear(ctx context.Context, topic, key string) error {
+	return nil
+}
+
 func (m *MockHostServices) WSSend(ctx context.Context, clientID string, data []byte) error {
 	return nil
 }
--- a/core/pkg/serverless/raw_http.go
+++ b/core/pkg/serverless/raw_http.go
@ -0,0 +1,142 @@
+package serverless
+
+import (
+	"context"
+	"fmt"
+	"sync"
+)
+
+// Raw-HTTP-response mode (bugboard #835).
+//
+// A function deployed with RawHTTPResponse=true can emit a verbatim HTTP
+// response (status + headers + body) instead of the JSON/Ack-wrapped output
+// the stateless invoke handler normally produces. This lets a namespace app
+// proxy an upstream RPC (Helius / Alchemy) transparently — the function reads
+// the request, calls the upstream, and replays the upstream's status, headers,
+// and body byte-for-byte back to its own caller.
+//
+// The primitive provided here is ONLY the response carrier + the host-call
+// validation. Per-user-JWT quota gating (which the ticket mentions) is the
+// APP's responsibility: the function can call oh.GetCallerJwtSubject() and
+// decide whether to serve. The gateway does not implement quota here.
+
+const (
+	// rawHTTPMaxHeaders caps how many response headers (map entries) a
+	// function may set. Generous for a proxy use-case (upstream RPCs return
+	// well under this) while bounding the per-invocation allocation a hostile
+	// function could force. Header name/value lengths are not capped here.
+	rawHTTPMaxHeaders = 64
+
+	// rawHTTPMaxBodyBytes caps the verbatim response body a function may set.
+	// 8 MiB comfortably covers JSON-RPC responses (even large getBlock /
+	// getProgramAccounts payloads) without letting a function buffer an
+	// unbounded body in gateway memory.
+	rawHTTPMaxBodyBytes = 8 << 20
+
+	// rawHTTPMinStatus / rawHTTPMaxStatus are the inclusive bounds of an accepted HTTP status code.
+	rawHTTPMinStatus = 100 // NOTE(review): admits 1xx informational codes — confirm the replay handler copes with them
+	rawHTTPMaxStatus = 599
+)
+
+// RawHTTPResult is a verbatim HTTP response set by a RawHTTPResponse function.
+// Set is true once the function has called set_http_response at least once;
+// the invoke handler only takes the raw path when Set is true (otherwise it
+// falls back to the normal JSON/Ack-wrapped behavior).
+type RawHTTPResult struct {
+	Status  int               // HTTP status code supplied by the function
+	Headers map[string]string // one value per header name, so repeated headers cannot be expressed; nil when none were supplied
+	Body    []byte            // response body bytes
+	Set     bool              // true once a response has been recorded
+}
+
+// rawHTTPCollector is the mutable per-invocation sink the set_http_response
+// host function writes to. It rides the invocation's context (same per-call
+// propagation model as the publish counter and log buffer) so concurrent
+// invocations never cross-write each other's response.
+type rawHTTPCollector struct {
+	mu     sync.Mutex    // guards result
+	result RawHTTPResult // most recently recorded response; zero value until the first successful set
+}
+
+// rawHTTPKey is the unexported context-value key for the raw-HTTP collector.
+type rawHTTPKey struct{}
+
+// WithRawHTTPCollector returns a derived ctx carrying a FRESH per-invocation
+// raw-HTTP response collector. The engine attaches this before executing a
+// RawHTTPResponse function so the set_http_response host call has somewhere to
+// write; for non-raw functions the collector is absent and SetRawHTTPResponse
+// rejects the host call with an error instead of recording anything.
+func WithRawHTTPCollector(ctx context.Context) context.Context {
+	return context.WithValue(ctx, rawHTTPKey{}, &rawHTTPCollector{}) // a new collector on every call, never shared
+}
+
+// rawHTTPCollectorFromCtx extracts the collector attached via
+// WithRawHTTPCollector. It returns nil when ctx is nil or carries no
+// collector (non-raw function, or an untracked code path).
+func rawHTTPCollectorFromCtx(ctx context.Context) *rawHTTPCollector {
+	if ctx == nil {
+		return nil
+	}
+	c, _ := ctx.Value(rawHTTPKey{}).(*rawHTTPCollector) // a missing value fails the assertion and leaves c nil
+	return c
+}
+
+// SetRawHTTPResponse records a verbatim HTTP response on the invocation's
+// collector, replacing any earlier one (last write wins). It returns an error,
+// without touching the collector, if none is attached (the function was not
+// deployed with RawHTTPResponse) or if the status / header count / body size
+// fail validation. Headers may be nil. Body and headers are stored as copies.
+func SetRawHTTPResponse(ctx context.Context, status int, headers map[string]string, body []byte) error {
+	c := rawHTTPCollectorFromCtx(ctx)
+	if c == nil {
+		return fmt.Errorf("set_http_response: function is not deployed with raw_http_response enabled")
+	}
+	if status < rawHTTPMinStatus || status > rawHTTPMaxStatus { // both bounds are inclusive
+		return fmt.Errorf("set_http_response: status %d out of range [%d,%d]", status, rawHTTPMinStatus, rawHTTPMaxStatus)
+	}
+	if len(headers) > rawHTTPMaxHeaders { // only the entry count is checked, not name/value sizes
+		return fmt.Errorf("set_http_response: too many headers (%d > %d)", len(headers), rawHTTPMaxHeaders)
+	}
+	if len(body) > rawHTTPMaxBodyBytes {
+		return fmt.Errorf("set_http_response: body too large (%d bytes > %d)", len(body), rawHTTPMaxBodyBytes)
+	}
+
+	bodyCopy := make([]byte, len(body)) // non-nil even for a nil or empty body
+	copy(bodyCopy, body) // the caller (which reads it out of guest WASM memory) may reuse its buffer
+
+	var hdrCopy map[string]string
+	if len(headers) > 0 { // nil or empty input is stored as a nil map
+		hdrCopy = make(map[string]string, len(headers))
+		for k, v := range headers {
+			hdrCopy[k] = v
+		}
+	}
+
+	c.mu.Lock() // copies were built outside the lock; only the swap is guarded
+	c.result = RawHTTPResult{
+		Status:  status,
+		Headers: hdrCopy,
+		Body:    bodyCopy,
+		Set:     true,
+	}
+	c.mu.Unlock()
+	return nil
+}
+
+// TakeRawHTTPResponse returns the raw HTTP response recorded on the ctx's
+// collector and whether one was set: (zero, false) when no collector is attached
+// or set_http_response was never called. It does not clear the collector, and the
+// result shares its Headers map and Body slice with it. Called by the engine after Execute.
+func TakeRawHTTPResponse(ctx context.Context) (RawHTTPResult, bool) {
+	c := rawHTTPCollectorFromCtx(ctx)
+	if c == nil {
+		return RawHTTPResult{}, false
+	}
+	c.mu.Lock()
+	res := c.result // struct copy; the map and slice inside are not deep-copied
+	c.mu.Unlock()
+	if !res.Set {
+		return RawHTTPResult{}, false
+	}
+	return res, true
+}
--- a/core/pkg/serverless/raw_http_test.go
+++ b/core/pkg/serverless/raw_http_test.go
@ -0,0 +1,129 @@
+package serverless
+
+import (
+	"bytes"
+	"context"
+	"strings"
+	"testing"
+)
+
+func TestSetRawHTTPResponse_happyPath(t *testing.T) {
+	ctx := WithRawHTTPCollector(context.Background())
+
+	headers := map[string]string{"Content-Type": "application/json"}
+	body := []byte(`{"jsonrpc":"2.0","result":42}`)
+	if err := SetRawHTTPResponse(ctx, 200, headers, body); err != nil {
+		t.Fatalf("SetRawHTTPResponse: unexpected error: %v", err)
+	}
+
+	res, ok := TakeRawHTTPResponse(ctx)
+	if !ok {
+		t.Fatal("TakeRawHTTPResponse: expected a response to be set")
+	}
+	if res.Status != 200 {
+		t.Errorf("status = %d, want 200", res.Status)
+	}
+	if res.Headers["Content-Type"] != "application/json" {
+		t.Errorf("Content-Type header = %q, want application/json", res.Headers["Content-Type"])
+	}
+	if !bytes.Equal(res.Body, body) {
+		t.Errorf("body = %q, want %q", res.Body, body)
+	}
+}
+
+func TestSetRawHTTPResponse_copiesBodyAndHeaders(t *testing.T) {
+	ctx := WithRawHTTPCollector(context.Background())
+
+	headers := map[string]string{"X-Test": "v1"}
+	body := []byte("original")
+	if err := SetRawHTTPResponse(ctx, 200, headers, body); err != nil {
+		t.Fatalf("SetRawHTTPResponse: %v", err)
+	}
+
+	// Mutate caller-owned buffers AFTER the call — the stored copy must not change.
+	body[0] = 'X'
+	headers["X-Test"] = "mutated"
+
+	res, _ := TakeRawHTTPResponse(ctx)
+	if string(res.Body) != "original" {
+		t.Errorf("body was not copied: got %q", res.Body)
+	}
+	if res.Headers["X-Test"] != "v1" {
+		t.Errorf("headers were not copied: got %q", res.Headers["X-Test"])
+	}
+}
+
+func TestSetRawHTTPResponse_noCollector(t *testing.T) {
+	// No collector attached → the function is not in raw mode; must error.
+	err := SetRawHTTPResponse(context.Background(), 200, nil, []byte("x"))
+	if err == nil {
+		t.Fatal("expected error when no collector is attached")
+	}
+	if !strings.Contains(err.Error(), "raw_http_response") {
+		t.Errorf("error = %q, want it to mention raw_http_response", err.Error())
+	}
+}
+
+func TestSetRawHTTPResponse_rejectsBadStatus(t *testing.T) {
+	for _, status := range []int{0, 99, 600, 1000, -1} {
+		ctx := WithRawHTTPCollector(context.Background())
+		if err := SetRawHTTPResponse(ctx, status, nil, nil); err == nil {
+			t.Errorf("status %d: expected validation error, got nil", status)
+		}
+		if _, ok := TakeRawHTTPResponse(ctx); ok {
+			t.Errorf("status %d: response should not be set after a rejected status", status)
+		}
+	}
+}
+
+func TestSetRawHTTPResponse_rejectsTooManyHeaders(t *testing.T) {
+	ctx := WithRawHTTPCollector(context.Background())
+	headers := make(map[string]string, rawHTTPMaxHeaders+1)
+	for i := 0; i <= rawHTTPMaxHeaders; i++ { // inclusive bound: one key more than the cap
+		headers["h"+string(rune('a'+i%26))+string(rune('0'+i/26))] = "v" // letter cycles a-z and the digit bumps every 26, so keys never collide
+	}
+	if len(headers) <= rawHTTPMaxHeaders { // guard against a key collision silently shrinking the map
+		t.Fatalf("test setup: expected > %d headers, got %d", rawHTTPMaxHeaders, len(headers))
+	}
+	if err := SetRawHTTPResponse(ctx, 200, headers, nil); err == nil {
+		t.Fatal("expected error for too many headers")
+	}
+}
+
+func TestSetRawHTTPResponse_rejectsOversizedBody(t *testing.T) {
+	ctx := WithRawHTTPCollector(context.Background())
+	body := make([]byte, rawHTTPMaxBodyBytes+1)
+	if err := SetRawHTTPResponse(ctx, 200, nil, body); err == nil {
+		t.Fatal("expected error for oversized body")
+	}
+}
+
+func TestTakeRawHTTPResponse_notSet(t *testing.T) {
+	// Collector attached but set_http_response never called → (zero, false).
+	ctx := WithRawHTTPCollector(context.Background())
+	if _, ok := TakeRawHTTPResponse(ctx); ok {
+		t.Fatal("expected ok=false when no response was set")
+	}
+
+	// No collector at all → also (zero, false).
+	if _, ok := TakeRawHTTPResponse(context.Background()); ok {
+		t.Fatal("expected ok=false with no collector")
+	}
+}
+
+func TestSetRawHTTPResponse_lastWriteWins(t *testing.T) {
+	ctx := WithRawHTTPCollector(context.Background())
+	if err := SetRawHTTPResponse(ctx, 200, nil, []byte("first")); err != nil {
+		t.Fatalf("first SetRawHTTPResponse: %v", err)
+	}
+	if err := SetRawHTTPResponse(ctx, 503, map[string]string{"Retry-After": "5"}, []byte("second")); err != nil {
+		t.Fatalf("second SetRawHTTPResponse: %v", err)
+	}
+	res, ok := TakeRawHTTPResponse(ctx)
+	if !ok {
+		t.Fatal("expected response to be set")
+	}
+	if res.Status != 503 || string(res.Body) != "second" || res.Headers["Retry-After"] != "5" {
+		t.Errorf("last-write-wins failed: got status=%d body=%q headers=%v", res.Status, res.Body, res.Headers)
+	}
+}
--- a/core/pkg/serverless/registry.go
+++ b/core/pkg/serverless/registry.go
@ -107,8 +107,9 @@ func (r *Registry) Register(ctx context.Context, fn *FunctionDefinition, wasmByt
 			memory_limit_mb, timeout_seconds, is_public,
 			retry_count, retry_delay_seconds, dlq_topic,
 			status, created_at, updated_at, created_by,
-			ws_persistent, ws_idle_timeout_sec, ws_max_frame_bytes, ws_max_inflight_per_conn
-		) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
+			ws_persistent, ws_idle_timeout_sec, ws_max_frame_bytes, ws_max_inflight_per_conn,
+			raw_http_response
+		) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
 	`
 	_, err = r.db.Exec(ctx, query,
 		id, fn.Name, fn.Namespace, version, wasmCID,
@ -116,6 +117,7 @@ func (r *Registry) Register(ctx context.Context, fn *FunctionDefinition, wasmByt
 		fn.RetryCount, retryDelay, fn.DLQTopic,
 		string(FunctionStatusActive), now, now, fn.Namespace,
 		fn.WSPersistent, fn.WSIdleTimeoutSec, fn.WSMaxFrameBytes, fn.WSMaxInflightPerConn,
+		fn.RawHTTPResponse,
 	)
 	if err != nil {
 		return nil, &DeployError{FunctionName: fn.Name, Cause: fmt.Errorf("failed to register function: %w", err)}
@ -154,7 +156,8 @@ func (r *Registry) Get(ctx context.Context, namespace, name string, version int)
 				memory_limit_mb, timeout_seconds, is_public,
 				retry_count, retry_delay_seconds, dlq_topic,
 				status, created_at, updated_at, created_by,
-				ws_persistent, ws_idle_timeout_sec, ws_max_frame_bytes, ws_max_inflight_per_conn
+				ws_persistent, ws_idle_timeout_sec, ws_max_frame_bytes, ws_max_inflight_per_conn,
+			raw_http_response
 			FROM functions
 			WHERE namespace = ? AND name = ? AND status = ?
 			ORDER BY version DESC
@ -167,7 +170,8 @@ func (r *Registry) Get(ctx context.Context, namespace, name string, version int)
 				memory_limit_mb, timeout_seconds, is_public,
 				retry_count, retry_delay_seconds, dlq_topic,
 				status, created_at, updated_at, created_by,
-				ws_persistent, ws_idle_timeout_sec, ws_max_frame_bytes, ws_max_inflight_per_conn
+				ws_persistent, ws_idle_timeout_sec, ws_max_frame_bytes, ws_max_inflight_per_conn,
+			raw_http_response
 			FROM functions
 			WHERE namespace = ? AND name = ? AND version = ?
 		`
@ -197,7 +201,8 @@ func (r *Registry) List(ctx context.Context, namespace string) ([]*Function, err
 			f.memory_limit_mb, f.timeout_seconds, f.is_public,
 			f.retry_count, f.retry_delay_seconds, f.dlq_topic,
 			f.status, f.created_at, f.updated_at, f.created_by,
-			f.ws_persistent, f.ws_idle_timeout_sec, f.ws_max_frame_bytes, f.ws_max_inflight_per_conn
+			f.ws_persistent, f.ws_idle_timeout_sec, f.ws_max_frame_bytes, f.ws_max_inflight_per_conn,
+			f.raw_http_response
 		FROM functions f
 		INNER JOIN (
 			SELECT namespace, name, MAX(version) as max_version
@ -668,6 +673,11 @@ func (r *Registry) rowToFunction(row *functionRow) *Function {
 		WSIdleTimeoutSec:     row.WSIdleTimeoutSec,
 		WSMaxFrameBytes:      row.WSMaxFrameBytes,
 		WSMaxInflightPerConn: row.WSMaxInflightPerConn,
+
+		// Raw-HTTP-response mode (bugboard #835). Without reading this back
+		// the invoke handler's `if fn.RawHTTPResponse` engine branch never
+		// fires and set_http_response is rejected for every function.
+		RawHTTPResponse: row.RawHTTPResponse,
 	}
 }

@ -716,6 +726,11 @@ type functionRow struct {
 	WSIdleTimeoutSec     int  `db:"ws_idle_timeout_sec"`
 	WSMaxFrameBytes      int  `db:"ws_max_frame_bytes"`
 	WSMaxInflightPerConn int  `db:"ws_max_inflight_per_conn"`
+
+	// Raw-HTTP-response mode (bugboard #835). Backed by migration
+	// 029_raw_http_response.sql; defaults to false so existing functions
+	// keep the JSON/Ack-wrapped behavior.
+	RawHTTPResponse bool `db:"raw_http_response"`
 }

 type envVarRow struct {
--- a/core/pkg/serverless/registry/function_store.go
+++ b/core/pkg/serverless/registry/function_store.go
@ -57,8 +57,9 @@ func (s *FunctionStore) Save(ctx context.Context, fn *FunctionDefinition, wasmCI
 			memory_limit_mb, timeout_seconds, is_public,
 			retry_count, retry_delay_seconds, dlq_topic,
 			status, created_at, updated_at, created_by,
-			ws_persistent, ws_idle_timeout_sec, ws_max_frame_bytes, ws_max_inflight_per_conn
-		) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
+			ws_persistent, ws_idle_timeout_sec, ws_max_frame_bytes, ws_max_inflight_per_conn,
+			raw_http_response
+		) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
 	`
 	_, err := s.db.Exec(ctx, query,
 		id, fn.Name, fn.Namespace, version, wasmCID,
@ -66,6 +67,7 @@ func (s *FunctionStore) Save(ctx context.Context, fn *FunctionDefinition, wasmCI
 		fn.RetryCount, retryDelay, fn.DLQTopic,
 		string(FunctionStatusActive), now, now, fn.Namespace,
 		fn.WSPersistent, fn.WSIdleTimeoutSec, fn.WSMaxFrameBytes, fn.WSMaxInflightPerConn,
+		fn.RawHTTPResponse,
 	)
 	if err != nil {
 		return nil, fmt.Errorf("failed to save function: %w", err)
@ -101,6 +103,7 @@ func (s *FunctionStore) Save(ctx context.Context, fn *FunctionDefinition, wasmCI
 		WSIdleTimeoutSec:     fn.WSIdleTimeoutSec,
 		WSMaxFrameBytes:      fn.WSMaxFrameBytes,
 		WSMaxInflightPerConn: fn.WSMaxInflightPerConn,
+		RawHTTPResponse:      fn.RawHTTPResponse,
 	}, nil
 }

@ -114,7 +117,7 @@ func (s *FunctionStore) Get(ctx context.Context, namespace, name string, version

 	if version == 0 {
 		query = `
-			SELECT id, name, namespace, version, wasm_cid, source_cid, ws_persistent, ws_idle_timeout_sec, ws_max_frame_bytes, ws_max_inflight_per_conn,
+			SELECT id, name, namespace, version, wasm_cid, source_cid, ws_persistent, ws_idle_timeout_sec, ws_max_frame_bytes, ws_max_inflight_per_conn, raw_http_response,
 				memory_limit_mb, timeout_seconds, is_public,
 				retry_count, retry_delay_seconds, dlq_topic,
 				status, created_at, updated_at, created_by
@ -126,7 +129,7 @@ func (s *FunctionStore) Get(ctx context.Context, namespace, name string, version
 		args = []interface{}{namespace, name, string(FunctionStatusActive)}
 	} else {
 		query = `
-			SELECT id, name, namespace, version, wasm_cid, source_cid, ws_persistent, ws_idle_timeout_sec, ws_max_frame_bytes, ws_max_inflight_per_conn,
+			SELECT id, name, namespace, version, wasm_cid, source_cid, ws_persistent, ws_idle_timeout_sec, ws_max_frame_bytes, ws_max_inflight_per_conn, raw_http_response,
 				memory_limit_mb, timeout_seconds, is_public,
 				retry_count, retry_delay_seconds, dlq_topic,
 				status, created_at, updated_at, created_by
@ -154,7 +157,7 @@ func (s *FunctionStore) Get(ctx context.Context, namespace, name string, version
 // GetByID retrieves a function by its ID.
 func (s *FunctionStore) GetByID(ctx context.Context, id string) (*Function, error) {
 	query := `
-		SELECT id, name, namespace, version, wasm_cid, source_cid, ws_persistent, ws_idle_timeout_sec, ws_max_frame_bytes, ws_max_inflight_per_conn,
+		SELECT id, name, namespace, version, wasm_cid, source_cid, ws_persistent, ws_idle_timeout_sec, ws_max_frame_bytes, ws_max_inflight_per_conn, raw_http_response,
 			memory_limit_mb, timeout_seconds, is_public,
 			retry_count, retry_delay_seconds, dlq_topic,
 			status, created_at, updated_at, created_by
@ -180,7 +183,7 @@ func (s *FunctionStore) GetByNameInternal(ctx context.Context, namespace, name s
 	name = strings.TrimSpace(name)

 	query := `
-		SELECT id, name, namespace, version, wasm_cid, source_cid, ws_persistent, ws_idle_timeout_sec, ws_max_frame_bytes, ws_max_inflight_per_conn,
+		SELECT id, name, namespace, version, wasm_cid, source_cid, ws_persistent, ws_idle_timeout_sec, ws_max_frame_bytes, ws_max_inflight_per_conn, raw_http_response,
 			memory_limit_mb, timeout_seconds, is_public,
 			retry_count, retry_delay_seconds, dlq_topic,
 			status, created_at, updated_at, created_by
@ -207,6 +210,7 @@ func (s *FunctionStore) List(ctx context.Context, namespace string) ([]*Function
 	query := `
 		SELECT f.id, f.name, f.namespace, f.version, f.wasm_cid, f.source_cid,
 			f.ws_persistent, f.ws_idle_timeout_sec, f.ws_max_frame_bytes, f.ws_max_inflight_per_conn,
+			f.raw_http_response,
 			f.memory_limit_mb, f.timeout_seconds, f.is_public,
 			f.retry_count, f.retry_delay_seconds, f.dlq_topic,
 			f.status, f.created_at, f.updated_at, f.created_by
@ -238,7 +242,7 @@ func (s *FunctionStore) List(ctx context.Context, namespace string) ([]*Function
 // ListVersions returns all versions of a function.
 func (s *FunctionStore) ListVersions(ctx context.Context, namespace, name string) ([]*Function, error) {
 	query := `
-		SELECT id, name, namespace, version, wasm_cid, source_cid, ws_persistent, ws_idle_timeout_sec, ws_max_frame_bytes, ws_max_inflight_per_conn,
+		SELECT id, name, namespace, version, wasm_cid, source_cid, ws_persistent, ws_idle_timeout_sec, ws_max_frame_bytes, ws_max_inflight_per_conn, raw_http_response,
 			memory_limit_mb, timeout_seconds, is_public,
 			retry_count, retry_delay_seconds, dlq_topic,
 			status, created_at, updated_at, created_by
@ -399,5 +403,6 @@ func rowToFunction(row *functionRow) *Function {
 		WSIdleTimeoutSec:     row.WSIdleTimeoutSec,
 		WSMaxFrameBytes:      row.WSMaxFrameBytes,
 		WSMaxInflightPerConn: row.WSMaxInflightPerConn,
+		RawHTTPResponse:      row.RawHTTPResponse,
 	}
 }
--- a/core/pkg/serverless/registry/types.go
+++ b/core/pkg/serverless/registry/types.go
@ -38,6 +38,9 @@ type FunctionDefinition struct {
 	WSIdleTimeoutSec     int
 	WSMaxFrameBytes      int
 	WSMaxInflightPerConn int
+
+	// RawHTTPResponse enables raw-HTTP-response mode (bugboard #835).
+	RawHTTPResponse bool
 }

 // Function represents a deployed serverless function.
@ -64,6 +67,9 @@ type Function struct {
 	WSIdleTimeoutSec     int
 	WSMaxFrameBytes      int
 	WSMaxInflightPerConn int
+
+	// RawHTTPResponse enables raw-HTTP-response mode (bugboard #835).
+	RawHTTPResponse bool
 }

 // LogEntry represents a log message emitted from inside a WASM function
@ -180,6 +186,7 @@ type functionRow struct {
 	WSIdleTimeoutSec     int
 	WSMaxFrameBytes      int
 	WSMaxInflightPerConn int
+	RawHTTPResponse      bool
 }

 type envVarRow struct {
--- a/core/pkg/serverless/registry_raw_http_test.go
+++ b/core/pkg/serverless/registry_raw_http_test.go
@ -0,0 +1,34 @@
+package serverless
+
+import (
+	"strings"
+	"testing"
+)
+
+// TestRegistryRowMapping_IncludesRawHTTPResponse pins the row → Function
+// mapping for the raw_http_response column (bugboard #835). A functionRow
+// with the flag set must yield a Function with RawHTTPResponse == true; a
+// mapper that drops the field would leave raw-HTTP mode off for every function.
+func TestRegistryRowMapping_IncludesRawHTTPResponse(t *testing.T) {
+	src := &functionRow{RawHTTPResponse: true}
+	mapped := (&Registry{}).rowToFunction(src)
+	if mapped.RawHTTPResponse {
+		return
+	}
+	t.Error("rowToFunction did not propagate RawHTTPResponse — raw-HTTP functions would silently fall back to JSON/Ack output (bugboard #835)")
+}
+
+// TestRegistry_QueriesRawHTTPResponseColumn is a SQL-text drift guard: it
+// counts occurrences of "raw_http_response" in the registry source and fails
+// below 5, a loose floor for the INSERT plus the READ-path SELECTs. The count
+// is textual, so comments and struct tags count too. Skips if unreadable.
+func TestRegistry_QueriesRawHTTPResponseColumn(t *testing.T) {
+	const column, minHits = "raw_http_response", 5
+	source, err := readRegistrySource()
+	if err != nil {
+		t.Skipf("cannot read registry.go for SQL inspection: %v", err)
+	}
+	if hits := strings.Count(source, column); hits < minHits {
+		t.Errorf("column raw_http_response appears in registry.go only %d times; expected ≥5 (INSERT + each SELECT path). A READ path probably dropped it and raw-HTTP functions will silently fall back to JSON output.", hits)
+	}
+}
--- a/core/pkg/serverless/triggers/dispatch_local_dedup_integration_test.go
+++ b/core/pkg/serverless/triggers/dispatch_local_dedup_integration_test.go
@ -0,0 +1,159 @@
+package triggers
+
+import (
+	"context"
+	"fmt"
+	"testing"
+
+	olriclib "github.com/olric-data/olric"
+	"github.com/olric-data/olric/stats"
+	"go.uber.org/zap"
+	"go.uber.org/zap/zapcore"
+	"go.uber.org/zap/zaptest/observer"
+)
+
+// failingOlricClient is a stub olric.Client for the "configured but
+// unavailable" backend (bugboard #555): NewDMap always errors, the other
+// lookups report "not implemented", and RefreshMetadata/Close are no-ops.
+type failingOlricClient struct{}
+
+var _ olriclib.Client = failingOlricClient{}
+
+func (failingOlricClient) NewDMap(_ string, _ ...olriclib.DMapOption) (olriclib.DMap, error) {
+	return nil, fmt.Errorf("olric unavailable (test)")
+}
+func (failingOlricClient) NewPubSub(_ ...olriclib.PubSubOption) (*olriclib.PubSub, error) {
+	return nil, fmt.Errorf("not implemented")
+}
+func (failingOlricClient) Stats(_ context.Context, _ string, _ ...olriclib.StatsOption) (stats.Stats, error) {
+	return stats.Stats{}, fmt.Errorf("not implemented")
+}
+func (failingOlricClient) Ping(_ context.Context, _, _ string) (string, error) {
+	return "", fmt.Errorf("not implemented")
+}
+func (failingOlricClient) RoutingTable(_ context.Context) (olriclib.RoutingTable, error) {
+	return nil, fmt.Errorf("not implemented")
+}
+func (failingOlricClient) Members(_ context.Context) ([]olriclib.Member, error) {
+	return nil, fmt.Errorf("not implemented")
+}
+func (failingOlricClient) RefreshMetadata(_ context.Context) error { return nil }
+func (failingOlricClient) Close(_ context.Context) error           { return nil }
+
+// Bugboard #555 — duplicate push from the dispatcher firing twice.
+//
+// These exercise Dispatch's local-dedup short-circuit and the
+// degraded-dedup WARN. They use a nil-db store: getMatches would panic on
+// the nil rqlite.Client, so "did we reach getMatches?" is observable as
+// "did Dispatch panic?". The local dedup runs BEFORE getMatches, so a
+// deduped call must return cleanly without touching the store.
+
+// TestDispatch_localDedupSkipsSecondInvokeSameNode checks that two identical
+// publishes on one dispatcher produce a single dispatch (bugboard #555).
+//
+// The store wraps a nil db, and the test treats a panic escaping Dispatch as
+// proof that the call got past the dedup gate and reached the store lookup.
+func TestDispatch_localDedupSkipsSecondInvokeSameNode(t *testing.T) {
+	nop := zap.NewNop()
+	nilDBStore := NewPubSubTriggerStore(nil, nop)
+	d := NewPubSubDispatcher(nilDBStore, nil, nil, nil, nop)
+
+	const ns, topic = "anchat", "messages:new"
+	payload := []byte(`{"messageId":"m1"}`)
+
+	// dispatchPanicked runs one Dispatch of the fixed publish and reports
+	// whether it panicked.
+	dispatchPanicked := func() (panicked bool) {
+		defer func() {
+			if recover() != nil {
+				panicked = true
+			}
+		}()
+		d.Dispatch(context.Background(), ns, topic, payload, 0)
+		return false
+	}
+
+	// First publish is not a duplicate: it must clear the dedup gate and
+	// hit the store lookup, which shows up here as the nil-db panic.
+	if !dispatchPanicked() {
+		t.Fatal("first publish must pass the dedup gate and reach the store lookup")
+	}
+
+	// Second IDENTICAL publish inside the TTL must be dropped by the local
+	// dedup before the store lookup, so this call returns without a panic.
+	if dispatchPanicked() {
+		t.Error("BUG #555 REGRESSION: identical second publish on the same node " +
+			"must be deduped locally and NOT re-dispatch")
+	}
+}
+
+// TestDispatch_distinctPayloadsBothDispatch checks that two publishes with
+// different payloads on one topic both clear the local dedup. The store wraps
+// a nil db; the test treats a panic escaping Dispatch as "reached the store".
+func TestDispatch_distinctPayloadsBothDispatch(t *testing.T) {
+	nop := zap.NewNop()
+	d := NewPubSubDispatcher(NewPubSubTriggerStore(nil, nop), nil, nil, nil, nop)
+
+	payloads := []string{`{"messageId":"a"}`, `{"messageId":"b"}`}
+	for _, p := range payloads {
+		data := []byte(p)
+		// reached is true when Dispatch panicked, false when it returned.
+		reached := func() (panicked bool) {
+			defer func() { panicked = recover() != nil }()
+			d.Dispatch(context.Background(), "anchat", "messages:new", data, 0)
+			return false
+		}()
+		if reached {
+			continue
+		}
+		t.Errorf("distinct payload %q must NOT be deduped — it must reach dispatch", data)
+	}
+}
+
+// TestClaimDispatch_degradedWarnWhenOlricDown covers the "configured but
+// failing" Olric path: with a non-nil client whose NewDMap errors,
+// claimDispatch must still return true (fail-open) and log a WARN.
+func TestClaimDispatch_degradedWarnWhenOlricDown(t *testing.T) {
+	warnCore, logs := observer.New(zapcore.WarnLevel)
+	d := &PubSubDispatcher{olricClient: failingOlricClient{}, logger: zap.New(warnCore)}
+
+	fired := d.claimDispatch(context.Background(), "ns", "messages:new", []byte("x"))
+	if !fired {
+		t.Fatal("claimDispatch must fail-open (true) when Olric is degraded — never drop the wake")
+	}
+
+	// The WARN is identified by the "dedup degraded" snippet in its message.
+	if warns := logs.FilterMessageSnippet("dedup degraded"); warns.Len() == 0 {
+		t.Error("degraded Olric path must emit a WARN naming the degradation, not stay silent")
+	}
+}
+
+// TestClaimDispatch_degradedWarnRateLimited checks that a sustained outage
+// does not flood the log: five back-to-back degraded claims must produce
+// exactly one "dedup degraded" WARN.
+func TestClaimDispatch_degradedWarnRateLimited(t *testing.T) {
+	const attempts = 5
+	warnCore, logs := observer.New(zapcore.WarnLevel)
+	d := &PubSubDispatcher{olricClient: failingOlricClient{}, logger: zap.New(warnCore)}
+
+	for n := attempts; n > 0; n-- {
+		d.claimDispatch(context.Background(), "ns", "messages:new", []byte("x"))
+	}
+	if got := logs.FilterMessageSnippet("dedup degraded").Len(); got != 1 {
+		t.Errorf("degraded WARN must be rate-limited to 1 per interval; got %d", got)
+	}
+}
+
+// TestClaimDispatch_nilOlricStaysQuiet covers a dispatcher with no Olric client
+// (a normal single-node / cache-disabled setup): claim fires, no WARN+ is logged.
+func TestClaimDispatch_nilOlricStaysQuiet(t *testing.T) {
+	warnCore, logs := observer.New(zapcore.WarnLevel)
+	d := &PubSubDispatcher{logger: zap.New(warnCore)} // olricClient left nil
+
+	if fired := d.claimDispatch(context.Background(), "ns", "messages:new", []byte("x")); !fired {
+		t.Fatal("nil Olric must fail-open (true)")
+	}
+	if n := logs.Len(); n != 0 {
+		t.Errorf("nil Olric is a normal config and must NOT emit a degraded WARN; got %d logs", n)
+	}
+}
--- a/core/pkg/serverless/triggers/dispatcher.go
+++ b/core/pkg/serverless/triggers/dispatcher.go
@ -134,8 +134,24 @@ type PubSubDispatcher struct {
 	// stopCh signals the periodic Refresh goroutine to exit.
 	stopCh   chan struct{}
 	stopOnce sync.Once
+
+	// localDedup guards against a SINGLE node invoking the same publish
+	// twice (e.g. gossipsub self-delivery), independent of Olric health.
+	// Bugboard #555. Always non-nil after NewPubSubDispatcher.
+	localDedup *localDedupCache
+
+	// degradedDedupMu guards degradedDedupLastWarn, which rate-limits the
+	// "Olric dedup degraded" WARN so a misconfigured cluster doesn't flood
+	// the log on every publish. Bugboard #555.
+	degradedDedupMu       sync.Mutex
+	degradedDedupLastWarn time.Time
 }

+// degradedDedupWarnInterval is the minimum gap between two "dedup degraded"
+// WARNs (bugboard #555): frequent enough for operators to notice a degraded
+// cluster, sparse enough not to flood the log under heavy publish volume.
+const degradedDedupWarnInterval = time.Minute
+
 // NewPubSubDispatcher creates a new PubSub trigger dispatcher.
 //
 // The `ps` argument may be nil (e.g. in tests, or namespaces with pubsub
@ -158,6 +174,7 @@ func NewPubSubDispatcher(
 		logger:         logger,
 		subscribedKeys: make(map[string]bool),
 		stopCh:         make(chan struct{}),
+		localDedup:     newLocalDedupCache(),
 	}
 }

@ -337,6 +354,20 @@ func (d *PubSubDispatcher) Dispatch(ctx context.Context, namespace, topic string
 		return
 	}

+	// Local once-per-publish dedup (bugboard #555). gossipsub can deliver
+	// the SAME publish to this node's subscribe handler more than once
+	// (self-delivery / fan-out), and the cross-node Olric claim below is a
+	// no-op when Olric is down. This in-process guard ensures a SINGLE node
+	// never invokes the same (namespace, topic, payload) twice, regardless
+	// of Olric health.
+	dedupKey := dispatchDedupKey(namespace, topic, data)
+	if !d.localDedup.claim(dedupKey) {
+		d.logger.Debug("PubSub dispatch deduped (local duplicate on this node)",
+			zap.String("namespace", namespace),
+			zap.String("topic", topic))
+		return
+	}
+
 	// Cluster-wide once-per-publish dedup (bugboard #30). gossipsub
 	// delivers a publish to every subscribed gateway node; only the node
 	// that wins the Olric claim for this (namespace, topic, payload)
@ -580,7 +611,7 @@ func (d *PubSubDispatcher) claimDispatch(ctx context.Context, namespace, topic s
 	}
 	dm, err := d.olricClient.NewDMap(dispatchDedupDMap)
 	if err != nil {
-		d.logger.Debug("dispatch dedup: NewDMap failed, firing (fail-open)", zap.Error(err))
+		d.warnDedupDegraded("NewDMap failed", namespace, topic, err)
 		return true
 	}
 	key := dispatchDedupKey(namespace, topic, data)
@ -594,11 +625,39 @@ func (d *PubSubDispatcher) claimDispatch(ctx context.Context, namespace, topic s
 	// Any other (transient) error: fail-open and fire rather than risk a
 	// dropped wake. Worst case is a duplicate, which is what #30 already
 	// had — never worse.
-	d.logger.Debug("dispatch dedup: claim errored, firing (fail-open)",
-		zap.String("topic", topic), zap.Error(err))
+	d.warnDedupDegraded("claim Put errored", namespace, topic, err)
 	return true
 }

+// warnDedupDegraded logs, at most once per degradedDedupWarnInterval, that
+// cross-node dispatch dedup is degraded (bugboard #555). claimDispatch calls
+// it on its fail-open paths; reason, namespace, topic and err become log fields.
+func (d *PubSubDispatcher) warnDedupDegraded(reason, namespace, topic string, err error) {
+	// Only the timestamp bookkeeping runs under the mutex; logging does not.
+	due := func() bool {
+		d.degradedDedupMu.Lock()
+		defer d.degradedDedupMu.Unlock()
+		now := time.Now()
+		if now.Sub(d.degradedDedupLastWarn) < degradedDedupWarnInterval {
+			return false
+		}
+		d.degradedDedupLastWarn = now
+		return true
+	}()
+	if !due {
+		return
+	}
+	d.logger.Warn("PubSub dispatch dedup degraded: Olric unavailable, "+
+		"falling back to fire-on-every-node — cross-node duplicate pushes "+
+		"possible until the shared store recovers",
+		zap.String("reason", reason),
+		zap.String("namespace", namespace),
+		zap.String("topic", topic),
+		zap.Duration("warn_interval", degradedDedupWarnInterval),
+		zap.Error(err),
+	)
+}
+
 // InvalidateCache is now a no-op — the dispatcher no longer caches lookups.
 // Kept on the type so callers who used it still compile.
 func (d *PubSubDispatcher) InvalidateCache(ctx context.Context, namespace, topic string) {}
--- a/core/pkg/serverless/triggers/local_dedup.go
+++ b/core/pkg/serverless/triggers/local_dedup.go
@ -0,0 +1,108 @@
+package triggers
+
+import (
+	"sync"
+	"time"
+)
+
+// Bugboard #555 — messages:new trigger fires twice (duplicate push).
+//
+// Two distinct bugs produced duplicate dispatches:
+//
+//  1. Cross-node fail-open: claimDispatch (dispatcher.go) coordinates
+//     once-per-publish dispatch via Olric, but FAILS OPEN when Olric is
+//     unavailable/misconfigured. On a multi-node cluster every node that
+//     receives the gossip publish then fires the handler → N duplicate
+//     invocations (AnChat: exactly 2 on a 2-reachable-node cluster).
+//
+//  2. Single-node self-delivery: even on one node, gossipsub can deliver a
+//     locally-originated publish back to the same node's subscribe handler,
+//     and the only guard was the cross-node Olric claim — which is a no-op
+//     when Olric is down.
+//
+// localDedupCache fixes (2) and bounds the blast radius of (1): a single
+// node never invokes the SAME publish twice, regardless of Olric health.
+// It is a small bounded map with per-entry TTL, keyed by the SAME string
+// dispatchDedupKey produces — (namespace, topic, sha256(payload)[:16]).
+//
+// IDENTICAL-PAYLOAD CAVEAT: the key folds the payload hash, NOT a stable
+// message id (gossipsub's message-ID isn't plumbed through the subscribe
+// handler, and parsing an app-specific id would couple the dispatcher to a
+// tenant's JSON schema). So two byte-identical publishes within the TTL
+// window collapse to one local invocation. Real payloads carry a unique id
+// (messageId/seq), so this is not a practical concern; it is the same
+// trade-off documented on dispatchDedupKey.
+const (
+	// localDedupTTL bounds how long a (namespace, topic, payload) claim is
+	// remembered on this node. It must cover gossipsub self-delivery /
+	// fan-out jitter; a byte-identical publish repeated inside the window is
+	// dropped as a duplicate. Kept in lockstep with dispatchDedupTTL.
+	localDedupTTL = 30 * time.Second
+
+	// localDedupMaxEntries caps the cache so a high-throughput namespace
+	// can't grow it without bound. When the cap is hit, expired entries are
+	// swept first; if still full, the claim is allowed through unrecorded
+	// (fail-open — a rare duplicate is far better than dropping a wake).
+	localDedupMaxEntries = 4096
+)
+
+// localDedupCache is a bounded, TTL'd set of recently-dispatched keys for a
+// single node. mu guards entries; claim locks it, so claim is concurrency-safe.
+type localDedupCache struct {
+	mu      sync.Mutex
+	entries map[string]time.Time // key -> expiry
+	ttl     time.Duration
+	maxSize int
+	now     func() time.Time // injectable clock for tests
+}
+
+// newLocalDedupCache returns an empty cache wired to the package defaults
+// (localDedupTTL, localDedupMaxEntries) and the real wall clock.
+func newLocalDedupCache() *localDedupCache {
+	c := new(localDedupCache)
+	c.entries = map[string]time.Time{}
+	c.ttl, c.maxSize = localDedupTTL, localDedupMaxEntries
+	c.now = time.Now
+	return c
+}
+
+// claim reports whether THIS node may dispatch the publish identified by
+// key, recording the key when it does so.
+//
+//   - true:  key is unseen, or its previous entry has expired → dispatch.
+//   - false: key was claimed within the TTL window → local duplicate, skip.
+//
+// Fail-open: when the cache is at maxSize and sweeping expired entries frees
+// no room, claim returns true WITHOUT recording the key, so a legitimate
+// wake is never dropped (at the cost of possible duplicates for that key).
+func (c *localDedupCache) claim(key string) bool {
+	c.mu.Lock()
+	defer c.mu.Unlock()
+
+	at := c.now()
+	if expiry, seen := c.entries[key]; seen && expiry.After(at) {
+		return false // still inside the TTL window
+	}
+
+	// Unseen or expired. The sweep runs only at capacity, which also evicts
+	// this key's own stale entry if that is what is taking up the slot.
+	full := func() bool { return len(c.entries) >= c.maxSize }
+	if full() {
+		c.sweepExpiredLocked(at)
+		if full() {
+			return true // all entries live: let it through, unrecorded
+		}
+	}
+
+	c.entries[key] = at.Add(c.ttl)
+	return true
+}
+
+// sweepExpiredLocked drops entries expiring at or before now; caller holds c.mu.
+func (c *localDedupCache) sweepExpiredLocked(now time.Time) {
+	for key, expiry := range c.entries {
+		if live := expiry.After(now); !live {
+			delete(c.entries, key)
+		}
+	}
+}
--- a/core/pkg/serverless/triggers/local_dedup_test.go
+++ b/core/pkg/serverless/triggers/local_dedup_test.go
@ -0,0 +1,140 @@
+package triggers
+
+import (
+	"sync"
+	"sync/atomic"
+	"testing"
+	"time"
+)
+
+// Bugboard #555 — a SINGLE node must never invoke the same publish twice,
+// independent of Olric health. These tests pin the local dedup cache's
+// claim/expiry/eviction behavior.
+
+// TestLocalDedupCache_sameKeyClaimedOncePerWindow claims one key twice in a row.
+func TestLocalDedupCache_sameKeyClaimedOncePerWindow(t *testing.T) {
+	cache := newLocalDedupCache()
+	k := dispatchDedupKey("ns", "messages:new", []byte(`{"id":1}`))
+	if first := cache.claim(k); !first {
+		t.Fatal("first claim of an unseen key must fire (return true)")
+	}
+	if second := cache.claim(k); second {
+		t.Error("second claim within the TTL must be deduped (return false)")
+	}
+}
+
+// TestLocalDedupCache_distinctKeysBothFire: two different payloads must both fire.
+func TestLocalDedupCache_distinctKeysBothFire(t *testing.T) {
+	cache := newLocalDedupCache()
+	for _, tc := range []struct{ payload, failMsg string }{
+		{"A", "distinct payload A must fire"},
+		{"B", "distinct payload B must fire (different payload → different key)"},
+	} {
+		if !cache.claim(dispatchDedupKey("ns", "messages:new", []byte(tc.payload))) {
+			t.Error(tc.failMsg)
+		}
+	}
+}
+
+// TestLocalDedupCache_expiredEntryFiresAgain drives the cache's injectable
+// clock (no sleeping) to show a key is claimable again once its TTL passes.
+func TestLocalDedupCache_expiredEntryFiresAgain(t *testing.T) {
+	fakeNow := time.Unix(1_000_000, 0)
+	cache := newLocalDedupCache()
+	cache.now = func() time.Time { return fakeNow }
+	k := dispatchDedupKey("ns", "messages:new", []byte("x"))
+
+	if fired := cache.claim(k); !fired {
+		t.Fatal("first claim must fire")
+	}
+	if fired := cache.claim(k); fired {
+		t.Fatal("immediate re-claim must be deduped")
+	}
+
+	// Jump one second past the TTL: the entry is now expired.
+	fakeNow = fakeNow.Add(localDedupTTL + time.Second)
+	if fired := cache.claim(k); !fired {
+		t.Error("after TTL expiry the same key must fire again")
+	}
+}
+
+// TestLocalDedupCache_evictsExpiredOnPressure fills a tiny cache, expires every
+// entry via the fake clock, then checks the next claim sweeps and is recorded.
+func TestLocalDedupCache_evictsExpiredOnPressure(t *testing.T) {
+	const capacity = 4 // tiny cap so the sweep path is hit deterministically
+	fakeNow := time.Unix(2_000_000, 0)
+	cache := &localDedupCache{
+		entries: make(map[string]time.Time),
+		ttl:     localDedupTTL,
+		maxSize: capacity,
+		now:     func() time.Time { return fakeNow },
+	}
+
+	for i := 0; i != capacity; i++ {
+		if fired := cache.claim(dispatchDedupKey("ns", "t", []byte{byte(i)})); !fired {
+			t.Fatalf("fill claim %d must fire", i)
+		}
+	}
+	if got := len(cache.entries); got != capacity {
+		t.Fatalf("expected cache full at %d, got %d", capacity, got)
+	}
+
+	// Expire everything, then claim a key the cache has never seen.
+	fakeNow = fakeNow.Add(localDedupTTL + time.Second)
+	fresh := dispatchDedupKey("ns", "t", []byte("fresh"))
+	if fired := cache.claim(fresh); !fired {
+		t.Fatal("new key under pressure must fire")
+	}
+	if _, recorded := cache.entries[fresh]; !recorded {
+		t.Error("new key must be recorded after expired entries were swept")
+	}
+	if got := len(cache.entries); got > capacity {
+		t.Errorf("cache must not exceed maxSize after sweep; got %d", got)
+	}
+}
+
+// TestLocalDedupCache_concurrentClaimsExactlyOneWins races many goroutines
+// on ONE key; exactly one claim may succeed. Run under -race so any
+// unsynchronized access to the cache's map is reported.
+func TestLocalDedupCache_concurrentClaimsExactlyOneWins(t *testing.T) {
+	const racers = 64
+	cache := newLocalDedupCache()
+	k := dispatchDedupKey("ns", "messages:new", []byte(`{"id":"race"}`))
+
+	var winners int64
+	var done sync.WaitGroup
+	for n := 0; n < racers; n++ {
+		done.Add(1)
+		go func() {
+			defer done.Done()
+			if cache.claim(k) {
+				atomic.AddInt64(&winners, 1)
+			}
+		}()
+	}
+	done.Wait()
+
+	if got := atomic.LoadInt64(&winners); got != 1 {
+		t.Errorf("exactly one concurrent claim of the same key must win; got %d", got)
+	}
+}
+
+// TestLocalDedupCache_failsOpenWhenFullOfLiveEntries fills a 2-entry cache
+// with live keys and checks that a third, new key is still allowed through.
+func TestLocalDedupCache_failsOpenWhenFullOfLiveEntries(t *testing.T) {
+	frozen := time.Unix(3_000_000, 0) // clock never advances: nothing expires
+	cache := &localDedupCache{
+		entries: make(map[string]time.Time),
+		ttl:     localDedupTTL,
+		maxSize: 2,
+		now:     func() time.Time { return frozen },
+	}
+	keyFor := func(payload string) string { return dispatchDedupKey("ns", "t", []byte(payload)) }
+
+	for _, live := range []string{"a", "b"} {
+		cache.claim(keyFor(live))
+	}
+	if fired := cache.claim(keyFor("c")); !fired {
+		t.Error("claim must fail-open (true) when the cache is full of live entries")
+	}
+}
--- a/core/pkg/serverless/types.go
+++ b/core/pkg/serverless/types.go
@ -237,6 +237,11 @@ type FunctionDefinition struct {
 	WSIdleTimeoutSec     int  `json:"ws_idle_timeout_sec,omitempty"`     // 0 = no idle timeout
 	WSMaxFrameBytes      int  `json:"ws_max_frame_bytes,omitempty"`      // 0 = use default 256 KB
 	WSMaxInflightPerConn int  `json:"ws_max_inflight_per_conn,omitempty"` // 0 = use default 64
+
+	// RawHTTPResponse enables raw-HTTP-response mode (bugboard #835): the
+	// function may call set_http_response to emit a verbatim status/headers/
+	// body instead of the JSON/Ack-wrapped output. See pkg/serverless/raw_http.go.
+	RawHTTPResponse bool `json:"raw_http_response,omitempty"`
 }

 // DBTriggerConfig defines a database trigger configuration.
@ -270,6 +275,11 @@ type Function struct {
 	WSIdleTimeoutSec     int  `json:"ws_idle_timeout_sec,omitempty"`
 	WSMaxFrameBytes      int  `json:"ws_max_frame_bytes,omitempty"`
 	WSMaxInflightPerConn int  `json:"ws_max_inflight_per_conn,omitempty"`
+
+	// RawHTTPResponse — bugboard #835. When true, the function may emit a
+	// verbatim HTTP response via set_http_response instead of the
+	// JSON/Ack-wrapped output. See pkg/serverless/raw_http.go.
+	RawHTTPResponse bool `json:"raw_http_response,omitempty"`
 }

 // InvocationContext provides context for a function invocation.
@ -308,6 +318,14 @@ type InvocationContext struct {
 	// could create by publishing topics that match its own wildcard
 	// trigger (bugboard #93 follow-up).
 	TriggerDepth int `json:"trigger_depth,omitempty"`
+
+	// RawHTTP carries a verbatim HTTP response set by a RawHTTPResponse
+	// function (bugboard #835). The engine populates this from the
+	// per-invocation collector after Execute returns; the Invoker surfaces
+	// it on InvokeResponse so the HTTP handler can replay it. nil/unset for
+	// normal functions and functions that didn't call set_http_response.
+	// Not serialized — internal plumbing only.
+	RawHTTP *RawHTTPResult `json:"-"`
 }

 // InvocationResult represents the result of a function invocation.
@ -555,6 +573,28 @@ type HostServices interface {
 	// in OnClose unless they want to dynamically unsubscribe.
 	WSPubSubUnbridge(ctx context.Context, clientID, topic string) error

+	// SetHTTPResponse records a verbatim HTTP response (status, headers, body)
+	// for a RawHTTPResponse function (bugboard #835). The HTTP invoke handler
+	// replays it byte-for-byte instead of the JSON/Ack-wrapped output, so a
+	// function can transparently proxy an upstream RPC. Returns an error when
+	// the function is NOT deployed with raw_http_response, or when the status /
+	// header count / body size fail validation. headers may be nil.
+	SetHTTPResponse(ctx context.Context, status int, headers map[string]string, body []byte) error
+
+	// EphemeralStateSet records WS-subscribe-tracked ephemeral state owned by
+	// the current invocation's WS client (bugboard #710) and publishes a "set"
+	// event on the topic so subscribers observe it. The state auto-clears (with
+	// a synthetic "clear" event) when the owning WS client disconnects, and
+	// also expires after ttlMs (clamped to a max; <=0 uses a default). Returns
+	// an error when there is no WS client in context, on empty topic/key, on an
+	// oversized payload, or when the client's per-connection key cap is hit.
+	EphemeralStateSet(ctx context.Context, topic, key string, payload []byte, ttlMs int64) error
+
+	// EphemeralStateClear removes ephemeral state the current WS client owns
+	// and publishes a "clear" event. Idempotent: clearing a missing or
+	// non-owned key is a no-op. Errors only on no-WS-client / empty topic-key.
+	EphemeralStateClear(ctx context.Context, topic, key string) error
+
 	// WebSocket operations (only valid in WS context)
 	WSSend(ctx context.Context, clientID string, data []byte) error
 	WSBroadcast(ctx context.Context, topic string, data []byte) error
--- a/core/pkg/serverless/websocket.go
+++ b/core/pkg/serverless/websocket.go
@ -23,6 +23,14 @@ type WSManager struct {
 	subscriptions   map[string]map[string]struct{}
 	subscriptionsMu sync.RWMutex

+	// disconnectHooks run (synchronously) on Unregister for each client,
+	// AFTER the connection + subscriptions are torn down. Used by the
+	// ephemeral-state store (bugboard #710) to auto-clear a client's owned
+	// state on disconnect. Both the stateless and persistent WS handlers
+	// call Unregister, so a single hook covers both paths.
+	disconnectHooks   []func(clientID string)
+	disconnectHooksMu sync.RWMutex
+
 	logger *zap.Logger
 }

@ -102,6 +110,20 @@ func (m *WSManager) Register(clientID string, conn WebSocketConn) {
 	)
 }

+// AddDisconnectHook appends hook to the callbacks Unregister runs for each
+// client it tears down, after that client's connection and subscriptions
+// are gone. Used to auto-clear WS-subscribe-tracked ephemeral state on
+// disconnect (bugboard #710). Hooks run synchronously on the teardown path,
+// so they must be cheap and non-blocking. A nil hook is ignored. Register
+// once at gateway init.
+func (m *WSManager) AddDisconnectHook(hook func(clientID string)) {
+	if hook != nil {
+		m.disconnectHooksMu.Lock()
+		defer m.disconnectHooksMu.Unlock()
+		m.disconnectHooks = append(m.disconnectHooks, hook)
+	}
+}
+
 // Unregister removes a WebSocket connection and its subscriptions.
 func (m *WSManager) Unregister(clientID string) {
 	m.connectionsMu.Lock()
@ -130,6 +152,14 @@ func (m *WSManager) Unregister(clientID string) {
 	// Close the connection
 	_ = conn.conn.Close()

+	// Fire disconnect hooks (ephemeral-state auto-clear, bugboard #710).
+	m.disconnectHooksMu.RLock()
+	hooks := m.disconnectHooks
+	m.disconnectHooksMu.RUnlock()
+	for _, hook := range hooks {
+		hook(clientID)
+	}
+
 	m.logger.Debug("Unregistered WebSocket connection",
 		zap.String("client_id", clientID),
 		zap.Int("remaining_connections", m.GetConnectionCount()),