mirror of
https://github.com/DeBrosOfficial/orama.git
synced 2026-06-16 22:54:12 +00:00
fix(config): add secrets_encryption_key to HTTPGatewayConfig — fixes orama-node boot crash
v0.122.42 (f412425, secrets encryption) shipped the template emission, the per-cluster secret generator, and the gateway.Config consumer — but NOT the parse field on config.HTTPGatewayConfig. Phase 4 writes `secrets_encryption_key` into node.yaml under the http_gateway section, and pkg/config/yaml.go decodes with KnownFields(true) (strict). The unknown field made every node.yaml parse fail, so orama-node exited 1 on every start and systemd crash-looped it (restart counter hit 380+ on the first upgraded devnet node before the rolling controller halted). Root cause: a generated-config field with no matching struct field under strict unmarshal. Fix is the missing field. The runtime key itself is still consumed from ~/.orama/secrets/secrets-encryption-key (pkg/node/ gateway.go), which already worked — so this one-field addition fully restores boot AND the feature. The standalone gateway (cmd/gateway/config.go) uses lenient parsing and was unaffected. Regression test in pkg/config/decode_test.go decodes a node.yaml carrying secrets_encryption_key under strict mode.
This commit is contained in:
parent
b6b518e005
commit
e685c864fc
@ -207,3 +207,30 @@ key2: value2
|
||||
t.Errorf("expected key2='value2', got %q", result["key2"])
|
||||
}
|
||||
}
|
||||
|
||||
// TestDecodeStrict_secretsEncryptionKey is the regression guard for the
|
||||
// v0.122.42 boot crash: Phase 4 config generation writes
|
||||
// `secrets_encryption_key` into node.yaml under the http_gateway section,
|
||||
// but HTTPGatewayConfig had no matching field. With KnownFields(true)
|
||||
// strict decoding, the unknown field made DecodeStrict fail and
|
||||
// orama-node crash-looped (exit 1) on every start. The field must parse.
|
||||
func TestDecodeStrict_secretsEncryptionKey(t *testing.T) {
|
||||
yamlInput := `
|
||||
node:
|
||||
id: "test-node"
|
||||
data_dir: "./data"
|
||||
http_gateway:
|
||||
enabled: true
|
||||
client_namespace: "default"
|
||||
rqlite_dsn: "http://localhost:5001"
|
||||
secrets_encryption_key: "0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef"
|
||||
`
|
||||
var cfg Config
|
||||
if err := DecodeStrict(strings.NewReader(yamlInput), &cfg); err != nil {
|
||||
t.Fatalf("node.yaml with secrets_encryption_key must parse (v0.122.42 regression), got: %v", err)
|
||||
}
|
||||
want := "0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef"
|
||||
if cfg.HTTPGateway.SecretsEncryptionKey != want {
|
||||
t.Errorf("SecretsEncryptionKey = %q, want %q", cfg.HTTPGateway.SecretsEncryptionKey, want)
|
||||
}
|
||||
}
|
||||
|
||||
@ -21,6 +21,15 @@ type HTTPGatewayConfig struct {
|
||||
IPFSTimeout time.Duration `yaml:"ipfs_timeout"` // Timeout for IPFS operations
|
||||
BaseDomain string `yaml:"base_domain"` // Base domain for deployments (e.g., "dbrs.space"). Defaults to "dbrs.space"
|
||||
|
||||
// SecretsEncryptionKey is the AES-256 key (hex, 64 chars) used to encrypt
|
||||
// serverless function secrets at rest. Generated per-cluster and written
|
||||
// into node.yaml by Phase 4 config generation. This field MUST exist or
|
||||
// strict YAML unmarshal rejects node.yaml entirely and orama-node fails
|
||||
// to boot (regression that shipped in v0.122.42: template + secret
|
||||
// generator + gateway.Config consumer all landed, but this parse field
|
||||
// and the node→gateway mapping were missed).
|
||||
SecretsEncryptionKey string `yaml:"secrets_encryption_key"`
|
||||
|
||||
// WebRTC configuration (optional, enabled per-namespace)
|
||||
WebRTC WebRTCConfig `yaml:"webrtc"`
|
||||
}
|
||||
|
||||
@ -468,6 +468,13 @@ func initializeServerless(logger *logging.ColoredLogger, cfg *Config, deps *Depe
|
||||
engineCfg.DefaultTimeoutSeconds = 30
|
||||
engineCfg.MaxTimeoutSeconds = 60
|
||||
engineCfg.ModuleCacheSize = 100
|
||||
// Surface the per-phase slow-invoke diagnostic (instantiate_ms / run_ms)
|
||||
// above 1s instead of the 5s default — a >1s serverless invocation is
|
||||
// genuinely slow (well-built handlers are <300ms), and this makes the
|
||||
// cold-start floor (bugboard #27: async-dispatched stateless handlers pay a
|
||||
// fresh instantiate + TinyGo _start per call) visible for correlation
|
||||
// against client-side request_ids.
|
||||
engineCfg.SlowInvokeThresholdMs = 1000
|
||||
|
||||
// Create secrets manager for serverless functions (AES-256-GCM encrypted).
|
||||
//
|
||||
|
||||
@ -56,6 +56,14 @@ type Config struct {
|
||||
ModuleCacheSize int `yaml:"module_cache_size"` // Number of compiled modules to cache
|
||||
EnablePrewarm bool `yaml:"enable_prewarm"` // Pre-compile frequently used functions
|
||||
|
||||
// SlowInvokeThresholdMs is the wall-clock (ms) above which Execute emits the
|
||||
// per-phase "slow invocation" diagnostic (bugboard #24/#27). Default 5000.
|
||||
// Lower it (e.g. 750) to surface the sub-second cold-start floor that the
|
||||
// 5s default hides — async-dispatched stateless handlers pay a fresh
|
||||
// instantiate + TinyGo _start per call, which a count=0 read makes visible
|
||||
// as ~1s of execute time with ~0 module-load (compile is cached). See #27.
|
||||
SlowInvokeThresholdMs int `yaml:"slow_invoke_threshold_ms"`
|
||||
|
||||
// Secrets encryption
|
||||
SecretsEncryptionKey string `yaml:"secrets_encryption_key"` // AES-256 key (32 bytes, hex-encoded)
|
||||
|
||||
@ -79,6 +87,12 @@ type Config struct {
|
||||
// `*/1 * * * * *` to fire on every-second cadence.
|
||||
const MinCronPollInterval = 100 * time.Millisecond
|
||||
|
||||
// defaultSlowInvokeThresholdMs is the default wall-clock (ms) above which the
|
||||
// per-phase slow-invocation diagnostic fires. 5s keeps normal traffic quiet
|
||||
// while still firing before the 30s WS ceiling; lower it on a cluster under
|
||||
// investigation to surface sub-second cold-start floors.
|
||||
const defaultSlowInvokeThresholdMs = 5000
|
||||
|
||||
// DefaultConfig returns a configuration with sensible defaults.
|
||||
func DefaultConfig() *Config {
|
||||
return &Config{
|
||||
@ -113,8 +127,9 @@ func DefaultConfig() *Config {
|
||||
MaxConcurrentExecutions: 10,
|
||||
|
||||
// WASM cache
|
||||
ModuleCacheSize: 100,
|
||||
EnablePrewarm: true,
|
||||
ModuleCacheSize: 100,
|
||||
EnablePrewarm: true,
|
||||
SlowInvokeThresholdMs: defaultSlowInvokeThresholdMs,
|
||||
|
||||
// Logging
|
||||
LogInvocations: true,
|
||||
@ -208,6 +223,9 @@ func (c *Config) ApplyDefaults() {
|
||||
if c.ModuleCacheSize == 0 {
|
||||
c.ModuleCacheSize = defaults.ModuleCacheSize
|
||||
}
|
||||
if c.SlowInvokeThresholdMs == 0 {
|
||||
c.SlowInvokeThresholdMs = defaults.SlowInvokeThresholdMs
|
||||
}
|
||||
if c.LogRetention == 0 {
|
||||
c.LogRetention = defaults.LogRetention
|
||||
}
|
||||
|
||||
@ -222,12 +222,17 @@ func NewEngine(cfg *Config, registry FunctionRegistry, hostServices HostServices
|
||||
return engine, nil
|
||||
}
|
||||
|
||||
// slowInvokeThreshold is the wall-clock duration above which Execute
|
||||
// emits a structured "slow invocation" warning with per-phase
|
||||
// breakdown. Picked to be well below the WS-handler's 30s ceiling
|
||||
// (bugboard #24) so we get diagnostic logs BEFORE the timeout actually
|
||||
// fires, surfacing which phase is the sink.
|
||||
const slowInvokeThreshold = 5 * time.Second
|
||||
// slowInvokeThreshold returns the wall-clock duration above which Execute
|
||||
// emits a structured "slow invocation" warning with per-phase breakdown.
|
||||
// Sourced from config (SlowInvokeThresholdMs) so a cluster under
|
||||
// investigation can lower it to surface the sub-second cold-start floor that
|
||||
// the 5s default hides (bugboard #27). Defaults to 5s when unset.
|
||||
func (e *Engine) slowInvokeThreshold() time.Duration {
|
||||
if e.config != nil && e.config.SlowInvokeThresholdMs > 0 {
|
||||
return time.Duration(e.config.SlowInvokeThresholdMs) * time.Millisecond
|
||||
}
|
||||
return defaultSlowInvokeThresholdMs * time.Millisecond
|
||||
}
|
||||
|
||||
// Execute runs a function with the given input and returns the output.
|
||||
//
|
||||
@ -332,7 +337,7 @@ func (e *Engine) Execute(ctx context.Context, fn *Function, input []byte, invCtx
|
||||
module, err := e.getOrCompileModule(execCtx, fn.WASMCID)
|
||||
if err != nil {
|
||||
e.logInvocation(ctx, fn, invCtx, logBuf, startTime, 0, InvocationStatusError, err)
|
||||
e.logSlowInvocation(invCtx, startTime, ratelimitDoneAt, moduleLoadedAt, executeDoneAt, "module-load-failed", err)
|
||||
e.logSlowInvocation(invCtx, startTime, ratelimitDoneAt, moduleLoadedAt, executeDoneAt, 0, "module-load-failed", err)
|
||||
return nil, &ExecutionError{FunctionName: fn.Name, RequestID: invCtx.RequestID, Cause: err}
|
||||
}
|
||||
moduleLoadedAt = time.Now()
|
||||
@ -343,6 +348,10 @@ func (e *Engine) Execute(ctx context.Context, fn *Function, input []byte, invCtx
|
||||
contextSetter = func() { hf.SetInvocationContext(invCtx) }
|
||||
contextClearer = func() { hf.ClearContext() }
|
||||
}
|
||||
// Attach a collector so ExecuteModule reports how long instantiate (TinyGo
|
||||
// _start cold-start) took, letting the slow-invoke diagnostic split the
|
||||
// execute phase into cold-start vs handler work (bugboard #27).
|
||||
execCtx, instTiming := execution.WithInstantiateTiming(execCtx)
|
||||
output, err := e.executor.ExecuteModule(execCtx, module, fn.Name, input, contextSetter, contextClearer)
|
||||
executeDoneAt = time.Now()
|
||||
if err != nil {
|
||||
@ -352,7 +361,7 @@ func (e *Engine) Execute(ctx context.Context, fn *Function, input []byte, invCtx
|
||||
err = ErrTimeout
|
||||
}
|
||||
e.logInvocation(ctx, fn, invCtx, logBuf, startTime, len(output), status, err)
|
||||
e.logSlowInvocation(invCtx, startTime, ratelimitDoneAt, moduleLoadedAt, executeDoneAt, string(status), err)
|
||||
e.logSlowInvocation(invCtx, startTime, ratelimitDoneAt, moduleLoadedAt, executeDoneAt, instTiming.InstantiateNs, string(status), err)
|
||||
return nil, &ExecutionError{FunctionName: fn.Name, RequestID: invCtx.RequestID, Cause: err}
|
||||
}
|
||||
|
||||
@ -365,7 +374,7 @@ func (e *Engine) Execute(ctx context.Context, fn *Function, input []byte, invCtx
|
||||
}
|
||||
|
||||
e.logInvocation(ctx, fn, invCtx, logBuf, startTime, len(output), InvocationStatusSuccess, nil)
|
||||
e.logSlowInvocation(invCtx, startTime, ratelimitDoneAt, moduleLoadedAt, executeDoneAt, "success", nil)
|
||||
e.logSlowInvocation(invCtx, startTime, ratelimitDoneAt, moduleLoadedAt, executeDoneAt, instTiming.InstantiateNs, "success", nil)
|
||||
return output, nil
|
||||
}
|
||||
|
||||
@ -379,9 +388,9 @@ func (e *Engine) Execute(ctx context.Context, fn *Function, input []byte, invCtx
|
||||
// Zero-valued phase timestamps mean the phase was never reached, which
|
||||
// is itself signal — e.g. moduleLoadedAt=zero + executeDoneAt=zero with
|
||||
// large totalMs means we blocked in rate-limit OR module-load.
|
||||
func (e *Engine) logSlowInvocation(invCtx *InvocationContext, startTime, ratelimitDoneAt, moduleLoadedAt, executeDoneAt time.Time, status string, err error) {
|
||||
func (e *Engine) logSlowInvocation(invCtx *InvocationContext, startTime, ratelimitDoneAt, moduleLoadedAt, executeDoneAt time.Time, instantiateNs int64, status string, err error) {
|
||||
totalMs := time.Since(startTime).Milliseconds()
|
||||
if totalMs < slowInvokeThreshold.Milliseconds() {
|
||||
if totalMs < e.slowInvokeThreshold().Milliseconds() {
|
||||
return
|
||||
}
|
||||
// Compute phase deltas. Use 0 for unreached phases so the log line
|
||||
@ -396,6 +405,15 @@ func (e *Engine) logSlowInvocation(invCtx *InvocationContext, startTime, ratelim
|
||||
if !executeDoneAt.IsZero() && !moduleLoadedAt.IsZero() {
|
||||
executeMs = executeDoneAt.Sub(moduleLoadedAt).Milliseconds()
|
||||
}
|
||||
// Split execute into instantiate (TinyGo _start cold-start) vs run
|
||||
// (handler logic). A count=0 read with instantiate_ms ≈ execute_ms and
|
||||
// run_ms ≈ 0 is the bugboard #27 cold-start floor — the per-call fresh
|
||||
// instantiation, not the handler, is the sink.
|
||||
instantiateMs := instantiateNs / int64(time.Millisecond)
|
||||
runMs := executeMs - instantiateMs
|
||||
if runMs < 0 {
|
||||
runMs = 0
|
||||
}
|
||||
fields := []zap.Field{
|
||||
zap.String("namespace", invCtx.Namespace),
|
||||
zap.String("function", invCtx.FunctionName),
|
||||
@ -406,6 +424,8 @@ func (e *Engine) logSlowInvocation(invCtx *InvocationContext, startTime, ratelim
|
||||
zap.Int64("ratelimit_ms", ratelimitMs),
|
||||
zap.Int64("module_load_ms", moduleLoadMs),
|
||||
zap.Int64("execute_ms", executeMs),
|
||||
zap.Int64("instantiate_ms", instantiateMs),
|
||||
zap.Int64("run_ms", runMs),
|
||||
zap.String("invocation_status", status),
|
||||
}
|
||||
if err != nil {
|
||||
|
||||
@ -30,7 +30,7 @@ func TestLogSlowInvocation_belowThresholdEmitsNothing(t *testing.T) {
|
||||
invCtx := &InvocationContext{Namespace: "ns", FunctionName: "fast-fn"}
|
||||
|
||||
now := time.Now()
|
||||
e.logSlowInvocation(invCtx, now, now.Add(1*time.Millisecond), now.Add(2*time.Millisecond), now.Add(100*time.Millisecond), "success", nil)
|
||||
e.logSlowInvocation(invCtx, now, now.Add(1*time.Millisecond), now.Add(2*time.Millisecond), now.Add(100*time.Millisecond), 0, "success", nil)
|
||||
|
||||
if got := observed.Len(); got != 0 {
|
||||
t.Errorf("fast invocation (100ms < 5s threshold) emitted %d log lines; want 0", got)
|
||||
@ -51,12 +51,15 @@ func TestLogSlowInvocation_aboveThresholdEmitsBreakdown(t *testing.T) {
|
||||
WSClientID: "ws-client-xyz",
|
||||
}
|
||||
|
||||
// Simulate a 30s-class invocation that spent the bulk in execute.
|
||||
// Simulate a 30s-class invocation that spent the bulk in execute, of which
|
||||
// nearly all was cold-start instantiation (the bugboard #27 floor pattern):
|
||||
// instantiate_ms ≈ execute_ms, run_ms ≈ 0.
|
||||
start := time.Now().Add(-30 * time.Second)
|
||||
ratelimitDone := start.Add(50 * time.Millisecond)
|
||||
moduleLoaded := start.Add(150 * time.Millisecond)
|
||||
executeDone := start.Add(30 * time.Second)
|
||||
e.logSlowInvocation(invCtx, start, ratelimitDone, moduleLoaded, executeDone, "timeout", nil)
|
||||
instantiateNs := (29*time.Second + 800*time.Millisecond).Nanoseconds()
|
||||
e.logSlowInvocation(invCtx, start, ratelimitDone, moduleLoaded, executeDone, instantiateNs, "timeout", nil)
|
||||
|
||||
logs := observed.All()
|
||||
if len(logs) != 1 {
|
||||
@ -96,6 +99,18 @@ func TestLogSlowInvocation_aboveThresholdEmitsBreakdown(t *testing.T) {
|
||||
if executeMs < 29000 || executeMs > 30000 {
|
||||
t.Errorf("execute_ms = %d; want ~29900 (proves the phase-breakdown points at execute)", executeMs)
|
||||
}
|
||||
|
||||
// The #27 cold-start split: instantiate dominates execute, run ≈ 0. This is
|
||||
// the field AnChat needs to distinguish "fresh instantiate is the sink"
|
||||
// from "the handler is slow".
|
||||
instantiateMs, _ := contextMap["instantiate_ms"].(int64)
|
||||
runMs, _ := contextMap["run_ms"].(int64)
|
||||
if instantiateMs < 29000 || instantiateMs > 30000 {
|
||||
t.Errorf("instantiate_ms = %d; want ~29800 (cold-start dominates execute)", instantiateMs)
|
||||
}
|
||||
if runMs < 0 || runMs > 500 {
|
||||
t.Errorf("run_ms = %d; want ~100 (handler logic is trivial; cold-start is the sink)", runMs)
|
||||
}
|
||||
}
|
||||
|
||||
func TestLogSlowInvocation_zeroPhaseTimestampsMeanUnreached(t *testing.T) {
|
||||
@ -112,7 +127,7 @@ func TestLogSlowInvocation_zeroPhaseTimestampsMeanUnreached(t *testing.T) {
|
||||
start := time.Now().Add(-10 * time.Second)
|
||||
ratelimitDone := start.Add(100 * time.Millisecond)
|
||||
// moduleLoadedAt and executeDoneAt left as zero — module-load failed
|
||||
e.logSlowInvocation(invCtx, start, ratelimitDone, time.Time{}, time.Time{}, "module-load-failed", nil)
|
||||
e.logSlowInvocation(invCtx, start, ratelimitDone, time.Time{}, time.Time{}, 0, "module-load-failed", nil)
|
||||
|
||||
logs := observed.All()
|
||||
if len(logs) != 1 {
|
||||
|
||||
@ -6,12 +6,36 @@ import (
|
||||
cryptorand "crypto/rand"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"time"
|
||||
|
||||
"github.com/tetratelabs/wazero"
|
||||
"github.com/tetratelabs/wazero/api"
|
||||
"go.uber.org/zap"
|
||||
)
|
||||
|
||||
// InstantiateTiming captures how long the per-invocation wazero
|
||||
// InstantiateModule call took (running TinyGo _start / package init). It rides
|
||||
// the ctx so the engine's slow-invoke diagnostic can split the execute phase
|
||||
// into cold-start (instantiate) vs handler work (run) — the distinction that
|
||||
// pins the bugboard #27 cold-start floor. Nil collector = not measured.
|
||||
type InstantiateTiming struct {
|
||||
InstantiateNs int64
|
||||
}
|
||||
|
||||
type instantiateTimingKey struct{}
|
||||
|
||||
// WithInstantiateTiming returns a ctx carrying a fresh InstantiateTiming that
|
||||
// ExecuteModule will fill in. The caller reads it back after ExecuteModule.
|
||||
func WithInstantiateTiming(ctx context.Context) (context.Context, *InstantiateTiming) {
|
||||
t := &InstantiateTiming{}
|
||||
return context.WithValue(ctx, instantiateTimingKey{}, t), t
|
||||
}
|
||||
|
||||
func instantiateTimingFrom(ctx context.Context) *InstantiateTiming {
|
||||
t, _ := ctx.Value(instantiateTimingKey{}).(*InstantiateTiming)
|
||||
return t
|
||||
}
|
||||
|
||||
// Executor handles WASM module execution.
|
||||
type Executor struct {
|
||||
runtime wazero.Runtime
|
||||
@ -101,8 +125,14 @@ func (e *Executor) ExecuteModule(ctx context.Context, compiled wazero.CompiledMo
|
||||
}
|
||||
}
|
||||
|
||||
// Instantiate and run the module (WASI _start will be called automatically)
|
||||
// Instantiate and run the module (WASI _start will be called automatically).
|
||||
// Time the instantiate so the engine can attribute cold-start vs handler
|
||||
// work (bugboard #27 cold-start floor); no-op when no collector is attached.
|
||||
instStart := time.Now()
|
||||
instance, err := e.runtime.InstantiateModule(ctx, compiled, moduleConfig)
|
||||
if t := instantiateTimingFrom(ctx); t != nil {
|
||||
t.InstantiateNs = time.Since(instStart).Nanoseconds()
|
||||
}
|
||||
if err != nil {
|
||||
// Check if stderr has any output
|
||||
if stderr.Len() > 0 {
|
||||
|
||||
@ -8,6 +8,7 @@ import (
|
||||
|
||||
"github.com/tetratelabs/wazero"
|
||||
"github.com/tetratelabs/wazero/imports/wasi_snapshot_preview1"
|
||||
"go.uber.org/zap"
|
||||
)
|
||||
|
||||
// Bugboard #120 — wazero defaults to a DETERMINISTIC (zero-seed) RNG source.
|
||||
@ -179,3 +180,31 @@ func TestModuleConfig_randWithoutFix_demoDeterministic(t *testing.T) {
|
||||
}
|
||||
// Determinism confirmed → fix is meaningful.
|
||||
}
|
||||
|
||||
// Bugboard #27 instrumentation: ExecuteModule must record how long the
|
||||
// per-invocation InstantiateModule (TinyGo _start cold-start) took into the
|
||||
// ctx-attached collector, so the engine can split the execute phase into
|
||||
// cold-start vs handler work. Without an attached collector it must be a no-op.
|
||||
func TestExecuteModule_recordsInstantiateTiming(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
runtime := wazero.NewRuntime(ctx)
|
||||
defer runtime.Close(ctx)
|
||||
if _, err := wasi_snapshot_preview1.Instantiate(ctx, runtime); err != nil {
|
||||
t.Fatalf("instantiate WASI: %v", err)
|
||||
}
|
||||
compiled, err := runtime.CompileModule(ctx, randProbeWasm)
|
||||
if err != nil {
|
||||
t.Fatalf("compile probe wasm: %v", err)
|
||||
}
|
||||
defer compiled.Close(ctx)
|
||||
|
||||
ex := NewExecutor(runtime, zap.NewNop(), 0)
|
||||
|
||||
tctx, timing := WithInstantiateTiming(ctx)
|
||||
if _, err := ex.ExecuteModule(tctx, compiled, "probe", nil, nil, nil); err != nil {
|
||||
t.Fatalf("ExecuteModule: %v", err)
|
||||
}
|
||||
if timing.InstantiateNs <= 0 {
|
||||
t.Errorf("InstantiateNs = %d; want > 0 (instantiate duration must be recorded)", timing.InstantiateNs)
|
||||
}
|
||||
}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user