orama/core/pkg/serverless/ratelimit.go

package serverless

// ratelimit.go provides a multi-tier token-bucket rate limiter for serverless
// function invocations. Three tiers, applied in order; first rejection wins:
//
//   1. Per-(namespace, function, wallet) — only when a function declares an override
//   2. Per-(namespace, wallet) — gateway-wide default per-user limit
//   3. Per-namespace total — protects against single-namespace exhaustion
//
// Anonymous callers (no wallet) fall back to per-IP buckets at tier 2.
//
// Per-bucket state is held in a sharded LRU to bound memory: configurable
// MaxBucketsPerScope (default 100k per tier) — buckets evicted are
// effectively "limit reset for that key" which is acceptable.
//
// See plan: core/plans/platform/09_PER_WALLET_RATE_LIMIT.md.

import (
	"container/list"
	"context"
	"fmt"
	"hash/fnv"
	"sync"
	"time"
)

// RateLimitRequest holds the inputs for a single Allow check.
type RateLimitRequest struct {
	Namespace string
	Function  string
	Wallet    string                  // empty = anonymous; per-wallet tier falls back to per-IP
	IP        string                  // remote IP, used when Wallet is empty
	Override  *PerFunctionRateLimit  // optional per-function tightening
}

// PerFunctionRateLimit overrides the default per-wallet limits for one function.
// Set zero values to inherit defaults.
type PerFunctionRateLimit struct {
	PerWalletPerMinute int
	PerWalletBurst     int
}

// LimiterConfig holds gateway-wide rate-limiter defaults. Zero values mean
// "use the built-in default" — see DefaultLimiterConfig.
type LimiterConfig struct {
	// Per-(namespace, wallet) defaults
	PerWalletPerMinute int
	PerWalletBurst     int

	// Per-(namespace) ceiling
	PerNamespacePerMinute int
	PerNamespaceBurst     int

	// Per-IP bucket for anonymous callers
	PerIPPerMinute int
	PerIPBurst     int

	// LRU cap for tracked buckets per tier
	MaxBucketsPerScope int
}

// DefaultLimiterConfig returns a sensible default config. Tuned for typical
// per-user app load with headroom for short bursts.
func DefaultLimiterConfig() LimiterConfig {
	return LimiterConfig{
		PerWalletPerMinute:    600,    // 10/sec sustained
		PerWalletBurst:        60,     // 60-token burst window
		PerNamespacePerMinute: 60_000, // 1k/sec sustained per namespace
		PerNamespaceBurst:     6000,
		PerIPPerMinute:        120,    // 2/sec for anonymous
		PerIPBurst:            30,
		MaxBucketsPerScope:    100_000,
	}
}

// Decision is the result of an Allow check.
type Decision struct {
	Allowed    bool
	RetryAfter time.Duration
	Scope      string // "per_function_wallet" | "per_wallet" | "per_namespace" | "per_ip"
}

// RateLimitedError is returned by the engine when a request is rejected.
// Carries the retry-after for the gateway HTTP layer to serve as Retry-After.
type RateLimitedError struct {
	Scope      string
	RetryAfter time.Duration
}

func (e *RateLimitedError) Error() string {
	return fmt.Sprintf("rate limit exceeded (scope=%s, retry_after=%s)", e.Scope, e.RetryAfter)
}

// MultiTierLimiter implements RateLimiter as three layered token-bucket
// scopes with sharded LRU bucket tracking.
type MultiTierLimiter struct {
	cfg LimiterConfig

	// Tier buckets — each scope owns its own LRU.
	fnWalletBuckets *lruBuckets // (ns, fn, wallet) — only populated when override exists
	walletBuckets   *lruBuckets // (ns, wallet)
	namespaceBuckets *lruBuckets // (ns)
	ipBuckets       *lruBuckets // (ns, ip) — for anonymous callers
}

// NewMultiTierLimiter constructs a limiter with the given config. Pass
// DefaultLimiterConfig() for sensible defaults; override fields as needed.
func NewMultiTierLimiter(cfg LimiterConfig) *MultiTierLimiter {
	if cfg.PerWalletPerMinute <= 0 {
		cfg.PerWalletPerMinute = 600
	}
	if cfg.PerWalletBurst <= 0 {
		cfg.PerWalletBurst = 60
	}
	if cfg.PerNamespacePerMinute <= 0 {
		cfg.PerNamespacePerMinute = 60_000
	}
	if cfg.PerNamespaceBurst <= 0 {
		cfg.PerNamespaceBurst = 6000
	}
	if cfg.PerIPPerMinute <= 0 {
		cfg.PerIPPerMinute = 120
	}
	if cfg.PerIPBurst <= 0 {
		cfg.PerIPBurst = 30
	}
	if cfg.MaxBucketsPerScope <= 0 {
		cfg.MaxBucketsPerScope = 100_000
	}
	return &MultiTierLimiter{
		cfg:              cfg,
		fnWalletBuckets:  newLRUBuckets(cfg.MaxBucketsPerScope),
		walletBuckets:    newLRUBuckets(cfg.MaxBucketsPerScope),
		namespaceBuckets: newLRUBuckets(cfg.MaxBucketsPerScope),
		ipBuckets:        newLRUBuckets(cfg.MaxBucketsPerScope),
	}
}

// AllowRequest returns the layered decision. The first tier to reject wins;
// on rejection, RetryAfter is the wait until that tier could accept again.
//
// This is the rich path; the engine prefers it via type assertion. The
// legacy `Allow(ctx, key)` method below remains for back-compat with the
// simple RateLimiter interface.
func (l *MultiTierLimiter) AllowRequest(ctx context.Context, req RateLimitRequest) (Decision, error) {
	// Tier 1: per-(ns, fn, wallet) override — only when the function declared one.
	if req.Override != nil && req.Override.PerWalletPerMinute > 0 && req.Wallet != "" {
		key := req.Namespace + "/" + req.Function + "/" + req.Wallet
		burst := req.Override.PerWalletBurst
		if burst <= 0 {
			burst = req.Override.PerWalletPerMinute / 10
			if burst <= 0 {
				burst = 1
			}
		}
		if d := l.fnWalletBuckets.tryConsume(key,
			float64(req.Override.PerWalletPerMinute)/60.0,
			float64(burst)); !d.Allowed {
			d.Scope = "per_function_wallet"
			return d, nil
		}
	}

	// Tier 2: per-(ns, wallet) OR per-(ns, ip) for anonymous.
	if req.Wallet != "" {
		key := req.Namespace + "/" + req.Wallet
		if d := l.walletBuckets.tryConsume(key,
			float64(l.cfg.PerWalletPerMinute)/60.0,
			float64(l.cfg.PerWalletBurst)); !d.Allowed {
			d.Scope = "per_wallet"
			return d, nil
		}
	} else if req.IP != "" {
		key := req.Namespace + "/" + req.IP
		if d := l.ipBuckets.tryConsume(key,
			float64(l.cfg.PerIPPerMinute)/60.0,
			float64(l.cfg.PerIPBurst)); !d.Allowed {
			d.Scope = "per_ip"
			return d, nil
		}
	}

	// Tier 3: per-namespace total (always applies).
	if req.Namespace != "" {
		if d := l.namespaceBuckets.tryConsume(req.Namespace,
			float64(l.cfg.PerNamespacePerMinute)/60.0,
			float64(l.cfg.PerNamespaceBurst)); !d.Allowed {
			d.Scope = "per_namespace"
			return d, nil
		}
	}

	return Decision{Allowed: true}, nil
}

// Allow satisfies the legacy serverless.RateLimiter interface. Treats `key`
// as the wallet/namespace combo from older call sites. Prefer AllowRequest
// in new code.
func (l *MultiTierLimiter) Allow(ctx context.Context, key string) (bool, error) {
	d, err := l.AllowRequest(ctx, RateLimitRequest{Namespace: "_global", Wallet: key})
	return d.Allowed, err
}

// ----------------------------------------------------------------------------
// Backward compatibility: keep the old TokenBucketLimiter type as a
// single-bucket impl that satisfies the old simple interface. New code uses
// MultiTierLimiter exclusively.
// ----------------------------------------------------------------------------

// TokenBucketLimiter is a single global bucket — kept for backwards compat.
// Prefer MultiTierLimiter for any new use.
type TokenBucketLimiter struct {
	mu       sync.Mutex
	tokens   float64
	max      float64
	refill   float64
	lastTime time.Time
}

// NewTokenBucketLimiter creates a single-bucket limiter with the given per-minute limit.
func NewTokenBucketLimiter(perMinute int) *TokenBucketLimiter {
	perSecond := float64(perMinute) / 60.0
	return &TokenBucketLimiter{
		tokens:   float64(perMinute),
		max:      float64(perMinute),
		refill:   perSecond,
		lastTime: time.Now(),
	}
}

// Allow checks if a request should be allowed.
// The key argument is ignored — this is a single global bucket.
func (t *TokenBucketLimiter) Allow(_ context.Context, _ string) (bool, error) {
	t.mu.Lock()
	defer t.mu.Unlock()

	now := time.Now()
	elapsed := now.Sub(t.lastTime).Seconds()
	t.lastTime = now

	t.tokens += elapsed * t.refill
	if t.tokens > t.max {
		t.tokens = t.max
	}
	if t.tokens < 1.0 {
		return false, nil
	}
	t.tokens--
	return true, nil
}

// ----------------------------------------------------------------------------
// lruBuckets: sharded map of token buckets with LRU eviction
// ----------------------------------------------------------------------------

const lruShards = 16

type lruBuckets struct {
	shards [lruShards]*bucketShard
}

type bucketShard struct {
	mu      sync.Mutex
	buckets map[string]*tokenBucket
	order   *list.List // each Element.Value is a string (the key); front = most recent
	keyToEl map[string]*list.Element
	cap     int
}

func newLRUBuckets(capacity int) *lruBuckets {
	per := capacity / lruShards
	if per < 1 {
		per = 1
	}
	lb := &lruBuckets{}
	for i := range lb.shards {
		lb.shards[i] = &bucketShard{
			buckets: make(map[string]*tokenBucket, per),
			order:   list.New(),
			keyToEl: make(map[string]*list.Element, per),
			cap:     per,
		}
	}
	return lb
}

// tryConsume looks up or creates the bucket for `key`, attempts to consume
// one token, and returns the decision. Updates LRU on touch and evicts the
// least-recently-used bucket when the shard is at capacity.
func (l *lruBuckets) tryConsume(key string, ratePerSec, burst float64) Decision {
	s := l.shards[shardIdx(key)]
	s.mu.Lock()
	defer s.mu.Unlock()

	b, ok := s.buckets[key]
	if !ok {
		// Capacity check + LRU eviction.
		if len(s.buckets) >= s.cap {
			oldest := s.order.Back()
			if oldest != nil {
				oldKey := oldest.Value.(string)
				delete(s.buckets, oldKey)
				delete(s.keyToEl, oldKey)
				s.order.Remove(oldest)
			}
		}
		b = &tokenBucket{
			tokens:     burst, // start full
			max:        burst,
			refill:     ratePerSec,
			lastRefill: time.Now(),
		}
		s.buckets[key] = b
		s.keyToEl[key] = s.order.PushFront(key)
	} else {
		// Touch LRU.
		s.order.MoveToFront(s.keyToEl[key])
		// Update rate/burst in case the function's override changed since last call.
		b.refill = ratePerSec
		b.max = burst
	}

	return b.tryConsume()
}

// tokenBucket is a leaky bucket; not safe for concurrent use without external lock.
type tokenBucket struct {
	tokens     float64
	max        float64
	refill     float64 // tokens per second
	lastRefill time.Time
}

func (b *tokenBucket) tryConsume() Decision {
	now := time.Now()
	elapsed := now.Sub(b.lastRefill).Seconds()
	b.lastRefill = now
	b.tokens += elapsed * b.refill
	if b.tokens > b.max {
		b.tokens = b.max
	}
	if b.tokens < 1.0 {
		// Compute how long until we have one token.
		needed := 1.0 - b.tokens
		seconds := needed / b.refill
		return Decision{
			Allowed:    false,
			RetryAfter: time.Duration(seconds * float64(time.Second)),
		}
	}
	b.tokens--
	return Decision{Allowed: true}
}

// shardIdx hashes key to one of lruShards.
func shardIdx(key string) uint32 {
	h := fnv.New32a()
	_, _ = h.Write([]byte(key))
	return h.Sum32() % lruShards
}