anonpenguin23 abcc23c4f3 refactor(monorepo): restructure repo with core, website, vault, os packages
- add monorepo Makefile delegating to sub-projects
- update CI workflows, GoReleaser, gitignore for new structure
- revise README, CONTRIBUTING.md for monorepo overview
- bump Go to 1.24
2026-03-26 18:21:55 +02:00

222 lines
5.6 KiB
Go

package sandbox
import (
"encoding/json"
"fmt"
"os"
"path/filepath"
)
// SeccompAction names what the seccomp filter does with a syscall, either
// because a rule matched it or because it fell through to the profile default.
// The values are the SCMP_ACT_* strings emitted verbatim into the JSON profile.
type SeccompAction string

// Actions used by the profiles built in this package.
const (
	// ActionAllow lets the syscall proceed.
	ActionAllow SeccompAction = "SCMP_ACT_ALLOW"

	// ActionLog lets the syscall proceed but records it (audit mode).
	ActionLog SeccompAction = "SCMP_ACT_LOG"

	// ActionKillProcess terminates the whole process that issued the syscall.
	ActionKillProcess SeccompAction = "SCMP_ACT_KILL_PROCESS"
)
// SeccompProfile is a seccomp filter description laid out in the JSON shape
// used by libseccomp / the OCI runtime spec: one default action plus a list
// of rules that override it for specific syscalls. The agent serialises it to
// a temp file and applies it (seccomp notifier or BPF loader) before exec.
type SeccompProfile struct {
	// DefaultAction applies to every syscall not covered by a rule in Syscalls.
	DefaultAction SeccompAction `json:"defaultAction"`
	// Syscalls lists the per-syscall rules.
	Syscalls []SeccompSyscall `json:"syscalls"`
}
// SeccompSyscall is a single rule of a SeccompProfile: a group of syscall
// names that all receive the same action.
type SeccompSyscall struct {
	// Names holds the syscall names the rule covers.
	Names []string `json:"names"`
	// Action is what happens when one of Names is invoked.
	Action SeccompAction `json:"action"`
}
// SeccompMode selects how strictly a generated profile treats syscalls that
// are not on its allow-list.
type SeccompMode int

// Enforcement levels. SeccompEnforce is the zero value.
const (
	// SeccompEnforce kills the process when it makes a disallowed syscall.
	SeccompEnforce SeccompMode = iota

	// SeccompAudit only logs disallowed syscalls and lets them through;
	// intended for profiling which syscalls a service needs.
	SeccompAudit
)
// baseSyscalls is the allow-list shared by every service profile: the
// syscalls needed for basic operation regardless of which service runs.
// The order of entries is the order in which they appear in generated profiles.
var baseSyscalls = []string{
	// --- Process lifecycle ---
	"exit", "exit_group", "getpid", "getppid", "gettid",
	"clone", "clone3", "fork", "vfork", "execve", "execveat",
	"wait4", "waitid",

	// --- Memory management ---
	"brk", "mmap", "munmap", "mremap", "mprotect", "madvise",
	"mlock", "munlock",

	// --- File operations ---
	"read", "write", "pread64", "pwrite64", "readv", "writev",
	"open", "openat", "close", "dup", "dup2", "dup3",
	"stat", "fstat", "lstat", "newfstatat",
	"access", "faccessat", "faccessat2",
	"lseek", "fcntl", "flock",
	"getcwd", "readlink", "readlinkat",
	"getdents64",

	// --- Directory operations ---
	"mkdir", "mkdirat", "rmdir",
	"rename", "renameat", "renameat2",
	"unlink", "unlinkat",
	"symlink", "symlinkat",
	"link", "linkat",
	"chmod", "fchmod", "fchmodat",
	"chown", "fchown", "fchownat",
	"utimensat",

	// --- IO multiplexing ---
	"epoll_create1", "epoll_ctl", "epoll_wait", "epoll_pwait", "epoll_pwait2",
	"poll", "ppoll", "select", "pselect6",
	"eventfd", "eventfd2",

	// --- Networking (basic) ---
	"socket", "connect", "accept", "accept4",
	"bind", "listen",
	"sendto", "recvfrom", "sendmsg", "recvmsg",
	"shutdown", "getsockname", "getpeername",
	"getsockopt", "setsockopt",

	// --- Signals ---
	"rt_sigaction", "rt_sigprocmask", "rt_sigreturn",
	"sigaltstack", "kill", "tgkill",

	// --- Time ---
	"clock_gettime", "clock_getres", "gettimeofday",
	"nanosleep", "clock_nanosleep",

	// --- Threading / synchronization ---
	"futex", "set_robust_list", "get_robust_list",
	"set_tid_address",

	// --- System info ---
	"uname", "getuid", "getgid", "geteuid", "getegid",
	"getgroups", "getrlimit", "setrlimit", "prlimit64",
	"sysinfo", "getrandom",

	// --- Pipe and IPC ---
	"pipe", "pipe2",
	"ioctl",

	// --- Misc ---
	"arch_prctl", "prctl", "seccomp",
	"sched_yield", "sched_getaffinity",
	"rseq",
	"close_range",
	"membarrier",
}
// ServiceSyscalls maps a service name to the syscalls it needs on top of
// baseSyscalls. The lists were obtained by running each service in audit
// mode (SCMP_ACT_LOG) and collecting the syscalls it required. Services are
// listed alphabetically; a service absent from the map gets the base set only.
var ServiceSyscalls = map[string][]string{
	"coredns": {
		// DNS traffic (UDP + TCP, port 53)
		"sendmmsg", "recvmmsg",
		// Zone files / cache
		"fsync", "fdatasync",
	},
	"gateway": {
		// HTTP serving
		"sendfile", "splice",
		// WebSocket
		"sendmmsg", "recvmmsg",
		// TLS
		"fsync", "fdatasync",
	},
	"ipfs": {
		// Block storage and data transfer
		"sendfile", "splice", "tee",
		// Repo management
		"fsync", "fdatasync", "ftruncate", "fallocate",
		// libp2p networking
		"sendmmsg", "recvmmsg",
	},
	"ipfs-cluster": {
		// CRDT datastore
		"fsync", "fdatasync", "ftruncate", "fallocate",
		// libp2p networking
		"sendfile",
	},
	"olric": {
		// Memberlist gossip (UDP multicast + TCP)
		"sendmmsg", "recvmmsg",
		// Embedded map operations
		"fsync", "fdatasync", "ftruncate",
	},
	"rqlite": {
		// Raft log + SQLite WAL
		"fsync", "fdatasync", "ftruncate", "fallocate",
		"sync_file_range",
		// SQLite memory-mapped I/O
		"mincore",
		// Raft networking (TCP)
		"sendfile",
	},
}
// BuildProfile assembles the seccomp profile for serviceName.
//
// The profile contains a single allow rule whose names are a fresh copy of
// baseSyscalls followed by the service's entries in ServiceSyscalls, if it has
// any; an unknown service gets the base set only. Every other syscall falls
// through to the default action: ActionLog when mode is SeccompAudit,
// ActionKillProcess for any other mode.
func BuildProfile(serviceName string, mode SeccompMode) *SeccompProfile {
	var fallback SeccompAction
	switch mode {
	case SeccompAudit:
		fallback = ActionLog
	default:
		fallback = ActionKillProcess
	}

	// A missing map key yields a nil slice, which appends as nothing.
	extras := ServiceSyscalls[serviceName]

	// Build into a new slice so the package-level lists are never aliased.
	names := make([]string, 0, len(baseSyscalls)+len(extras))
	names = append(names, baseSyscalls...)
	names = append(names, extras...)

	allowRule := SeccompSyscall{Names: names, Action: ActionAllow}
	return &SeccompProfile{
		DefaultAction: fallback,
		Syscalls:      []SeccompSyscall{allowRule},
	}
}
// WriteProfile builds the seccomp profile for serviceName, writes it as
// indented JSON to /tmp/orama-seccomp/<serviceName>.json and returns that path.
// The caller is responsible for removing the file after the process starts.
//
// serviceName must be a plain file-name component: a name containing path
// elements (for example "../x" or "a/b") is rejected with an error, because
// it would otherwise place the file outside the profile directory.
//
// The profile is written to a freshly created temporary file (mode 0600) in
// the same directory and then renamed over the final path. This means a
// reader never observes a partially written profile, and a file or symlink
// already sitting at the final path is replaced rather than written through.
func WriteProfile(serviceName string, mode SeccompMode) (string, error) {
	// filepath.Base strips any directory part, so a mismatch means the name
	// carries path elements. The empty name is left alone to keep the
	// historical behaviour (it yields ".json").
	if serviceName != "" && filepath.Base(serviceName) != serviceName {
		return "", fmt.Errorf("invalid service name %q: must not contain path separators", serviceName)
	}

	profile := BuildProfile(serviceName, mode)
	data, err := json.MarshalIndent(profile, "", " ")
	if err != nil {
		return "", fmt.Errorf("failed to marshal seccomp profile: %w", err)
	}

	dir := "/tmp/orama-seccomp"
	if err := os.MkdirAll(dir, 0700); err != nil {
		return "", fmt.Errorf("failed to create seccomp dir: %w", err)
	}
	path := filepath.Join(dir, serviceName+".json")

	// CreateTemp opens with O_EXCL and mode 0600, so it never reuses or
	// follows an entry that already exists in the directory.
	tmp, err := os.CreateTemp(dir, serviceName+".*.tmp")
	if err != nil {
		return "", fmt.Errorf("failed to write seccomp profile: %w", err)
	}
	tmpPath := tmp.Name()

	// Remove the temporary file on every failure path below.
	committed := false
	defer func() {
		if !committed {
			_ = os.Remove(tmpPath) // best effort; the primary error is already being returned
		}
	}()

	if _, err := tmp.Write(data); err != nil {
		_ = tmp.Close() // the write error is the one worth reporting
		return "", fmt.Errorf("failed to write seccomp profile: %w", err)
	}
	if err := tmp.Close(); err != nil {
		return "", fmt.Errorf("failed to write seccomp profile: %w", err)
	}

	// Rename within one directory atomically replaces any previous profile.
	if err := os.Rename(tmpPath, path); err != nil {
		return "", fmt.Errorf("failed to write seccomp profile: %w", err)
	}
	committed = true
	return path, nil
}