Created new monitoring cli interface for nodes, namespaces and cluster

This commit is contained in:
anonpenguin23 2026-02-16 11:16:02 +02:00
parent 88ba08fcba
commit 1e38fc2861
42 changed files with 5957 additions and 39 deletions

View File

@ -130,11 +130,11 @@ orama deploy go <path> --name myapp # Go binaries (must have /health e
orama deploy nodejs <path> --name myapp # Node.js apps (must have /health endpoint) orama deploy nodejs <path> --name myapp # Node.js apps (must have /health endpoint)
# Manage deployments # Manage deployments
orama deployments list # List all deployments orama app list # List all deployments
orama deployments get <name> # Get deployment details orama app get <name> # Get deployment details
orama deployments logs <name> --follow # View logs orama app logs <name> --follow # View logs
orama deployments delete <name> # Delete deployment orama app delete <name> # Delete deployment
orama deployments rollback <name> --version 1 # Rollback to version orama app rollback <name> --version 1 # Rollback to version
``` ```
### SQLite Databases ### SQLite Databases
@ -147,28 +147,12 @@ orama db backup <name> # Backup to IPFS
orama db backups <name> # List backups orama db backups <name> # List backups
``` ```
### Network Status ### Environment Management
```bash ```bash
orama health # Cluster health check orama env list # List available environments
orama peers # List connected peers orama env current # Show active environment
orama status # Network status orama env use <name> # Switch environment
```
### RQLite Operations
```bash
orama query "SELECT * FROM users"
orama query "CREATE TABLE users (id INTEGER PRIMARY KEY)"
orama transaction --file ops.json
```
### Pub/Sub
```bash
orama pubsub publish <topic> <message>
orama pubsub subscribe <topic> 30s
orama pubsub topics
``` ```
## Serverless Functions (WASM) ## Serverless Functions (WASM)
@ -267,14 +251,14 @@ Orama Network integrates with the [Anyone Protocol](https://anyone.io) for anony
```bash ```bash
# Install as relay operator (earn rewards) # Install as relay operator (earn rewards)
sudo orama install --vps-ip <IP> --domain <domain> \ sudo orama node install --vps-ip <IP> --domain <domain> \
--anyone-relay \ --anyone-relay \
--anyone-nickname "MyRelay" \ --anyone-nickname "MyRelay" \
--anyone-contact "operator@email.com" \ --anyone-contact "operator@email.com" \
--anyone-wallet "0x1234...abcd" --anyone-wallet "0x1234...abcd"
# With exit relay (legal implications apply) # With exit relay (legal implications apply)
sudo orama install --vps-ip <IP> --domain <domain> \ sudo orama node install --vps-ip <IP> --domain <domain> \
--anyone-relay \ --anyone-relay \
--anyone-exit \ --anyone-exit \
--anyone-nickname "MyExitRelay" \ --anyone-nickname "MyExitRelay" \
@ -282,7 +266,7 @@ sudo orama install --vps-ip <IP> --domain <domain> \
--anyone-wallet "0x1234...abcd" --anyone-wallet "0x1234...abcd"
# Migrate existing Anyone installation # Migrate existing Anyone installation
sudo orama install --vps-ip <IP> --domain <domain> \ sudo orama node install --vps-ip <IP> --domain <domain> \
--anyone-relay \ --anyone-relay \
--anyone-migrate \ --anyone-migrate \
--anyone-nickname "MyRelay" \ --anyone-nickname "MyRelay" \
@ -317,31 +301,34 @@ go install github.com/DeBrosOfficial/network/cmd/cli@latest
**Setup (after installation):** **Setup (after installation):**
```bash ```bash
sudo orama install --interactive sudo orama node install --interactive
``` ```
### Service Management ### Service Management
```bash ```bash
# Status # Status
orama status sudo orama node status
# Control services # Control services
sudo orama start sudo orama node start
sudo orama stop sudo orama node stop
sudo orama restart sudo orama node restart
# Diagnose issues
sudo orama node doctor
# View logs # View logs
orama logs node --follow orama node logs node --follow
orama logs gateway --follow orama node logs gateway --follow
orama logs ipfs --follow orama node logs ipfs --follow
``` ```
### Upgrade ### Upgrade
```bash ```bash
# Upgrade to latest version # Upgrade to latest version
sudo orama upgrade --interactive sudo orama node upgrade --restart
``` ```
## Configuration ## Configuration
@ -397,9 +384,9 @@ rqlite -H localhost -p 5001
```bash ```bash
# Production reset (⚠️ DESTROYS DATA) # Production reset (⚠️ DESTROYS DATA)
sudo orama uninstall sudo orama node uninstall
sudo rm -rf /opt/orama/.orama sudo rm -rf /opt/orama/.orama
sudo orama install sudo orama node install
``` ```
## HTTP Gateway API ## HTTP Gateway API

View File

@ -13,6 +13,7 @@ import (
deploycmd "github.com/DeBrosOfficial/network/pkg/cli/cmd/deploy" deploycmd "github.com/DeBrosOfficial/network/pkg/cli/cmd/deploy"
"github.com/DeBrosOfficial/network/pkg/cli/cmd/envcmd" "github.com/DeBrosOfficial/network/pkg/cli/cmd/envcmd"
"github.com/DeBrosOfficial/network/pkg/cli/cmd/inspectcmd" "github.com/DeBrosOfficial/network/pkg/cli/cmd/inspectcmd"
"github.com/DeBrosOfficial/network/pkg/cli/cmd/monitorcmd"
"github.com/DeBrosOfficial/network/pkg/cli/cmd/namespacecmd" "github.com/DeBrosOfficial/network/pkg/cli/cmd/namespacecmd"
"github.com/DeBrosOfficial/network/pkg/cli/cmd/node" "github.com/DeBrosOfficial/network/pkg/cli/cmd/node"
) )
@ -75,6 +76,9 @@ and interacting with the Orama distributed network.`,
// Inspect command // Inspect command
rootCmd.AddCommand(inspectcmd.Cmd) rootCmd.AddCommand(inspectcmd.Cmd)
// Monitor command
rootCmd.AddCommand(monitorcmd.Cmd)
return rootCmd return rootCmd
} }

View File

@ -0,0 +1,200 @@
package monitorcmd
import (
"context"
"os"
"time"
"github.com/DeBrosOfficial/network/pkg/cli/monitor"
"github.com/DeBrosOfficial/network/pkg/cli/monitor/display"
"github.com/DeBrosOfficial/network/pkg/cli/monitor/tui"
"github.com/spf13/cobra"
)
// Cmd is the root monitor command ("orama monitor"). Running it with no
// subcommand behaves like `monitor live` (see runLive): it launches the
// interactive TUI. All subcommands share the persistent flags declared below.
var Cmd = &cobra.Command{
	Use:   "monitor",
	Short: "Monitor cluster health from your local machine",
	Long: `SSH into cluster nodes and display real-time health data.
Runs 'orama node report --json' on each node and aggregates results.
Without a subcommand, launches the interactive TUI.`,
	RunE: runLive,
}
// Shared persistent flags, bound once in init and read by every subcommand.
var (
	flagEnv    string // target environment (devnet, testnet, mainnet); required
	flagJSON   bool   // emit machine-readable JSON instead of tables
	flagNode   string // optional filter to a single node host/IP
	flagConfig string // path to remote-nodes.conf listing cluster nodes
)

func init() {
	Cmd.PersistentFlags().StringVar(&flagEnv, "env", "", "Environment: devnet, testnet, mainnet (required)")
	Cmd.PersistentFlags().BoolVar(&flagJSON, "json", false, "Machine-readable JSON output")
	Cmd.PersistentFlags().StringVar(&flagNode, "node", "", "Filter to specific node host/IP")
	Cmd.PersistentFlags().StringVar(&flagConfig, "config", "scripts/remote-nodes.conf", "Path to remote-nodes.conf")
	// MarkPersistentFlagRequired only errors when the named flag does not
	// exist — a programmer error. Panic instead of silently dropping it, so
	// a typo here cannot quietly make --env optional.
	if err := Cmd.MarkPersistentFlagRequired("env"); err != nil {
		panic(err)
	}
	Cmd.AddCommand(liveCmd)
	Cmd.AddCommand(clusterCmd)
	Cmd.AddCommand(nodeCmd)
	Cmd.AddCommand(serviceCmd)
	Cmd.AddCommand(meshCmd)
	Cmd.AddCommand(dnsCmd)
	Cmd.AddCommand(namespacesCmd)
	Cmd.AddCommand(alertsCmd)
	Cmd.AddCommand(reportCmd)
}
// ---------------------------------------------------------------------------
// Subcommands
// ---------------------------------------------------------------------------
//
// Every one-shot subcommand follows the same pattern: collect a single
// snapshot via collectSnapshot, then render it as a human-readable table,
// or as JSON when the persistent --json flag is set.

var liveCmd = &cobra.Command{
	Use:   "live",
	Short: "Interactive TUI monitor",
	RunE:  runLive,
}

var clusterCmd = &cobra.Command{
	Use:   "cluster",
	Short: "Cluster overview (one-shot)",
	RunE: func(cmd *cobra.Command, args []string) error {
		snap, err := collectSnapshot()
		if err != nil {
			return err
		}
		if !flagJSON {
			return display.ClusterTable(snap, os.Stdout)
		}
		return display.ClusterJSON(snap, os.Stdout)
	},
}

var nodeCmd = &cobra.Command{
	Use:   "node",
	Short: "Per-node health details (one-shot)",
	RunE: func(cmd *cobra.Command, args []string) error {
		snap, err := collectSnapshot()
		if err != nil {
			return err
		}
		if !flagJSON {
			return display.NodeTable(snap, os.Stdout)
		}
		return display.NodeJSON(snap, os.Stdout)
	},
}

var serviceCmd = &cobra.Command{
	Use:   "service",
	Short: "Service status across the cluster (one-shot)",
	RunE: func(cmd *cobra.Command, args []string) error {
		snap, err := collectSnapshot()
		if err != nil {
			return err
		}
		if !flagJSON {
			return display.ServiceTable(snap, os.Stdout)
		}
		return display.ServiceJSON(snap, os.Stdout)
	},
}

var meshCmd = &cobra.Command{
	Use:   "mesh",
	Short: "Mesh connectivity status (one-shot)",
	RunE: func(cmd *cobra.Command, args []string) error {
		snap, err := collectSnapshot()
		if err != nil {
			return err
		}
		if !flagJSON {
			return display.MeshTable(snap, os.Stdout)
		}
		return display.MeshJSON(snap, os.Stdout)
	},
}

var dnsCmd = &cobra.Command{
	Use:   "dns",
	Short: "DNS health overview (one-shot)",
	RunE: func(cmd *cobra.Command, args []string) error {
		snap, err := collectSnapshot()
		if err != nil {
			return err
		}
		if !flagJSON {
			return display.DNSTable(snap, os.Stdout)
		}
		return display.DNSJSON(snap, os.Stdout)
	},
}

var namespacesCmd = &cobra.Command{
	Use:   "namespaces",
	Short: "Namespace usage summary (one-shot)",
	RunE: func(cmd *cobra.Command, args []string) error {
		snap, err := collectSnapshot()
		if err != nil {
			return err
		}
		if !flagJSON {
			return display.NamespacesTable(snap, os.Stdout)
		}
		return display.NamespacesJSON(snap, os.Stdout)
	},
}

var alertsCmd = &cobra.Command{
	Use:   "alerts",
	Short: "Active alerts and warnings (one-shot)",
	RunE: func(cmd *cobra.Command, args []string) error {
		snap, err := collectSnapshot()
		if err != nil {
			return err
		}
		if !flagJSON {
			return display.AlertsTable(snap, os.Stdout)
		}
		return display.AlertsJSON(snap, os.Stdout)
	},
}

var reportCmd = &cobra.Command{
	Use:   "report",
	Short: "Full cluster report (JSON)",
	RunE: func(cmd *cobra.Command, args []string) error {
		snap, err := collectSnapshot()
		if err != nil {
			return err
		}
		// Always JSON: the full report has no table form, so --json is moot.
		return display.FullReport(snap, os.Stdout)
	},
}
// ---------------------------------------------------------------------------
// Helpers
// ---------------------------------------------------------------------------

// collectSnapshot gathers one snapshot of the whole cluster using the
// collector configuration derived from the persistent flags.
func collectSnapshot() (*monitor.ClusterSnapshot, error) {
	return monitor.CollectOnce(context.Background(), newConfig())
}

// newConfig translates the persistent CLI flags into a CollectorConfig.
func newConfig() monitor.CollectorConfig {
	return monitor.CollectorConfig{
		ConfigPath: flagConfig,
		Env:        flagEnv,
		NodeFilter: flagNode,
		Timeout:    30 * time.Second,
	}
}

// runLive starts the interactive TUI. It backs both the bare `monitor`
// command and the explicit `monitor live` subcommand.
func runLive(cmd *cobra.Command, args []string) error {
	return tui.Run(newConfig())
}

View File

@ -25,4 +25,5 @@ func init() {
Cmd.AddCommand(inviteCmd) Cmd.AddCommand(inviteCmd)
Cmd.AddCommand(migrateCmd) Cmd.AddCommand(migrateCmd)
Cmd.AddCommand(doctorCmd) Cmd.AddCommand(doctorCmd)
Cmd.AddCommand(reportCmd)
} }

View File

@ -0,0 +1,22 @@
package node
import (
"github.com/DeBrosOfficial/network/pkg/cli/production/report"
"github.com/spf13/cobra"
)
// reportCmd implements `orama node report`: dump this node's full health
// state on stdout so `orama monitor` can aggregate it over SSH.
var reportCmd = &cobra.Command{
	Use:   "report",
	Short: "Output comprehensive node health data as JSON",
	Long: `Collect all system and service data from this node and output
as a single JSON blob. Designed to be called by 'orama monitor' over SSH.
Requires root privileges for full data collection.`,
	RunE: func(cmd *cobra.Command, args []string) error {
		// Error deliberately ignored: the flag is registered in init below,
		// so the lookup can only fail on a name typo here.
		jsonFlag, _ := cmd.Flags().GetBool("json")
		return report.Handle(jsonFlag, "")
	},
}

func init() {
	// JSON defaults to on — machine consumption is the primary use case.
	// Callers may opt out with --json=false.
	reportCmd.Flags().Bool("json", true, "Output as JSON (default)")
}

454
pkg/cli/monitor/alerts.go Normal file
View File

@ -0,0 +1,454 @@
package monitor
import (
"fmt"
"github.com/DeBrosOfficial/network/pkg/cli/production/report"
)
// AlertSeverity represents the severity of an alert.
type AlertSeverity string

// Severity levels, ordered from most to least urgent.
const (
	AlertCritical AlertSeverity = "critical" // requires immediate operator attention
	AlertWarning  AlertSeverity = "warning"  // degraded but still functioning
	AlertInfo     AlertSeverity = "info"     // noteworthy; no action required
)

// Alert represents a detected issue.
type Alert struct {
	Severity  AlertSeverity `json:"severity"`  // critical, warning or info
	Subsystem string        `json:"subsystem"` // e.g. "rqlite", "wireguard", "system"
	Node      string        `json:"node"`      // node host/IP, or "cluster" for cross-node issues
	Message   string        `json:"message"`   // human-readable description
}
// DeriveAlerts scans a ClusterSnapshot and produces alerts.
//
// It first flags nodes whose collection failed outright, then runs
// cluster-wide consistency checks over all healthy reports, and finally
// runs per-node checks against each healthy report.
func DeriveAlerts(snap *ClusterSnapshot) []Alert {
	var alerts []Alert

	// A node we could not even collect from is critical on its own.
	for _, cs := range snap.Nodes {
		if cs.Error != nil {
			alerts = append(alerts, Alert{
				Severity:  AlertCritical,
				Subsystem: "ssh",
				Node:      cs.Node.Host,
				Message:   fmt.Sprintf("Collection failed: %v", cs.Error),
			})
		}
	}

	reports := snap.Healthy()
	if len(reports) == 0 {
		return alerts
	}

	// Cluster-wide checks, run once over the set of healthy reports.
	crossChecks := []func([]*report.NodeReport) []Alert{
		checkRQLiteLeader,
		checkRaftTermConsistency,
		checkAppliedIndexLag,
		checkWGPeerSymmetry,
		checkClockSkew,
		checkBinaryVersion,
	}
	for _, check := range crossChecks {
		alerts = append(alerts, check(reports)...)
	}

	// Per-node checks, run for each healthy report.
	nodeChecks := []func(*report.NodeReport, string) []Alert{
		checkNodeRQLite,
		checkNodeWireGuard,
		checkNodeSystem,
		checkNodeServices,
		checkNodeDNS,
		checkNodeAnyone,
		checkNodeProcesses,
		checkNodeNamespaces,
		checkNodeNetwork,
	}
	for _, r := range reports {
		host := nodeHost(r)
		for _, check := range nodeChecks {
			alerts = append(alerts, check(r, host)...)
		}
	}
	return alerts
}
// nodeHost returns the best display identifier for a node: its public IP
// when known, otherwise its hostname.
func nodeHost(r *report.NodeReport) string {
	if ip := r.PublicIP; ip != "" {
		return ip
	}
	return r.Hostname
}
// --- Cross-node checks ---
// checkRQLiteLeader verifies that exactly one RQLite leader exists and that
// all nodes agree on the leader's address.
func checkRQLiteLeader(reports []*report.NodeReport) []Alert {
	leaderCount := 0
	leaderAddrs := make(map[string]bool)
	for _, r := range reports {
		rq := r.RQLite
		if rq == nil {
			continue
		}
		if rq.RaftState == "Leader" {
			leaderCount++
		}
		if rq.LeaderAddr != "" {
			leaderAddrs[rq.LeaderAddr] = true
		}
	}

	var alerts []Alert
	switch {
	case leaderCount == 0:
		alerts = append(alerts, Alert{AlertCritical, "rqlite", "cluster", "No RQLite leader found"})
	case leaderCount > 1:
		alerts = append(alerts, Alert{AlertCritical, "rqlite", "cluster",
			fmt.Sprintf("Split brain: %d leaders detected", leaderCount)})
	}
	if len(leaderAddrs) > 1 {
		alerts = append(alerts, Alert{AlertWarning, "rqlite", "cluster",
			fmt.Sprintf("Leader disagreement: nodes report %d different leader addresses", len(leaderAddrs))})
	}
	return alerts
}
// checkRaftTermConsistency warns when responsive nodes disagree on the Raft
// term by more than one. A delta of one is tolerated because it occurs
// transiently during normal leader elections.
func checkRaftTermConsistency(reports []*report.NodeReport) []Alert {
	var minTerm, maxTerm uint64
	seen := false
	for _, r := range reports {
		if r.RQLite == nil || !r.RQLite.Responsive {
			continue
		}
		term := r.RQLite.Term
		// Fix: the original wrote `first = true` inside its `if first`
		// branch — a dead assignment that only worked because of a trailing
		// `first = false`. Track initialization explicitly instead.
		if !seen {
			minTerm, maxTerm = term, term
			seen = true
			continue
		}
		if term < minTerm {
			minTerm = term
		}
		if term > maxTerm {
			maxTerm = term
		}
	}
	if seen && maxTerm-minTerm > 1 {
		return []Alert{{AlertWarning, "rqlite", "cluster",
			fmt.Sprintf("Raft term inconsistency: min=%d, max=%d (delta=%d)", minTerm, maxTerm, maxTerm-minTerm)}}
	}
	return nil
}
// checkAppliedIndexLag warns about responsive nodes whose applied Raft index
// trails the most advanced responsive node by more than 100 entries.
func checkAppliedIndexLag(reports []*report.NodeReport) []Alert {
	// Fix: compute the reference maximum over responsive nodes only. The
	// original included non-responsive nodes, so a stale Applied value from
	// a down node could inflate the maximum and raise false lag alerts on
	// perfectly healthy nodes.
	var maxApplied uint64
	for _, r := range reports {
		if r.RQLite != nil && r.RQLite.Responsive && r.RQLite.Applied > maxApplied {
			maxApplied = r.RQLite.Applied
		}
	}
	var alerts []Alert
	for _, r := range reports {
		if r.RQLite == nil || !r.RQLite.Responsive {
			continue
		}
		lag := maxApplied - r.RQLite.Applied
		if lag > 100 {
			alerts = append(alerts, Alert{AlertWarning, "rqlite", nodeHost(r),
				fmt.Sprintf("Applied index lag: %d behind leader (local=%d, max=%d)", lag, r.RQLite.Applied, maxApplied)})
		}
	}
	return alerts
}
// checkWGPeerSymmetry verifies that every node with an active WireGuard
// interface knows about all of its peers. In a full mesh of N nodes each
// node is expected to list N-1 peers.
func checkWGPeerSymmetry(reports []*report.NodeReport) []Alert {
	type nodeInfo struct {
		host     string
		peerKeys map[string]bool
	}
	// (The original also captured each node's wg_ip but never used it; the
	// dead field has been removed.)
	var nodes []nodeInfo
	for _, r := range reports {
		if r.WireGuard == nil || !r.WireGuard.InterfaceUp {
			continue
		}
		ni := nodeInfo{host: nodeHost(r), peerKeys: map[string]bool{}}
		for _, p := range r.WireGuard.Peers {
			ni.peerKeys[p.PublicKey] = true
		}
		nodes = append(nodes, ni)
	}
	// True pairwise symmetry (A lists B's key and vice versa) would need
	// each node's own public key; comparing peer counts against the N-1
	// expectation still catches missing peers.
	var alerts []Alert
	expectedPeers := len(nodes) - 1
	for _, ni := range nodes {
		if len(ni.peerKeys) < expectedPeers {
			alerts = append(alerts, Alert{AlertCritical, "wireguard", ni.host,
				fmt.Sprintf("WG peer count mismatch: has %d peers, expected %d", len(ni.peerKeys), expectedPeers)})
		}
	}
	return alerts
}
// checkClockSkew compares reported wall-clock times across nodes and warns
// when the spread exceeds 5 seconds. Reports are collected in parallel, so
// small deltas also include collection jitter — hence the generous threshold.
func checkClockSkew(reports []*report.NodeReport) []Alert {
	type sample struct {
		host string
		t    int64
	}
	var samples []sample
	for _, r := range reports {
		if r.System != nil && r.System.TimeUnix > 0 {
			samples = append(samples, sample{host: nodeHost(r), t: r.System.TimeUnix})
		}
	}
	if len(samples) < 2 {
		return nil
	}
	earliest, latest := samples[0], samples[0]
	for _, s := range samples[1:] {
		if s.t < earliest.t {
			earliest = s
		}
		if s.t > latest.t {
			latest = s
		}
	}
	if delta := latest.t - earliest.t; delta > 5 {
		return []Alert{{AlertWarning, "system", "cluster",
			fmt.Sprintf("Clock skew: %ds between %s and %s", delta, earliest.host, latest.host)}}
	}
	return nil
}
// checkBinaryVersion warns when nodes run different orama binary versions.
func checkBinaryVersion(reports []*report.NodeReport) []Alert {
	versions := map[string][]string{} // version -> hosts running it
	var order []string                // versions in first-seen order
	for _, r := range reports {
		v := r.Version
		if v == "" {
			v = "unknown"
		}
		if _, ok := versions[v]; !ok {
			order = append(order, v)
		}
		versions[v] = append(versions[v], nodeHost(r))
	}
	if len(versions) <= 1 {
		return nil
	}
	// Fix: build the message by iterating in first-seen order. The original
	// ranged over the map directly, whose iteration order is random in Go,
	// which made the alert text differ between otherwise identical runs.
	msg := "Binary version mismatch:"
	for _, v := range order {
		msg += fmt.Sprintf(" %s=%v", v, versions[v])
	}
	return []Alert{{AlertWarning, "system", "cluster", msg}}
}
// --- Per-node checks ---
// checkNodeRQLite reports per-node RQLite health problems: unresponsive
// daemon, failed readiness probe, or failed strong-consistency read.
func checkNodeRQLite(r *report.NodeReport, host string) []Alert {
	rq := r.RQLite
	if rq == nil {
		return nil
	}
	// When the daemon is unresponsive, the ready/strong-read probes carry
	// no extra signal, so report only the critical condition.
	if !rq.Responsive {
		return []Alert{{AlertCritical, "rqlite", host, "RQLite not responding"}}
	}
	var alerts []Alert
	if !rq.Ready {
		alerts = append(alerts, Alert{AlertWarning, "rqlite", host, "RQLite not ready (/readyz failed)"})
	}
	if !rq.StrongRead {
		alerts = append(alerts, Alert{AlertWarning, "rqlite", host, "Strong read failed"})
	}
	return alerts
}
// checkNodeWireGuard reports interface-down and per-peer handshake problems.
func checkNodeWireGuard(r *report.NodeReport, host string) []Alert {
	wg := r.WireGuard
	if wg == nil {
		return nil
	}
	if !wg.InterfaceUp {
		return []Alert{{AlertCritical, "wireguard", host, "WireGuard interface down"}}
	}
	var alerts []Alert
	for _, p := range wg.Peers {
		// The two conditions are mutually exclusive per peer: "never
		// handshaked" means LatestHandshake == 0, while staleness only
		// applies once at least one handshake has happened.
		switch {
		case p.LatestHandshake == 0:
			alerts = append(alerts, Alert{AlertCritical, "wireguard", host,
				fmt.Sprintf("WG peer %s has never handshaked", truncateKey(p.PublicKey))})
		case p.HandshakeAgeSec > 180:
			alerts = append(alerts, Alert{AlertWarning, "wireguard", host,
				fmt.Sprintf("Stale WG handshake with peer %s: %ds ago", truncateKey(p.PublicKey), p.HandshakeAgeSec)})
		}
	}
	return alerts
}
// checkNodeSystem reports memory, disk, OOM, swap and load problems on a node.
func checkNodeSystem(r *report.NodeReport, host string) []Alert {
	sys := r.System
	if sys == nil {
		return nil
	}
	var alerts []Alert
	add := func(sev AlertSeverity, msg string) {
		alerts = append(alerts, Alert{sev, "system", host, msg})
	}
	if sys.MemUsePct > 90 {
		add(AlertWarning, fmt.Sprintf("Memory at %d%%", sys.MemUsePct))
	}
	if sys.DiskUsePct > 85 {
		add(AlertWarning, fmt.Sprintf("Disk at %d%%", sys.DiskUsePct))
	}
	if sys.OOMKills > 0 {
		add(AlertCritical, fmt.Sprintf("%d OOM kills detected", sys.OOMKills))
	}
	if sys.SwapUsedMB > 0 && sys.SwapTotalMB > 0 {
		if pct := sys.SwapUsedMB * 100 / sys.SwapTotalMB; pct > 30 {
			add(AlertInfo, fmt.Sprintf("Swap usage at %d%%", pct))
		}
	}
	// Load is judged relative to core count; more than 2x cores is
	// treated as saturation.
	if sys.CPUCount > 0 {
		if ratio := sys.LoadAvg1 / float64(sys.CPUCount); ratio > 2.0 {
			add(AlertWarning, fmt.Sprintf("High load: %.1f (%.1fx CPU count)", sys.LoadAvg1, ratio))
		}
	}
	return alerts
}
// checkNodeServices reports failed or inactive managed services, restart
// loops, and any other failed systemd units on the node.
func checkNodeServices(r *report.NodeReport, host string) []Alert {
	svcs := r.Services
	if svcs == nil {
		return nil
	}
	var alerts []Alert
	for _, svc := range svcs.Services {
		switch svc.ActiveState {
		case "failed":
			alerts = append(alerts, Alert{AlertCritical, "service", host,
				fmt.Sprintf("Service %s is FAILED", svc.Name)})
		case "active", "", "unknown":
			// healthy, or no usable state information — nothing to report
		default:
			alerts = append(alerts, Alert{AlertWarning, "service", host,
				fmt.Sprintf("Service %s is %s", svc.Name, svc.ActiveState)})
		}
		if svc.RestartLoopRisk {
			alerts = append(alerts, Alert{AlertCritical, "service", host,
				fmt.Sprintf("Service %s restart loop: %d restarts, active for %ds", svc.Name, svc.NRestarts, svc.ActiveSinceSec)})
		}
	}
	for _, unit := range svcs.FailedUnits {
		alerts = append(alerts, Alert{AlertWarning, "service", host,
			fmt.Sprintf("Failed systemd unit: %s", unit)})
	}
	return alerts
}
// checkNodeDNS reports DNS service liveness, certificate expiry and
// record-resolution problems on a node.
func checkNodeDNS(r *report.NodeReport, host string) []Alert {
	dns := r.DNS
	if dns == nil {
		return nil
	}
	var alerts []Alert
	if !dns.CoreDNSActive {
		alerts = append(alerts, Alert{AlertCritical, "dns", host, "CoreDNS is down"})
	}
	if !dns.CaddyActive {
		alerts = append(alerts, Alert{AlertCritical, "dns", host, "Caddy is down"})
	}
	// Negative day counts are skipped — presumably "no certificate data";
	// confirm against the report collector.
	if dns.BaseTLSDaysLeft >= 0 && dns.BaseTLSDaysLeft < 14 {
		alerts = append(alerts, Alert{AlertWarning, "dns", host,
			fmt.Sprintf("Base TLS cert expires in %d days", dns.BaseTLSDaysLeft)})
	}
	if dns.WildTLSDaysLeft >= 0 && dns.WildTLSDaysLeft < 14 {
		alerts = append(alerts, Alert{AlertWarning, "dns", host,
			fmt.Sprintf("Wildcard TLS cert expires in %d days", dns.WildTLSDaysLeft)})
	}
	// SOA resolution is only meaningful while CoreDNS itself is running.
	if dns.CoreDNSActive && !dns.SOAResolves {
		alerts = append(alerts, Alert{AlertWarning, "dns", host, "SOA record not resolving"})
	}
	return alerts
}
// checkNodeAnyone warns when an Anyone relay or client is running but has
// not completed bootstrapping.
func checkNodeAnyone(r *report.NodeReport, host string) []Alert {
	a := r.Anyone
	if a == nil {
		return nil
	}
	running := a.RelayActive || a.ClientActive
	if running && !a.Bootstrapped {
		return []Alert{{AlertWarning, "anyone", host,
			fmt.Sprintf("Anyone bootstrap at %d%%", a.BootstrapPct)}}
	}
	return nil
}
// checkNodeProcesses reports zombie/orphan process counts and recent
// panic/fatal entries in the orama-node logs.
func checkNodeProcesses(r *report.NodeReport, host string) []Alert {
	procs := r.Processes
	if procs == nil {
		return nil
	}
	var alerts []Alert
	if n := procs.ZombieCount; n > 0 {
		alerts = append(alerts, Alert{AlertInfo, "system", host,
			fmt.Sprintf("%d zombie processes", n)})
	}
	if n := procs.OrphanCount; n > 0 {
		alerts = append(alerts, Alert{AlertInfo, "system", host,
			fmt.Sprintf("%d orphan orama processes", n)})
	}
	if n := procs.PanicCount; n > 0 {
		alerts = append(alerts, Alert{AlertCritical, "system", host,
			fmt.Sprintf("%d panic/fatal in orama-node logs (1h)", n)})
	}
	return alerts
}
// checkNodeNamespaces warns about namespaces on the node whose gateway or
// per-namespace RQLite instance is reported down.
func checkNodeNamespaces(r *report.NodeReport, host string) []Alert {
	var alerts []Alert
	for _, ns := range r.Namespaces {
		if !ns.GatewayUp {
			alerts = append(alerts, Alert{AlertWarning, "namespace", host,
				fmt.Sprintf("Namespace %s gateway down", ns.Name)})
		}
		if !ns.RQLiteUp {
			alerts = append(alerts, Alert{AlertWarning, "namespace", host,
				fmt.Sprintf("Namespace %s RQLite down", ns.Name)})
		}
	}
	return alerts
}
// checkNodeNetwork reports firewall, internet-reachability and TCP quality
// problems on a node.
func checkNodeNetwork(r *report.NodeReport, host string) []Alert {
	net := r.Network
	if net == nil {
		return nil
	}
	var alerts []Alert
	if !net.UFWActive {
		alerts = append(alerts, Alert{AlertCritical, "network", host, "UFW firewall is inactive"})
	}
	if !net.InternetReachable {
		alerts = append(alerts, Alert{AlertWarning, "network", host, "Internet not reachable (ping 8.8.8.8 failed)"})
	}
	if rate := net.TCPRetransRate; rate > 5.0 {
		alerts = append(alerts, Alert{AlertWarning, "network", host,
			fmt.Sprintf("High TCP retransmission rate: %.1f%%", rate)})
	}
	return alerts
}
// truncateKey shortens a WireGuard public key to its first 8 characters for
// compact display, appending an ellipsis when anything was cut.
func truncateKey(key string) string {
	const keep = 8
	if len(key) <= keep {
		return key
	}
	return key[:keep] + "..."
}

View File

@ -0,0 +1,115 @@
package monitor
import (
"context"
"encoding/json"
"fmt"
"sync"
"time"
"github.com/DeBrosOfficial/network/pkg/cli/production/report"
"github.com/DeBrosOfficial/network/pkg/inspector"
)
// CollectorConfig holds configuration for the collection pipeline.
type CollectorConfig struct {
	ConfigPath string        // path to remote-nodes.conf listing cluster nodes
	Env        string        // environment to collect from (devnet, testnet, mainnet)
	NodeFilter string        // if non-empty, restrict collection to this exact host/IP
	Timeout    time.Duration // per-node SSH timeout; zero selects the 30s default
}
// CollectOnce runs `sudo orama node report --json` on all matching nodes
// in parallel and returns a ClusterSnapshot. Per-node failures are recorded
// inside the snapshot; only configuration problems (unreadable node list,
// empty selection) return an error.
func CollectOnce(ctx context.Context, cfg CollectorConfig) (*ClusterSnapshot, error) {
	nodes, err := inspector.LoadNodes(cfg.ConfigPath)
	if err != nil {
		return nil, fmt.Errorf("load nodes: %w", err)
	}
	nodes = inspector.FilterByEnv(nodes, cfg.Env)
	if cfg.NodeFilter != "" {
		nodes = filterByHost(nodes, cfg.NodeFilter)
	}
	if len(nodes) == 0 {
		return nil, fmt.Errorf("no nodes found for env %q", cfg.Env)
	}

	timeout := cfg.Timeout
	if timeout == 0 {
		timeout = 30 * time.Second
	}

	start := time.Now()
	snap := &ClusterSnapshot{
		Environment: cfg.Env,
		CollectedAt: start,
		Nodes:       make([]CollectionStatus, len(nodes)),
	}

	// Fan out one goroutine per node; each goroutine writes only its own
	// pre-allocated slot in snap.Nodes, so no locking is required.
	var wg sync.WaitGroup
	for i := range nodes {
		wg.Add(1)
		go func(idx int) {
			defer wg.Done()
			snap.Nodes[idx] = collectNodeReport(ctx, nodes[idx], timeout)
		}(i)
	}
	wg.Wait()

	snap.Duration = time.Since(start)
	snap.Alerts = DeriveAlerts(snap)
	return snap, nil
}
// collectNodeReport SSHes into a single node, runs the report command and
// parses the JSON it prints. SSH and parse failures are recorded in the
// returned CollectionStatus instead of being returned as errors, so one bad
// node never aborts the whole collection.
func collectNodeReport(ctx context.Context, node inspector.Node, timeout time.Duration) CollectionStatus {
	nodeCtx, cancel := context.WithTimeout(ctx, timeout)
	defer cancel()

	start := time.Now()
	result := inspector.RunSSH(nodeCtx, node, "sudo orama node report --json")
	status := CollectionStatus{
		Node:     node,
		Duration: time.Since(start),
		Retries:  result.Retries,
	}

	if !result.OK() {
		status.Error = fmt.Errorf("SSH failed (exit %d): %s", result.ExitCode, truncate(result.Stderr, 200))
		return status
	}

	var rpt report.NodeReport
	if err := json.Unmarshal([]byte(result.Stdout), &rpt); err != nil {
		status.Error = fmt.Errorf("parse report JSON: %w (first 200 bytes: %s)", err, truncate(result.Stdout, 200))
		return status
	}

	// Enrich with node metadata from remote-nodes.conf: fill a missing
	// hostname and always record which host we actually dialed.
	if rpt.Hostname == "" {
		rpt.Hostname = node.Host
	}
	rpt.PublicIP = node.Host
	status.Report = &rpt
	return status
}
// filterByHost returns only the nodes whose Host matches host exactly.
func filterByHost(nodes []inspector.Node, host string) []inspector.Node {
	var matched []inspector.Node
	for _, node := range nodes {
		if node.Host != host {
			continue
		}
		matched = append(matched, node)
	}
	return matched
}
// truncate caps s at maxLen bytes, appending "..." when anything was cut.
// The cut is byte-based, so a multi-byte UTF-8 sequence at the boundary may
// be split — acceptable for the error/log snippets this serves.
func truncate(s string, maxLen int) string {
	if len(s) > maxLen {
		return s[:maxLen] + "..."
	}
	return s
}

View File

@ -0,0 +1,64 @@
package display
import (
"fmt"
"io"
"sort"
"strings"
"github.com/DeBrosOfficial/network/pkg/cli/monitor"
)
// AlertsTable prints alerts sorted by severity to w: critical first, then
// warning, then info. Within one severity the original collection order is
// preserved, so repeated runs over the same snapshot print identically.
func AlertsTable(snap *monitor.ClusterSnapshot, w io.Writer) error {
	critCount, warnCount := countAlerts(snap.Alerts)
	fmt.Fprintf(w, "%s\n", styleBold.Render(
		fmt.Sprintf("Alerts \u2014 %s (%d critical, %d warning)",
			snap.Environment, critCount, warnCount)))
	fmt.Fprintln(w, strings.Repeat("\u2550", 44))
	fmt.Fprintln(w)

	if len(snap.Alerts) == 0 {
		fmt.Fprintln(w, styleGreen.Render(" No alerts"))
		return nil
	}

	// Sort a copy so the snapshot itself is left untouched. Fix: use
	// SliceStable rather than Slice — sort.Slice is unstable, so alerts of
	// equal severity would shuffle nondeterministically between runs.
	sorted := make([]monitor.Alert, len(snap.Alerts))
	copy(sorted, snap.Alerts)
	sort.SliceStable(sorted, func(i, j int) bool {
		return severityRank(sorted[i].Severity) < severityRank(sorted[j].Severity)
	})

	for _, a := range sorted {
		tag := severityTag(a.Severity)
		node := a.Node
		if node == "" {
			node = "cluster"
		}
		fmt.Fprintf(w, "%s %-18s %-12s %s\n",
			tag, node, a.Subsystem, a.Message)
	}
	return nil
}
// AlertsJSON writes the snapshot's alert slice as JSON to w, delegating to
// the package's shared writeJSON helper.
func AlertsJSON(snap *monitor.ClusterSnapshot, w io.Writer) error {
	return writeJSON(w, snap.Alerts)
}
// severityRank returns a sort rank for severity (lower = higher priority).
// Unrecognized severities sort after all known ones.
func severityRank(s monitor.AlertSeverity) int {
	switch s {
	case monitor.AlertCritical:
		return 0
	case monitor.AlertWarning:
		return 1
	case monitor.AlertInfo:
		return 2
	}
	return 3
}

View File

@ -0,0 +1,204 @@
package display
import (
"fmt"
"io"
"strings"
"github.com/DeBrosOfficial/network/pkg/cli/monitor"
)
// ClusterTable prints a cluster overview table to w.
//
// Layout: a bold title line, a column header, one row per reachable node
// (memory %, disk %, RQLite raft state, WireGuard up/down icon and
// active/total service count), a red block listing unreachable nodes, and
// finally a critical/warning alert summary. Info-level alerts are omitted.
func ClusterTable(snap *monitor.ClusterSnapshot, w io.Writer) error {
	dur := snap.Duration.Seconds()
	fmt.Fprintf(w, "%s\n", styleBold.Render(
		fmt.Sprintf("Cluster Overview \u2014 %s (%d nodes, collected in %.1fs)",
			snap.Environment, snap.TotalCount(), dur)))
	fmt.Fprintln(w, strings.Repeat("\u2550", 60))
	fmt.Fprintln(w)
	// Header
	fmt.Fprintf(w, "%-18s %-12s %-6s %-6s %-11s %-5s %s\n",
		styleHeader.Render("NODE"),
		styleHeader.Render("ROLE"),
		styleHeader.Render("MEM"),
		styleHeader.Render("DISK"),
		styleHeader.Render("RQLITE"),
		styleHeader.Render("WG"),
		styleHeader.Render("SERVICES"))
	fmt.Fprintln(w, separator(70))
	// Healthy nodes (failed ones are listed separately below)
	for _, cs := range snap.Nodes {
		if cs.Error != nil {
			continue
		}
		r := cs.Report
		if r == nil {
			continue
		}
		host := cs.Node.Host
		role := cs.Node.Role
		// Memory % — "--" when the report carried no system section
		memStr := "--"
		if r.System != nil {
			memStr = fmt.Sprintf("%d%%", r.System.MemUsePct)
		}
		// Disk %
		diskStr := "--"
		if r.System != nil {
			diskStr = fmt.Sprintf("%d%%", r.System.DiskUsePct)
		}
		// RQLite state: raft role when responsive, red DOWN when present
		// but unresponsive, "--" when the report had no RQLite section
		rqliteStr := "--"
		if r.RQLite != nil && r.RQLite.Responsive {
			rqliteStr = r.RQLite.RaftState
		} else if r.RQLite != nil {
			rqliteStr = styleRed.Render("DOWN")
		}
		// WireGuard
		wgStr := statusIcon(r.WireGuard != nil && r.WireGuard.InterfaceUp)
		// Services: active/total
		svcStr := "--"
		if r.Services != nil {
			active := 0
			total := len(r.Services.Services)
			for _, svc := range r.Services.Services {
				if svc.ActiveState == "active" {
					active++
				}
			}
			svcStr = fmt.Sprintf("%d/%d", active, total)
		}
		fmt.Fprintf(w, "%-18s %-12s %-6s %-6s %-11s %-5s %s\n",
			host, role, memStr, diskStr, rqliteStr, wgStr, svcStr)
	}
	// Unreachable nodes
	failed := snap.Failed()
	if len(failed) > 0 {
		fmt.Fprintln(w)
		for _, cs := range failed {
			fmt.Fprintf(w, "%-18s %-12s %s\n",
				styleRed.Render(cs.Node.Host),
				cs.Node.Role,
				styleRed.Render("UNREACHABLE"))
		}
	}
	// Alerts summary — counts plus one line per critical/warning alert
	critCount, warnCount := countAlerts(snap.Alerts)
	fmt.Fprintln(w)
	fmt.Fprintf(w, "Alerts: %s critical, %s warning\n",
		alertCountStr(critCount, monitor.AlertCritical),
		alertCountStr(warnCount, monitor.AlertWarning))
	for _, a := range snap.Alerts {
		if a.Severity == monitor.AlertCritical || a.Severity == monitor.AlertWarning {
			tag := severityTag(a.Severity)
			fmt.Fprintf(w, " %s %s: %s\n", tag, a.Node, a.Message)
		}
	}
	return nil
}
// ClusterJSON writes the cluster snapshot as JSON: one entry per node with
// the same fields the table view shows, plus a status of "ok" or
// "unreachable" (with the SSH error when one was recorded).
func ClusterJSON(snap *monitor.ClusterSnapshot, w io.Writer) error {
	type clusterEntry struct {
		Host     string `json:"host"`
		Role     string `json:"role"`
		MemPct   int    `json:"mem_pct"`
		DiskPct  int    `json:"disk_pct"`
		RQLite   string `json:"rqlite_state"`
		WGUp     bool   `json:"wg_up"`
		Services string `json:"services"`
		Status   string `json:"status"`
		Error    string `json:"error,omitempty"`
	}
	var entries []clusterEntry
	for _, cs := range snap.Nodes {
		entry := clusterEntry{Host: cs.Node.Host, Role: cs.Node.Role}
		r := cs.Report
		switch {
		case cs.Error != nil:
			entry.Status = "unreachable"
			entry.Error = cs.Error.Error()
		case r == nil:
			entry.Status = "unreachable"
		default:
			entry.Status = "ok"
			if sys := r.System; sys != nil {
				entry.MemPct = sys.MemUsePct
				entry.DiskPct = sys.DiskUsePct
			}
			if rq := r.RQLite; rq != nil && rq.Responsive {
				entry.RQLite = rq.RaftState
			}
			entry.WGUp = r.WireGuard != nil && r.WireGuard.InterfaceUp
			if svcs := r.Services; svcs != nil {
				active := 0
				for _, svc := range svcs.Services {
					if svc.ActiveState == "active" {
						active++
					}
				}
				entry.Services = fmt.Sprintf("%d/%d", active, len(svcs.Services))
			}
		}
		entries = append(entries, entry)
	}
	return writeJSON(w, entries)
}
// countAlerts returns the number of critical and warning alerts.
// Info-level alerts are intentionally not counted.
func countAlerts(alerts []monitor.Alert) (crit, warn int) {
	for _, a := range alerts {
		if a.Severity == monitor.AlertCritical {
			crit++
		} else if a.Severity == monitor.AlertWarning {
			warn++
		}
	}
	return crit, warn
}
// severityTag returns a colored tag like [CRIT], [WARN], [INFO].
// Unknown severities render as a muted [????].
func severityTag(s monitor.AlertSeverity) string {
	switch s {
	case monitor.AlertCritical:
		return styleRed.Render("[CRIT]")
	case monitor.AlertWarning:
		return styleYellow.Render("[WARN]")
	case monitor.AlertInfo:
		return styleMuted.Render("[INFO]")
	}
	return styleMuted.Render("[????]")
}
// alertCountStr renders the count with appropriate color. A zero count is
// left uncolored so healthy output stays visually quiet.
func alertCountStr(count int, sev monitor.AlertSeverity) string {
	text := fmt.Sprintf("%d", count)
	if count == 0 {
		return text
	}
	return severityColor(sev).Render(text)
}

View File

@ -0,0 +1,129 @@
package display
import (
"fmt"
"io"
"strings"
"github.com/DeBrosOfficial/network/pkg/cli/monitor"
)
// DNSTable prints DNS status for nameserver nodes to w.
//
// One row is emitted per nameserver node, showing service state
// (CoreDNS, Caddy), resolution checks (SOA, NS, wildcard) and TLS
// expiry for the base and wildcard certificates. Nodes whose
// IsNameserver() is false are skipped entirely; unreachable
// nameservers are rendered in red. Always returns nil.
func DNSTable(snap *monitor.ClusterSnapshot, w io.Writer) error {
	fmt.Fprintf(w, "%s\n", styleBold.Render(
		fmt.Sprintf("DNS Status \u2014 %s", snap.Environment)))
	fmt.Fprintln(w, strings.Repeat("\u2550", 22))
	fmt.Fprintln(w)
	// Header
	fmt.Fprintf(w, "%-18s %-9s %-7s %-5s %-5s %-10s %-10s %s\n",
		styleHeader.Render("NODE"),
		styleHeader.Render("COREDNS"),
		styleHeader.Render("CADDY"),
		styleHeader.Render("SOA"),
		styleHeader.Render("NS"),
		styleHeader.Render("WILDCARD"),
		styleHeader.Render("BASE TLS"),
		styleHeader.Render("WILD TLS"))
	fmt.Fprintln(w, separator(78))
	found := false
	for _, cs := range snap.Nodes {
		// Only show nameserver nodes
		if !cs.Node.IsNameserver() {
			continue
		}
		found = true
		// Collection failed for this node: single red row, no columns.
		if cs.Error != nil || cs.Report == nil {
			fmt.Fprintf(w, "%-18s %s\n",
				styleRed.Render(cs.Node.Host),
				styleRed.Render("UNREACHABLE"))
			continue
		}
		r := cs.Report
		// Node reported but carried no DNS section.
		if r.DNS == nil {
			fmt.Fprintf(w, "%-18s %s\n",
				cs.Node.Host,
				styleMuted.Render("no DNS data"))
			continue
		}
		dns := r.DNS
		fmt.Fprintf(w, "%-18s %-9s %-7s %-5s %-5s %-10s %-10s %s\n",
			cs.Node.Host,
			statusIcon(dns.CoreDNSActive),
			statusIcon(dns.CaddyActive),
			statusIcon(dns.SOAResolves),
			statusIcon(dns.NSResolves),
			statusIcon(dns.WildcardResolves),
			tlsDaysStr(dns.BaseTLSDaysLeft),
			tlsDaysStr(dns.WildTLSDaysLeft))
	}
	if !found {
		fmt.Fprintln(w, styleMuted.Render(" No nameserver nodes found"))
	}
	return nil
}
// DNSJSON writes per-nameserver DNS status as indented JSON to w.
// Non-nameserver nodes are skipped; unreachable nameservers appear
// with only host and error populated.
func DNSJSON(snap *monitor.ClusterSnapshot, w io.Writer) error {
	type dnsEntry struct {
		Host             string `json:"host"`
		CoreDNSActive    bool   `json:"coredns_active"`
		CaddyActive      bool   `json:"caddy_active"`
		SOAResolves      bool   `json:"soa_resolves"`
		NSResolves       bool   `json:"ns_resolves"`
		WildcardResolves bool   `json:"wildcard_resolves"`
		BaseTLSDaysLeft  int    `json:"base_tls_days_left"`
		WildTLSDaysLeft  int    `json:"wild_tls_days_left"`
		Error            string `json:"error,omitempty"`
	}
	// Nil slice keeps the "no nameservers" output as JSON null.
	var entries []dnsEntry
	for _, node := range snap.Nodes {
		if !node.Node.IsNameserver() {
			continue
		}
		entry := dnsEntry{Host: node.Node.Host}
		if node.Error != nil {
			entry.Error = node.Error.Error()
		} else if node.Report != nil && node.Report.DNS != nil {
			d := node.Report.DNS
			entry.CoreDNSActive = d.CoreDNSActive
			entry.CaddyActive = d.CaddyActive
			entry.SOAResolves = d.SOAResolves
			entry.NSResolves = d.NSResolves
			entry.WildcardResolves = d.WildcardResolves
			entry.BaseTLSDaysLeft = d.BaseTLSDaysLeft
			entry.WildTLSDaysLeft = d.WildTLSDaysLeft
		}
		entries = append(entries, entry)
	}
	return writeJSON(w, entries)
}
// tlsDaysStr formats a TLS days-remaining value with urgency coloring:
// muted "--" for negative (unknown), red under 7 days, yellow under 30,
// green otherwise.
func tlsDaysStr(days int) string {
	if days < 0 {
		return styleMuted.Render("--")
	}
	label := fmt.Sprintf("%d days", days)
	if days < 7 {
		return styleRed.Render(label)
	}
	if days < 30 {
		return styleYellow.Render(label)
	}
	return styleGreen.Render(label)
}

View File

@ -0,0 +1,194 @@
package display
import (
"fmt"
"io"
"strings"
"github.com/DeBrosOfficial/network/pkg/cli/monitor"
)
// MeshTable prints WireGuard mesh status to w.
//
// It renders a summary table (one row per reachable node that reported
// WireGuard data) followed by a per-peer detail section with handshake
// age and transfer counters. Unreachable nodes are skipped. Always
// returns nil.
//
// Fix: the original built a meshNode struct and meshNodes slice that
// were never read afterwards; that dead code is removed. Output is
// unchanged.
func MeshTable(snap *monitor.ClusterSnapshot, w io.Writer) error {
	fmt.Fprintf(w, "%s\n", styleBold.Render(
		fmt.Sprintf("WireGuard Mesh \u2014 %s", snap.Environment)))
	fmt.Fprintln(w, strings.Repeat("\u2550", 28))
	fmt.Fprintln(w)
	// Header
	fmt.Fprintf(w, "%-18s %-12s %-7s %-7s %s\n",
		styleHeader.Render("NODE"),
		styleHeader.Render("WG IP"),
		styleHeader.Render("PORT"),
		styleHeader.Render("PEERS"),
		styleHeader.Render("STATUS"))
	fmt.Fprintln(w, separator(54))
	// Every healthy node is expected to peer with every other healthy node.
	expectedPeers := snap.HealthyCount() - 1
	for _, cs := range snap.Nodes {
		if cs.Error != nil || cs.Report == nil {
			continue
		}
		r := cs.Report
		if r.WireGuard == nil {
			fmt.Fprintf(w, "%-18s %s\n", cs.Node.Host, styleMuted.Render("no WireGuard"))
			continue
		}
		wg := r.WireGuard
		// Healthy only when the interface is up and every peer has
		// completed a handshake within the last 3 minutes.
		allOK := wg.InterfaceUp
		if allOK {
			for _, p := range wg.Peers {
				if p.LatestHandshake == 0 || p.HandshakeAgeSec > 180 {
					allOK = false
					break
				}
			}
		}
		peerStr := fmt.Sprintf("%d/%d", wg.PeerCount, expectedPeers)
		statusStr := statusIcon(allOK)
		if !wg.InterfaceUp {
			statusStr = styleRed.Render("DOWN")
		}
		fmt.Fprintf(w, "%-18s %-12s %-7d %-7s %s\n",
			cs.Node.Host, wg.WgIP, wg.ListenPort, peerStr, statusStr)
	}
	// Peer details
	fmt.Fprintln(w)
	fmt.Fprintln(w, styleBold.Render("Peer Details:"))
	for _, cs := range snap.Nodes {
		if cs.Error != nil || cs.Report == nil || cs.Report.WireGuard == nil {
			continue
		}
		wg := cs.Report.WireGuard
		if !wg.InterfaceUp {
			continue
		}
		localIP := wg.WgIP
		for _, p := range wg.Peers {
			hsAge := formatDuration(p.HandshakeAgeSec)
			rx := formatBytes(p.TransferRx)
			tx := formatBytes(p.TransferTx)
			peerIP := p.AllowedIPs
			// Strip CIDR if present
			if idx := strings.Index(peerIP, "/"); idx > 0 {
				peerIP = peerIP[:idx]
			}
			// Color the handshake age: red for never, yellow for stale (>3m).
			hsColor := styleGreen
			if p.LatestHandshake == 0 {
				hsAge = "never"
				hsColor = styleRed
			} else if p.HandshakeAgeSec > 180 {
				hsColor = styleYellow
			}
			fmt.Fprintf(w, "  %s \u2194 %s: handshake %s, rx: %s, tx: %s\n",
				localIP, peerIP, hsColor.Render(hsAge), rx, tx)
		}
	}
	return nil
}
// MeshJSON writes the WireGuard mesh topology as indented JSON to w.
// Only nodes that reported WireGuard data are included; each entry
// lists the node's interface details plus one record per peer.
func MeshJSON(snap *monitor.ClusterSnapshot, w io.Writer) error {
	type peerEntry struct {
		AllowedIPs      string `json:"allowed_ips"`
		HandshakeAgeSec int64  `json:"handshake_age_sec"`
		TransferRxBytes int64  `json:"transfer_rx_bytes"`
		TransferTxBytes int64  `json:"transfer_tx_bytes"`
	}
	type meshEntry struct {
		Host       string      `json:"host"`
		WgIP       string      `json:"wg_ip"`
		ListenPort int         `json:"listen_port"`
		PeerCount  int         `json:"peer_count"`
		Up         bool        `json:"up"`
		Peers      []peerEntry `json:"peers,omitempty"`
	}
	var entries []meshEntry
	for _, node := range snap.Nodes {
		if node.Error != nil || node.Report == nil || node.Report.WireGuard == nil {
			continue
		}
		wg := node.Report.WireGuard
		entry := meshEntry{
			Host:       node.Node.Host,
			WgIP:       wg.WgIP,
			ListenPort: wg.ListenPort,
			PeerCount:  wg.PeerCount,
			Up:         wg.InterfaceUp,
		}
		for _, peer := range wg.Peers {
			entry.Peers = append(entry.Peers, peerEntry{
				AllowedIPs:      peer.AllowedIPs,
				HandshakeAgeSec: peer.HandshakeAgeSec,
				TransferRxBytes: peer.TransferRx,
				TransferTxBytes: peer.TransferTx,
			})
		}
		entries = append(entries, entry)
	}
	return writeJSON(w, entries)
}
// formatDuration renders a second count as a coarse "ago" string:
// seconds under a minute, whole minutes under an hour, whole hours
// otherwise (integer division, truncating).
func formatDuration(sec int64) string {
	switch {
	case sec < 60:
		return fmt.Sprintf("%ds ago", sec)
	case sec < 3600:
		return fmt.Sprintf("%dm ago", sec/60)
	default:
		return fmt.Sprintf("%dh ago", sec/3600)
	}
}
// formatBytes renders a byte count using binary units (KB = 1024 B),
// with one decimal place for KB/MB/GB and a plain integer below 1 KB.
func formatBytes(b int64) string {
	const unit = 1024
	if b < unit {
		return fmt.Sprintf("%dB", b)
	}
	if b < unit*unit {
		return fmt.Sprintf("%.1fKB", float64(b)/unit)
	}
	if b < unit*unit*unit {
		return fmt.Sprintf("%.1fMB", float64(b)/(unit*unit))
	}
	return fmt.Sprintf("%.1fGB", float64(b)/(unit*unit*unit))
}

View File

@ -0,0 +1,114 @@
package display
import (
"fmt"
"io"
"sort"
"strings"
"github.com/DeBrosOfficial/network/pkg/cli/monitor"
)
// NamespacesTable prints per-namespace health across nodes to w.
//
// Each reachable node contributes one row per namespace it reported,
// showing RQLite, Olric and gateway status. Rows are sorted by
// namespace name, then by host, so the same namespace's replicas
// group together. Always returns nil.
func NamespacesTable(snap *monitor.ClusterSnapshot, w io.Writer) error {
	fmt.Fprintf(w, "%s\n", styleBold.Render(
		fmt.Sprintf("Namespace Health \u2014 %s", snap.Environment)))
	fmt.Fprintln(w, strings.Repeat("\u2550", 28))
	fmt.Fprintln(w)
	// Collect all namespace entries across nodes
	type nsRow struct {
		namespace string
		host      string
		rqlite    string
		olric     string
		gateway   string
	}
	var rows []nsRow
	nsNames := map[string]bool{}
	for _, cs := range snap.Nodes {
		if cs.Error != nil || cs.Report == nil {
			continue
		}
		for _, ns := range cs.Report.Namespaces {
			nsNames[ns.Name] = true
			// Prefer the raft state string over a bare OK icon when
			// RQLite is up and a state was reported.
			rqliteStr := statusIcon(ns.RQLiteUp)
			if ns.RQLiteUp && ns.RQLiteState != "" {
				rqliteStr = ns.RQLiteState
			}
			rows = append(rows, nsRow{
				namespace: ns.Name,
				host:      cs.Node.Host,
				rqlite:    rqliteStr,
				olric:     statusIcon(ns.OlricUp),
				gateway:   statusIcon(ns.GatewayUp),
			})
		}
	}
	if len(rows) == 0 {
		fmt.Fprintln(w, styleMuted.Render("  No namespaces found"))
		return nil
	}
	// Sort by namespace name, then host
	sort.Slice(rows, func(i, j int) bool {
		if rows[i].namespace != rows[j].namespace {
			return rows[i].namespace < rows[j].namespace
		}
		return rows[i].host < rows[j].host
	})
	// Header
	fmt.Fprintf(w, "%-13s %-18s %-11s %-7s %s\n",
		styleHeader.Render("NAMESPACE"),
		styleHeader.Render("NODE"),
		styleHeader.Render("RQLITE"),
		styleHeader.Render("OLRIC"),
		styleHeader.Render("GATEWAY"))
	fmt.Fprintln(w, separator(58))
	for _, r := range rows {
		fmt.Fprintf(w, "%-13s %-18s %-11s %-7s %s\n",
			r.namespace, r.host, r.rqlite, r.olric, r.gateway)
	}
	return nil
}
// NamespacesJSON writes namespace health as indented JSON to w, one
// entry per (namespace, node) pair, in the order nodes and namespaces
// were reported. Unreachable nodes contribute nothing.
func NamespacesJSON(snap *monitor.ClusterSnapshot, w io.Writer) error {
	type nsEntry struct {
		Namespace     string `json:"namespace"`
		Host          string `json:"host"`
		RQLiteUp      bool   `json:"rqlite_up"`
		RQLiteState   string `json:"rqlite_state,omitempty"`
		OlricUp       bool   `json:"olric_up"`
		GatewayUp     bool   `json:"gateway_up"`
		GatewayStatus int    `json:"gateway_status,omitempty"`
	}
	var entries []nsEntry
	for _, node := range snap.Nodes {
		if node.Error != nil || node.Report == nil {
			continue
		}
		for _, ns := range node.Report.Namespaces {
			entry := nsEntry{
				Namespace:     ns.Name,
				Host:          node.Node.Host,
				RQLiteUp:      ns.RQLiteUp,
				RQLiteState:   ns.RQLiteState,
				OlricUp:       ns.OlricUp,
				GatewayUp:     ns.GatewayUp,
				GatewayStatus: ns.GatewayStatus,
			}
			entries = append(entries, entry)
		}
	}
	return writeJSON(w, entries)
}

View File

@ -0,0 +1,167 @@
package display
import (
"fmt"
"io"
"github.com/DeBrosOfficial/network/pkg/cli/monitor"
)
// NodeTable prints detailed per-node information to w.
//
// For each node it renders one multi-line section covering system
// resources, RQLite, WireGuard, Olric, IPFS and the Anyone relay.
// Subsystems absent from the report are shown as "not configured";
// unreachable nodes get a single red error line. Always returns nil.
func NodeTable(snap *monitor.ClusterSnapshot, w io.Writer) error {
	for i, cs := range snap.Nodes {
		// Blank line between node sections.
		if i > 0 {
			fmt.Fprintln(w)
		}
		host := cs.Node.Host
		role := cs.Node.Role
		if cs.Error != nil {
			fmt.Fprintf(w, "%s (%s)\n", styleRed.Render("Node: "+host), role)
			fmt.Fprintf(w, "  %s\n", styleRed.Render(fmt.Sprintf("UNREACHABLE: %v", cs.Error)))
			continue
		}
		r := cs.Report
		if r == nil {
			fmt.Fprintf(w, "%s (%s)\n", styleRed.Render("Node: "+host), role)
			fmt.Fprintf(w, "  %s\n", styleRed.Render("No report available"))
			continue
		}
		fmt.Fprintf(w, "%s\n", styleBold.Render(fmt.Sprintf("Node: %s (%s)", host, role)))
		// System
		if r.System != nil {
			sys := r.System
			fmt.Fprintf(w, "  System:    CPU %d | Load %.2f | Mem %d%% (%d/%d MB) | Disk %d%%\n",
				sys.CPUCount, sys.LoadAvg1, sys.MemUsePct, sys.MemUsedMB, sys.MemTotalMB, sys.DiskUsePct)
		} else {
			fmt.Fprintln(w, "  System:    "+styleMuted.Render("no data"))
		}
		// RQLite
		if r.RQLite != nil {
			rq := r.RQLite
			readyStr := styleRed.Render("Not Ready")
			if rq.Ready {
				readyStr = styleGreen.Render("Ready")
			}
			// Raft details are only meaningful when the daemon answered.
			if rq.Responsive {
				fmt.Fprintf(w, "  RQLite:    %s | Term %d | Applied %d | Peers %d | %s\n",
					rq.RaftState, rq.Term, rq.Applied, rq.NumPeers, readyStr)
			} else {
				fmt.Fprintf(w, "  RQLite:    %s\n", styleRed.Render("NOT RESPONDING"))
			}
		} else {
			fmt.Fprintln(w, "  RQLite:    "+styleMuted.Render("not configured"))
		}
		// WireGuard
		if r.WireGuard != nil {
			wg := r.WireGuard
			if wg.InterfaceUp {
				// Check handshakes: any peer that never shook hands, or
				// whose last handshake is older than 3 minutes, fails it.
				hsOK := true
				for _, p := range wg.Peers {
					if p.LatestHandshake == 0 || p.HandshakeAgeSec > 180 {
						hsOK = false
						break
					}
				}
				hsStr := statusIcon(hsOK)
				fmt.Fprintf(w, "  WireGuard: UP | %s | %d peers | handshakes %s\n",
					wg.WgIP, wg.PeerCount, hsStr)
			} else {
				fmt.Fprintf(w, "  WireGuard: %s\n", styleRed.Render("DOWN"))
			}
		} else {
			fmt.Fprintln(w, "  WireGuard: "+styleMuted.Render("not configured"))
		}
		// Olric
		if r.Olric != nil {
			ol := r.Olric
			stateStr := styleRed.Render("inactive")
			if ol.ServiceActive {
				stateStr = styleGreen.Render("active")
			}
			fmt.Fprintf(w, "  Olric:     %s | %d members\n", stateStr, ol.MemberCount)
		} else {
			fmt.Fprintln(w, "  Olric:     "+styleMuted.Render("not configured"))
		}
		// IPFS
		if r.IPFS != nil {
			ipfs := r.IPFS
			daemonStr := styleRed.Render("inactive")
			if ipfs.DaemonActive {
				daemonStr = styleGreen.Render("active")
			}
			clusterStr := styleRed.Render("DOWN")
			if ipfs.ClusterActive {
				clusterStr = styleGreen.Render("OK")
			}
			fmt.Fprintf(w, "  IPFS:      %s | %d swarm peers | cluster %s\n",
				daemonStr, ipfs.SwarmPeerCount, clusterStr)
		} else {
			fmt.Fprintln(w, "  IPFS:      "+styleMuted.Render("not configured"))
		}
		// Anyone
		if r.Anyone != nil {
			an := r.Anyone
			// Derive a mode label when the report didn't set one:
			// relay takes precedence over client; neither means inactive.
			mode := an.Mode
			if mode == "" {
				if an.RelayActive {
					mode = "relay"
				} else if an.ClientActive {
					mode = "client"
				} else {
					mode = "inactive"
				}
			}
			bootStr := styleRed.Render("not bootstrapped")
			if an.Bootstrapped {
				bootStr = styleGreen.Render("bootstrapped")
			}
			fmt.Fprintf(w, "  Anyone:    %s | %s\n", mode, bootStr)
		} else {
			fmt.Fprintln(w, "  Anyone:    "+styleMuted.Render("not configured"))
		}
	}
	return nil
}
// NodeJSON writes per-node detail as indented JSON to w. Each entry
// carries the host, role and a status of "ok", "unreachable" or
// "unknown"; reachable nodes embed their full raw report.
func NodeJSON(snap *monitor.ClusterSnapshot, w io.Writer) error {
	type nodeDetail struct {
		Host   string      `json:"host"`
		Role   string      `json:"role"`
		Status string      `json:"status"`
		Error  string      `json:"error,omitempty"`
		Report interface{} `json:"report,omitempty"`
	}
	var entries []nodeDetail
	for _, node := range snap.Nodes {
		detail := nodeDetail{Host: node.Node.Host, Role: node.Node.Role}
		switch {
		case node.Error != nil:
			detail.Status = "unreachable"
			detail.Error = node.Error.Error()
		case node.Report != nil:
			detail.Status = "ok"
			detail.Report = node.Report
		default:
			detail.Status = "unknown"
		}
		entries = append(entries, detail)
	}
	return writeJSON(w, entries)
}

View File

@ -0,0 +1,182 @@
package display
import (
"io"
"time"
"github.com/DeBrosOfficial/network/pkg/cli/monitor"
"github.com/DeBrosOfficial/network/pkg/cli/production/report"
)
// fullReport is the top-level JSON document emitted by FullReport:
// collection metadata, a cluster-wide summary, the alert list, and
// one entry per node.
type fullReport struct {
	// Meta describes the collection pass itself.
	Meta struct {
		Environment  string    `json:"environment"`
		CollectedAt  time.Time `json:"collected_at"`
		DurationSec  float64   `json:"duration_seconds"`
		NodeCount    int       `json:"node_count"`
		HealthyCount int       `json:"healthy_count"`
		FailedCount  int       `json:"failed_count"`
	} `json:"meta"`
	// Summary holds cluster-wide rollups derived from the per-node reports.
	Summary struct {
		RQLiteLeader   string `json:"rqlite_leader"`
		RQLiteQuorum   string `json:"rqlite_quorum"`
		WGMeshStatus   string `json:"wg_mesh_status"`
		ServiceHealth  string `json:"service_health"`
		CriticalAlerts int    `json:"critical_alerts"`
		WarningAlerts  int    `json:"warning_alerts"`
	} `json:"summary"`
	Alerts []monitor.Alert `json:"alerts"`
	Nodes  []nodeEntry     `json:"nodes"`
}
// nodeEntry is the per-node element of fullReport.Nodes. Report is
// embedded only for reachable nodes; Error only for unreachable ones.
type nodeEntry struct {
	Host   string             `json:"host"`
	Role   string             `json:"role"`
	Status string             `json:"status"` // "ok", "unreachable", "degraded"
	Report *report.NodeReport `json:"report,omitempty"`
	Error  string             `json:"error,omitempty"`
}
// FullReport outputs the LLM-optimized JSON report to w.
//
// It assembles meta information, a summary (leader, quorum, mesh and
// service rollups, alert counts), the raw alert list, and a per-node
// section. A reachable node that has at least one critical alert is
// marked "degraded" rather than "ok".
func FullReport(snap *monitor.ClusterSnapshot, w io.Writer) error {
	fr := fullReport{}
	// Meta
	fr.Meta.Environment = snap.Environment
	fr.Meta.CollectedAt = snap.CollectedAt
	fr.Meta.DurationSec = snap.Duration.Seconds()
	fr.Meta.NodeCount = snap.TotalCount()
	fr.Meta.HealthyCount = snap.HealthyCount()
	fr.Meta.FailedCount = len(snap.Failed())
	// Summary
	fr.Summary.RQLiteLeader = findRQLiteLeader(snap)
	fr.Summary.RQLiteQuorum = computeQuorumStatus(snap)
	fr.Summary.WGMeshStatus = computeWGMeshStatus(snap)
	fr.Summary.ServiceHealth = computeServiceHealth(snap)
	crit, warn := countAlerts(snap.Alerts)
	fr.Summary.CriticalAlerts = crit
	fr.Summary.WarningAlerts = warn
	// Alerts
	fr.Alerts = snap.Alerts
	// Build set of hosts with critical alerts for "degraded" detection.
	// Cluster-scoped alerts (Node == "" or "cluster") don't degrade any
	// single node.
	criticalHosts := map[string]bool{}
	for _, a := range snap.Alerts {
		if a.Severity == monitor.AlertCritical && a.Node != "" && a.Node != "cluster" {
			criticalHosts[a.Node] = true
		}
	}
	// Nodes
	for _, cs := range snap.Nodes {
		ne := nodeEntry{
			Host: cs.Node.Host,
			Role: cs.Node.Role,
		}
		if cs.Error != nil {
			ne.Status = "unreachable"
			ne.Error = cs.Error.Error()
		} else if cs.Report != nil {
			if criticalHosts[cs.Node.Host] {
				ne.Status = "degraded"
			} else {
				ne.Status = "ok"
			}
			ne.Report = cs.Report
		} else {
			// No error recorded but no report either: treat as unreachable.
			ne.Status = "unreachable"
		}
		fr.Nodes = append(fr.Nodes, ne)
	}
	return writeJSON(w, fr)
}
// findRQLiteLeader returns the host of the first node whose RQLite
// raft state is "Leader", or "none" when no node claims leadership.
func findRQLiteLeader(snap *monitor.ClusterSnapshot) string {
	for _, node := range snap.Nodes {
		rep := node.Report
		if rep == nil || rep.RQLite == nil {
			continue
		}
		if rep.RQLite.RaftState == "Leader" {
			return node.Node.Host
		}
	}
	return "none"
}
// computeQuorumStatus classifies RQLite quorum across the cluster:
// "ok" when a majority of RQLite-bearing nodes respond, "degraded"
// when some but not a majority respond, "lost" when none do, and
// "unknown" when no node reports RQLite at all.
func computeQuorumStatus(snap *monitor.ClusterSnapshot) string {
	var total, responsive int
	for _, node := range snap.Nodes {
		if node.Report == nil || node.Report.RQLite == nil {
			continue
		}
		total++
		if node.Report.RQLite.Responsive {
			responsive++
		}
	}
	switch {
	case total == 0:
		return "unknown"
	case responsive >= total/2+1: // simple majority
		return "ok"
	case responsive > 0:
		return "degraded"
	default:
		return "lost"
	}
}
// computeWGMeshStatus classifies WireGuard health: "ok" when every
// WireGuard-bearing node has its interface up, "degraded" when only
// some do, "down" when none do, and "unknown" when no node reports
// WireGuard at all.
func computeWGMeshStatus(snap *monitor.ClusterSnapshot) string {
	var total, up int
	for _, node := range snap.Nodes {
		if node.Report == nil || node.Report.WireGuard == nil {
			continue
		}
		total++
		if node.Report.WireGuard.InterfaceUp {
			up++
		}
	}
	switch {
	case total == 0:
		return "unknown"
	case up == total:
		return "ok"
	case up > 0:
		return "degraded"
	default:
		return "down"
	}
}
// computeServiceHealth classifies systemd service health across the
// cluster: "ok" with zero failed units, "degraded" when fewer than half
// have failed, "critical" when half or more have failed, and "unknown"
// when no services were reported at all.
func computeServiceHealth(snap *monitor.ClusterSnapshot) string {
	var total, failed int
	for _, node := range snap.Nodes {
		if node.Report == nil || node.Report.Services == nil {
			continue
		}
		for _, svc := range node.Report.Services.Services {
			total++
			if svc.ActiveState == "failed" {
				failed++
			}
		}
	}
	switch {
	case total == 0:
		return "unknown"
	case failed == 0:
		return "ok"
	case failed < total/2:
		return "degraded"
	default:
		return "critical"
	}
}

View File

@ -0,0 +1,131 @@
package display
import (
"fmt"
"io"
"sort"
"strings"
"github.com/DeBrosOfficial/network/pkg/cli/monitor"
)
// ServiceTable prints a cross-node service status matrix to w.
//
// Rows are service unit names (sorted); columns are nodes (identified
// by a shortened IP). Each cell shows the unit's systemd active-state,
// or a muted "--" when the node didn't report that unit. Nodes without
// service data are omitted from the matrix. Always returns nil.
func ServiceTable(snap *monitor.ClusterSnapshot, w io.Writer) error {
	fmt.Fprintf(w, "%s\n", styleBold.Render(
		fmt.Sprintf("Service Status Matrix \u2014 %s", snap.Environment)))
	fmt.Fprintln(w, strings.Repeat("\u2550", 36))
	fmt.Fprintln(w)
	// Collect all service names and build per-host maps
	type hostServices struct {
		host     string
		shortIP  string
		services map[string]string // name -> active_state
	}
	var hosts []hostServices
	serviceSet := map[string]bool{}
	for _, cs := range snap.Nodes {
		if cs.Error != nil || cs.Report == nil || cs.Report.Services == nil {
			continue
		}
		hs := hostServices{
			host:     cs.Node.Host,
			shortIP:  shortIP(cs.Node.Host),
			services: make(map[string]string),
		}
		for _, svc := range cs.Report.Services.Services {
			hs.services[svc.Name] = svc.ActiveState
			serviceSet[svc.Name] = true
		}
		hosts = append(hosts, hs)
	}
	// Sort service names
	var svcNames []string
	for name := range serviceSet {
		svcNames = append(svcNames, name)
	}
	sort.Strings(svcNames)
	if len(hosts) == 0 || len(svcNames) == 0 {
		fmt.Fprintln(w, styleMuted.Render("  No service data available"))
		return nil
	}
	// Header: SERVICE + each host short IP
	hdr := fmt.Sprintf("%-22s", styleHeader.Render("SERVICE"))
	for _, h := range hosts {
		hdr += fmt.Sprintf("%-12s", styleHeader.Render(h.shortIP))
	}
	fmt.Fprintln(w, hdr)
	fmt.Fprintln(w, separator(22+12*len(hosts)))
	// Rows
	for _, name := range svcNames {
		row := fmt.Sprintf("%-22s", name)
		for _, h := range hosts {
			state, ok := h.services[name]
			if !ok {
				// This node never reported the unit at all.
				row += fmt.Sprintf("%-12s", styleMuted.Render("--"))
			} else {
				row += fmt.Sprintf("%-12s", colorServiceState(state))
			}
		}
		fmt.Fprintln(w, row)
	}
	return nil
}
// ServiceJSON writes the service matrix as indented JSON to w: one
// entry per reachable node with service data, mapping unit name to
// systemd active-state.
func ServiceJSON(snap *monitor.ClusterSnapshot, w io.Writer) error {
	type svcEntry struct {
		Host     string            `json:"host"`
		Services map[string]string `json:"services"`
	}
	var entries []svcEntry
	for _, node := range snap.Nodes {
		if node.Error != nil || node.Report == nil || node.Report.Services == nil {
			continue
		}
		states := make(map[string]string, len(node.Report.Services.Services))
		for _, svc := range node.Report.Services.Services {
			states[svc.Name] = svc.ActiveState
		}
		entries = append(entries, svcEntry{Host: node.Node.Host, Services: states})
	}
	return writeJSON(w, entries)
}
// shortIP compacts a host label for narrow table columns: an IPv4
// address keeps only its first three octets; anything else is clipped
// to 12 characters.
func shortIP(ip string) string {
	octets := strings.Split(ip, ".")
	if len(octets) == 4 {
		return strings.Join(octets[:3], ".")
	}
	if len(ip) > 12 {
		return ip[:12]
	}
	return ip
}
// colorServiceState renders a systemd active-state with color:
// green ACTIVE, red FAILED, muted inactive; any other state (e.g.
// "activating") is shown verbatim in yellow.
func colorServiceState(state string) string {
	if state == "active" {
		return styleGreen.Render("ACTIVE")
	}
	if state == "failed" {
		return styleRed.Render("FAILED")
	}
	if state == "inactive" {
		return styleMuted.Render("inactive")
	}
	return styleYellow.Render(state)
}

View File

@ -0,0 +1,53 @@
package display
import (
"encoding/json"
"io"
"strings"
"github.com/DeBrosOfficial/network/pkg/cli/monitor"
"github.com/charmbracelet/lipgloss"
)
// Shared lipgloss styles for all display renderers in this package.
var (
	styleGreen  = lipgloss.NewStyle().Foreground(lipgloss.Color("#00ff00")) // healthy / OK
	styleRed    = lipgloss.NewStyle().Foreground(lipgloss.Color("#ff0000")) // failed / critical
	styleYellow = lipgloss.NewStyle().Foreground(lipgloss.Color("#ffff00")) // warning
	styleMuted  = lipgloss.NewStyle().Foreground(lipgloss.Color("#888888")) // secondary / absent
	styleBold   = lipgloss.NewStyle().Bold(true)                            // section titles
	styleHeader = lipgloss.NewStyle().Bold(true).Foreground(lipgloss.Color("#ffffff")) // table headers
)
// statusIcon returns a green "OK" or red "!!" indicator.
func statusIcon(ok bool) string {
if ok {
return styleGreen.Render("OK")
}
return styleRed.Render("!!")
}
// severityColor maps an alert severity to its display style: red for
// critical, yellow for warning, muted for info and anything else.
func severityColor(s monitor.AlertSeverity) lipgloss.Style {
	switch s {
	case monitor.AlertCritical:
		return styleRed
	case monitor.AlertWarning:
		return styleYellow
	}
	// Info and unrecognized severities share the muted style.
	return styleMuted
}
// separator returns a horizontal rule of n light box-drawing dashes
// (U+2500), used under table headers.
func separator(n int) string {
	return strings.Repeat("\u2500", n)
}
// writeJSON encodes v as indented JSON to w.
func writeJSON(w io.Writer, v interface{}) error {
enc := json.NewEncoder(w)
enc.SetIndent("", " ")
return enc.Encode(v)
}

View File

@ -0,0 +1,75 @@
package monitor
import (
"time"
"github.com/DeBrosOfficial/network/pkg/cli/production/report"
"github.com/DeBrosOfficial/network/pkg/inspector"
)
// CollectionStatus tracks the SSH collection result for a single node.
type CollectionStatus struct {
	Node     inspector.Node     // the node that was contacted
	Report   *report.NodeReport // parsed report; nil when collection failed
	Error    error              // SSH/parse failure, if any
	Duration time.Duration      // how long this node's collection took
	Retries  int                // number of retry attempts made
}
// ClusterSnapshot is the aggregated state of the entire cluster at a point in time.
type ClusterSnapshot struct {
	Environment string             // environment name the snapshot was taken from
	CollectedAt time.Time          // when the collection pass started
	Duration    time.Duration      // total wall-clock time of the pass
	Nodes       []CollectionStatus // one entry per node attempted
	Alerts      []Alert            // alerts derived from the node reports
}
// Healthy returns the reports of all nodes whose collection succeeded,
// in snapshot order. The result is nil when no node reported.
func (cs *ClusterSnapshot) Healthy() []*report.NodeReport {
	var reports []*report.NodeReport
	for _, status := range cs.Nodes {
		if status.Report == nil {
			continue
		}
		reports = append(reports, status.Report)
	}
	return reports
}
// Failed returns the collection statuses of all nodes where SSH or
// parsing failed, in snapshot order. Nil when every node succeeded.
func (cs *ClusterSnapshot) Failed() []CollectionStatus {
	var failed []CollectionStatus
	for _, status := range cs.Nodes {
		if status.Error == nil {
			continue
		}
		failed = append(failed, status)
	}
	return failed
}
// ByHost indexes successful reports by node host for quick lookup.
// Failed nodes are absent from the map.
func (cs *ClusterSnapshot) ByHost() map[string]*report.NodeReport {
	byHost := make(map[string]*report.NodeReport, len(cs.Nodes))
	for _, status := range cs.Nodes {
		if status.Report == nil {
			continue
		}
		byHost[status.Node.Host] = status.Report
	}
	return byHost
}
// HealthyCount returns the number of nodes that reported successfully.
func (cs *ClusterSnapshot) HealthyCount() int {
	healthy := 0
	for _, status := range cs.Nodes {
		if status.Report != nil {
			healthy++
		}
	}
	return healthy
}
// TotalCount returns the total number of nodes attempted, reachable or not.
func (cs *ClusterSnapshot) TotalCount() int {
	return len(cs.Nodes)
}

View File

@ -0,0 +1,88 @@
package tui
import (
"fmt"
"sort"
"strings"
"github.com/DeBrosOfficial/network/pkg/cli/monitor"
)
// renderAlertsTab renders all alerts sorted by severity.
//
// A nil snapshot yields a "collecting" placeholder; an empty alert
// list a green all-clear. Otherwise alerts are counted, sorted most
// severe first (stable copy, original slice untouched), and grouped
// under a header per severity level.
func renderAlertsTab(snap *monitor.ClusterSnapshot, width int) string {
	if snap == nil {
		return styleMuted.Render("Collecting cluster data...")
	}
	if len(snap.Alerts) == 0 {
		return styleHealthy.Render("  No alerts. All systems nominal.")
	}
	var b strings.Builder
	critCount, warnCount, infoCount := countAlertsBySeverity(snap.Alerts)
	b.WriteString(styleBold.Render("Alerts"))
	b.WriteString(fmt.Sprintf("  %s  %s  %s\n",
		styleCritical.Render(fmt.Sprintf("%d critical", critCount)),
		styleWarning.Render(fmt.Sprintf("%d warning", warnCount)),
		styleMuted.Render(fmt.Sprintf("%d info", infoCount)),
	))
	b.WriteString(separator(width))
	b.WriteString("\n\n")
	// Sort: critical first, then warning, then info. Sort a copy so the
	// snapshot's own slice keeps its original order.
	sorted := make([]monitor.Alert, len(snap.Alerts))
	copy(sorted, snap.Alerts)
	sort.Slice(sorted, func(i, j int) bool {
		return severityRank(sorted[i].Severity) < severityRank(sorted[j].Severity)
	})
	// Group by severity: emit a section label each time the severity
	// changes while walking the sorted list.
	currentSev := monitor.AlertSeverity("")
	for _, a := range sorted {
		if a.Severity != currentSev {
			currentSev = a.Severity
			label := strings.ToUpper(string(a.Severity))
			b.WriteString(severityStyle(string(a.Severity)).Render(fmt.Sprintf(" ── %s ", label)))
			b.WriteString("\n")
		}
		sevTag := formatSeverityTag(a.Severity)
		b.WriteString(fmt.Sprintf("  %s %-12s %-18s %s\n",
			sevTag,
			styleMuted.Render("["+a.Subsystem+"]"),
			a.Node,
			a.Message,
		))
	}
	return b.String()
}
// severityRank maps a severity to a sort rank, lower meaning more
// severe: critical 0, warning 1, info 2, anything else 3.
func severityRank(s monitor.AlertSeverity) int {
	if s == monitor.AlertCritical {
		return 0
	}
	if s == monitor.AlertWarning {
		return 1
	}
	if s == monitor.AlertInfo {
		return 2
	}
	return 3
}
// formatSeverityTag returns a four-character styled severity label:
// CRIT, WARN, INFO, or ???? (muted) for unknown severities.
func formatSeverityTag(s monitor.AlertSeverity) string {
	if s == monitor.AlertCritical {
		return styleCritical.Render("CRIT")
	}
	if s == monitor.AlertWarning {
		return styleWarning.Render("WARN")
	}
	if s == monitor.AlertInfo {
		return styleMuted.Render("INFO")
	}
	return styleMuted.Render("????")
}

109
pkg/cli/monitor/tui/dns.go Normal file
View File

@ -0,0 +1,109 @@
package tui
import (
"fmt"
"strings"
"github.com/DeBrosOfficial/network/pkg/cli/monitor"
)
// renderDNSTab renders DNS status for nameserver nodes.
//
// Unlike the CLI table, nameserver nodes are detected here by the
// presence of a DNS section in the report rather than by role. Each
// node gets a multi-line block: service state, port bindings,
// resolution checks, TLS expiry and recent log errors. Returns
// placeholder text when there is no snapshot, no healthy node, or
// no node with DNS data.
func renderDNSTab(snap *monitor.ClusterSnapshot, width int) string {
	if snap == nil {
		return styleMuted.Render("Collecting cluster data...")
	}
	if snap.HealthyCount() == 0 {
		return styleMuted.Render("No healthy nodes to display.")
	}
	var b strings.Builder
	b.WriteString(styleBold.Render("DNS / Nameserver Status"))
	b.WriteString("\n")
	b.WriteString(separator(width))
	b.WriteString("\n\n")
	hasDNS := false
	for _, cs := range snap.Nodes {
		if cs.Report == nil || cs.Report.DNS == nil {
			continue
		}
		hasDNS = true
		r := cs.Report
		dns := r.DNS
		host := nodeHost(r)
		role := cs.Node.Role
		b.WriteString(styleBold.Render(fmt.Sprintf(" %s", host)))
		if role != "" {
			b.WriteString(fmt.Sprintf(" (%s)", role))
		}
		b.WriteString("\n")
		// Service status
		b.WriteString(fmt.Sprintf("   CoreDNS:  %s", statusStr(dns.CoreDNSActive)))
		if dns.CoreDNSMemMB > 0 {
			b.WriteString(fmt.Sprintf("  mem=%dMB", dns.CoreDNSMemMB))
		}
		// Restart count is only worth surfacing (in warning color) when non-zero.
		if dns.CoreDNSRestarts > 0 {
			b.WriteString(fmt.Sprintf("  restarts=%s", styleWarning.Render(fmt.Sprintf("%d", dns.CoreDNSRestarts))))
		}
		b.WriteString("\n")
		b.WriteString(fmt.Sprintf("   Caddy:    %s\n", statusStr(dns.CaddyActive)))
		// Port bindings
		b.WriteString(fmt.Sprintf("   Ports:    53=%s 80=%s 443=%s\n",
			statusStr(dns.Port53Bound),
			statusStr(dns.Port80Bound),
			statusStr(dns.Port443Bound),
		))
		// DNS resolution checks
		b.WriteString(fmt.Sprintf("   SOA:      %s\n", statusStr(dns.SOAResolves)))
		b.WriteString(fmt.Sprintf("   NS:       %s", statusStr(dns.NSResolves)))
		if dns.NSRecordCount > 0 {
			b.WriteString(fmt.Sprintf(" (%d records)", dns.NSRecordCount))
		}
		b.WriteString("\n")
		b.WriteString(fmt.Sprintf("   Base A:   %s\n", statusStr(dns.BaseAResolves)))
		b.WriteString(fmt.Sprintf("   Wildcard: %s\n", statusStr(dns.WildcardResolves)))
		b.WriteString(fmt.Sprintf("   Corefile: %s\n", statusStr(dns.CorefileExists)))
		// TLS certificates
		baseTLS := renderTLSDays(dns.BaseTLSDaysLeft, "base")
		wildTLS := renderTLSDays(dns.WildTLSDaysLeft, "wildcard")
		b.WriteString(fmt.Sprintf("   TLS:      %s  %s\n", baseTLS, wildTLS))
		// Log errors
		if dns.LogErrors > 0 {
			b.WriteString(fmt.Sprintf("   Log errors: %s (5m)\n",
				styleWarning.Render(fmt.Sprintf("%d", dns.LogErrors))))
		}
		b.WriteString("\n")
	}
	if !hasDNS {
		return styleMuted.Render("No nameserver nodes found (no DNS data reported).")
	}
	return b.String()
}
// renderTLSDays formats TLS certificate expiry for the TUI with color
// coding: muted "n/a" for negative (unknown), critical under 7 days,
// warning under 14, healthy otherwise.
func renderTLSDays(days int, label string) string {
	if days < 0 {
		return styleMuted.Render(fmt.Sprintf("%s: n/a", label))
	}
	text := fmt.Sprintf("%s: %dd", label, days)
	if days < 7 {
		return styleCritical.Render(text)
	}
	if days < 14 {
		return styleWarning.Render(text)
	}
	return styleHealthy.Render(text)
}

View File

@ -0,0 +1,21 @@
package tui
import "github.com/charmbracelet/bubbles/key"
// keyMap declares the key bindings the monitor TUI responds to.
type keyMap struct {
	Quit       key.Binding // exit the program
	NextTab    key.Binding // move to the next tab
	PrevTab    key.Binding // move to the previous tab
	Refresh    key.Binding // force an immediate collection
	ScrollUp   key.Binding // scroll the viewport up
	ScrollDown key.Binding // scroll the viewport down
}

// keys is the default binding set, with vim-style (h/l/j/k) alternates.
var keys = keyMap{
	Quit:       key.NewBinding(key.WithKeys("q", "ctrl+c"), key.WithHelp("q", "quit")),
	NextTab:    key.NewBinding(key.WithKeys("tab", "l"), key.WithHelp("tab", "next tab")),
	PrevTab:    key.NewBinding(key.WithKeys("shift+tab", "h"), key.WithHelp("shift+tab", "prev tab")),
	Refresh:    key.NewBinding(key.WithKeys("r"), key.WithHelp("r", "refresh")),
	ScrollUp:   key.NewBinding(key.WithKeys("up", "k")),
	ScrollDown: key.NewBinding(key.WithKeys("down", "j")),
}

View File

@ -0,0 +1,226 @@
package tui
import (
"context"
"fmt"
"time"
"github.com/charmbracelet/bubbles/viewport"
tea "github.com/charmbracelet/bubbletea"
"github.com/DeBrosOfficial/network/pkg/cli/monitor"
)
// Tab indices for the monitor TUI; tabCount is the number of tabs and
// must stay last in the iota block.
const (
	tabOverview = iota
	tabNodes
	tabServices
	tabMesh
	tabDNS
	tabNamespaces
	tabAlerts
	tabCount
)

// tabNames holds the display label for each tab, indexed by the
// constants above (order must match).
var tabNames = []string{"Overview", "Nodes", "Services", "WG Mesh", "DNS", "Namespaces", "Alerts"}
// snapshotMsg carries the result of a background collection.
// Exactly one of snap or err is meaningful.
type snapshotMsg struct {
	snap *monitor.ClusterSnapshot
	err  error
}

// tickMsg fires on each refresh interval.
type tickMsg time.Time
// model is the root Bubbletea model for the Orama monitor TUI.
type model struct {
	cfg        monitor.CollectorConfig  // how to reach/collect from the cluster
	interval   time.Duration            // auto-refresh period
	activeTab  int                      // index into tabNames / tab* constants
	viewport   viewport.Model           // scrollable content area
	width      int                      // last known terminal width
	height     int                      // last known terminal height
	snapshot   *monitor.ClusterSnapshot // most recent successful collection, nil before first
	loading    bool                     // a collection is currently in flight
	lastError  error                    // error from the most recent failed collection
	lastUpdate time.Time                // when snapshot was last replaced
	quitting   bool                     // user requested exit; View renders nothing
}
// newModel creates a fresh model with an 80x24 default viewport. The
// model starts in the loading state so the first render shows the
// collection placeholder until the initial snapshot arrives.
func newModel(cfg monitor.CollectorConfig, interval time.Duration) model {
	return model{
		cfg:      cfg,
		interval: interval,
		viewport: viewport.New(80, 24),
		loading:  true,
	}
}
// Init kicks off the first background collection and starts the
// periodic refresh timer.
func (m model) Init() tea.Cmd {
	return tea.Batch(doCollect(m.cfg), tickCmd(m.interval))
}
// Update is the Bubbletea message handler: it routes key presses
// (quit, tab cycling, manual refresh, scrolling), window resizes,
// completed collections (snapshotMsg) and refresh ticks (tickMsg).
func (m model) Update(msg tea.Msg) (tea.Model, tea.Cmd) {
	var cmds []tea.Cmd
	switch msg := msg.(type) {
	case tea.KeyMsg:
		switch {
		case msg.String() == "q" || msg.String() == "ctrl+c":
			m.quitting = true
			return m, tea.Quit
		case msg.String() == "tab" || msg.String() == "l":
			// Cycle forward; modulo wraps from the last tab to the first.
			m.activeTab = (m.activeTab + 1) % tabCount
			m.updateContent()
			m.viewport.GotoTop()
			return m, nil
		case msg.String() == "shift+tab" || msg.String() == "h":
			// Cycle backward; "+ tabCount" keeps the index non-negative.
			m.activeTab = (m.activeTab - 1 + tabCount) % tabCount
			m.updateContent()
			m.viewport.GotoTop()
			return m, nil
		case msg.String() == "r":
			// Manual refresh, ignored while a collection is already running.
			if !m.loading {
				m.loading = true
				return m, doCollect(m.cfg)
			}
			return m, nil
		default:
			// Delegate scrolling to viewport
			var cmd tea.Cmd
			m.viewport, cmd = m.viewport.Update(msg)
			return m, cmd
		}
	case tea.WindowSizeMsg:
		m.width = msg.Width
		m.height = msg.Height
		// Reserve 4 lines: header, tab bar, blank separator, footer
		vpHeight := msg.Height - 4
		if vpHeight < 1 {
			vpHeight = 1
		}
		m.viewport.Width = msg.Width
		m.viewport.Height = vpHeight
		m.updateContent()
		return m, nil
	case snapshotMsg:
		// Background collection finished: keep the previous snapshot on
		// error so stale data remains visible alongside the error header.
		m.loading = false
		if msg.err != nil {
			m.lastError = msg.err
		} else {
			m.snapshot = msg.snap
			m.lastError = nil
			m.lastUpdate = time.Now()
		}
		m.updateContent()
		return m, nil
	case tickMsg:
		// Periodic refresh: start a collection unless one is in flight,
		// and always re-arm the timer.
		if !m.loading {
			m.loading = true
			cmds = append(cmds, doCollect(m.cfg))
		}
		cmds = append(cmds, tickCmd(m.interval))
		return m, tea.Batch(cmds...)
	}
	return m, nil
}
// View assembles the full frame: a status header, the tab bar, the active
// tab's scrollable content, and a key-hint footer. Returns an empty string
// once the user has quit.
func (m model) View() string {
	if m.quitting {
		return ""
	}
	// Header: snapshot age when data exists, otherwise a loading/error line.
	var header string
	if m.snapshot != nil {
		ago := time.Since(m.lastUpdate).Truncate(time.Second)
		header = headerStyle.Render(fmt.Sprintf(
			"Orama Monitor — %s — Last: %s (%s ago)",
			m.snapshot.Environment,
			m.lastUpdate.Format("15:04:05"),
			ago,
		))
	} else if m.loading {
		header = headerStyle.Render("Orama Monitor — collecting...")
	} else if m.lastError != nil {
		header = headerStyle.Render(fmt.Sprintf("Orama Monitor — error: %v", m.lastError))
	} else {
		header = headerStyle.Render("Orama Monitor")
	}
	// A background refresh with data already on screen gets a subtle hint
	// appended rather than replacing the header.
	if m.loading && m.snapshot != nil {
		header += styleMuted.Render(" (refreshing...)")
	}
	// Tab bar
	tabs := renderTabBar(m.activeTab, m.width)
	// Footer
	footer := footerStyle.Render("tab: switch | j/k: scroll | r: refresh | q: quit")
	return header + "\n" + tabs + "\n" + m.viewport.View() + "\n" + footer
}
// updateContent re-renders the currently selected tab and loads the result
// into the viewport. A zero width (no WindowSizeMsg yet) falls back to 80.
func (m *model) updateContent() {
	width := m.width
	if width == 0 {
		width = 80
	}
	content := ""
	switch m.activeTab {
	case tabOverview:
		content = renderOverview(m.snapshot, width)
	case tabNodes:
		content = renderNodes(m.snapshot, width)
	case tabServices:
		content = renderServicesTab(m.snapshot, width)
	case tabMesh:
		content = renderWGMesh(m.snapshot, width)
	case tabDNS:
		content = renderDNSTab(m.snapshot, width)
	case tabNamespaces:
		content = renderNamespacesTab(m.snapshot, width)
	case tabAlerts:
		content = renderAlertsTab(m.snapshot, width)
	}
	m.viewport.SetContent(content)
}
// doCollect returns a command that performs one cluster collection with a
// 60-second deadline and delivers the outcome as a snapshotMsg.
func doCollect(cfg monitor.CollectorConfig) tea.Cmd {
	return func() tea.Msg {
		ctx, cancel := context.WithTimeout(context.Background(), time.Minute)
		defer cancel()
		snap, err := monitor.CollectOnce(ctx, cfg)
		return snapshotMsg{snap: snap, err: err}
	}
}
// tickCmd schedules a tickMsg to be delivered once d has elapsed.
func tickCmd(d time.Duration) tea.Cmd {
	return tea.Tick(d, func(now time.Time) tea.Msg {
		return tickMsg(now)
	})
}
// Run launches the monitor TUI in the alternate screen buffer with a
// 30-second auto-refresh interval, blocking until the user quits.
func Run(cfg monitor.CollectorConfig) error {
	prog := tea.NewProgram(newModel(cfg, 30*time.Second), tea.WithAltScreen())
	_, err := prog.Run()
	return err
}

View File

@ -0,0 +1,158 @@
package tui
import (
"fmt"
"sort"
"strings"
"github.com/DeBrosOfficial/network/pkg/cli/monitor"
)
// renderNamespacesTab renders per-namespace health across all nodes: first
// a compact namespace-by-node matrix, then a per-namespace detail listing.
// Only nodes that collected successfully (snap.Healthy()) are shown.
func renderNamespacesTab(snap *monitor.ClusterSnapshot, width int) string {
	if snap == nil {
		return styleMuted.Render("Collecting cluster data...")
	}
	reports := snap.Healthy()
	if len(reports) == 0 {
		return styleMuted.Render("No healthy nodes to display.")
	}
	var b strings.Builder
	b.WriteString(styleBold.Render("Namespace Health"))
	b.WriteString("\n")
	b.WriteString(separator(width))
	b.WriteString("\n\n")
	// Collect unique namespace names
	nsSet := make(map[string]bool)
	for _, r := range reports {
		for _, ns := range r.Namespaces {
			nsSet[ns.Name] = true
		}
	}
	nsNames := make([]string, 0, len(nsSet))
	for name := range nsSet {
		nsNames = append(nsNames, name)
	}
	sort.Strings(nsNames)
	if len(nsNames) == 0 {
		return styleMuted.Render("No namespaces found on any node.")
	}
	// Header: one column per node, hosts truncated to 15 characters.
	// NOTE(review): %-17s pads by byte length, which includes the ANSI
	// color codes emitted by Render — columns may drift; confirm acceptable.
	header := fmt.Sprintf(" %-20s", headerStyle.Render("NAMESPACE"))
	for _, r := range reports {
		host := nodeHost(r)
		if len(host) > 15 {
			host = host[:15]
		}
		header += fmt.Sprintf(" %-17s", headerStyle.Render(host))
	}
	b.WriteString(header)
	b.WriteString("\n")
	// Build lookup: host -> ns name -> NamespaceReport
	type nsKey struct {
		host string
		name string
	}
	nsMap := make(map[nsKey]nsStatus)
	for _, r := range reports {
		host := nodeHost(r)
		for _, ns := range r.Namespaces {
			nsMap[nsKey{host, ns.Name}] = nsStatus{
				gateway:     ns.GatewayUp,
				rqlite:      ns.RQLiteUp,
				rqliteState: ns.RQLiteState,
				rqliteReady: ns.RQLiteReady,
				olric:       ns.OlricUp,
			}
		}
	}
	// Rows: "-" marks a namespace absent on that node.
	for _, nsName := range nsNames {
		row := fmt.Sprintf(" %-20s", nsName)
		for _, r := range reports {
			host := nodeHost(r)
			ns, ok := nsMap[nsKey{host, nsName}]
			if !ok {
				row += fmt.Sprintf(" %-17s", styleMuted.Render("-"))
				continue
			}
			row += fmt.Sprintf(" %-17s", renderNsCell(ns))
		}
		b.WriteString(row)
		b.WriteString("\n")
	}
	// Detailed per-namespace view: one line per (namespace, node) pair with
	// gateway/rqlite/olric status, raft state and port base when known.
	b.WriteString("\n")
	b.WriteString(styleBold.Render("Namespace Details"))
	b.WriteString("\n")
	b.WriteString(separator(width))
	b.WriteString("\n")
	for _, nsName := range nsNames {
		b.WriteString(fmt.Sprintf("\n %s\n", styleBold.Render(nsName)))
		for _, r := range reports {
			host := nodeHost(r)
			for _, ns := range r.Namespaces {
				if ns.Name != nsName {
					continue
				}
				b.WriteString(fmt.Sprintf(" %-18s gw=%s rqlite=%s",
					host,
					statusStr(ns.GatewayUp),
					statusStr(ns.RQLiteUp),
				))
				if ns.RQLiteState != "" {
					b.WriteString(fmt.Sprintf("(%s)", ns.RQLiteState))
				}
				b.WriteString(fmt.Sprintf(" olric=%s", statusStr(ns.OlricUp)))
				if ns.PortBase > 0 {
					b.WriteString(fmt.Sprintf(" port=%d", ns.PortBase))
				}
				b.WriteString("\n")
			}
		}
	}
	return b.String()
}
// nsStatus holds a namespace's health indicators for one node.
// Fields mirror the corresponding NamespaceReport values.
type nsStatus struct {
	gateway     bool   // NamespaceReport.GatewayUp
	rqlite      bool   // NamespaceReport.RQLiteUp
	rqliteState string // NamespaceReport.RQLiteState (raft state string; may be empty)
	rqliteReady bool   // NamespaceReport.RQLiteReady
	olric       bool   // NamespaceReport.OlricUp
}
// renderNsCell renders a compact status cell for the namespace matrix:
// green "OK" when gateway, rqlite and olric are all up; red "DOWN" when
// both the gateway and rqlite are down; otherwise a yellow "!"-prefixed
// comma list of the components that are down (gw, rq, ol).
func renderNsCell(ns nsStatus) string {
	switch {
	case ns.gateway && ns.rqlite && ns.olric:
		return styleHealthy.Render("OK")
	case !ns.gateway && !ns.rqlite:
		return styleCritical.Render("DOWN")
	}
	var down []string
	if !ns.gateway {
		down = append(down, "gw")
	}
	if !ns.rqlite {
		down = append(down, "rq")
	}
	if !ns.olric {
		down = append(down, "ol")
	}
	return styleWarning.Render("!" + strings.Join(down, ","))
}

View File

@ -0,0 +1,147 @@
package tui
import (
"fmt"
"strings"
"time"
"github.com/DeBrosOfficial/network/pkg/cli/monitor"
)
// renderNodes renders the Nodes tab with detailed per-node information.
// Unreachable nodes get a short error block; reachable nodes get sections
// for system resources, systemd services, RQLite, WireGuard and network,
// each of which is omitted when its report is nil.
func renderNodes(snap *monitor.ClusterSnapshot, width int) string {
	if snap == nil {
		return styleMuted.Render("Collecting cluster data...")
	}
	var b strings.Builder
	for i, cs := range snap.Nodes {
		if i > 0 {
			b.WriteString("\n")
		}
		host := cs.Node.Host
		role := cs.Node.Role
		if role == "" {
			role = "node"
		}
		// Unreachable node: status, error text, probe duration, retries.
		if cs.Error != nil {
			b.WriteString(styleBold.Render(fmt.Sprintf("Node: %s", host)))
			b.WriteString(fmt.Sprintf(" (%s)", role))
			b.WriteString("\n")
			b.WriteString(separator(width))
			b.WriteString("\n")
			b.WriteString(fmt.Sprintf(" Status: %s\n", styleCritical.Render("UNREACHABLE")))
			b.WriteString(fmt.Sprintf(" Error: %s\n", styleCritical.Render(cs.Error.Error())))
			b.WriteString(fmt.Sprintf(" Took: %s\n", styleMuted.Render(cs.Duration.Truncate(time.Millisecond).String())))
			if cs.Retries > 0 {
				b.WriteString(fmt.Sprintf(" Retries: %d\n", cs.Retries))
			}
			continue
		}
		r := cs.Report
		if r == nil {
			continue
		}
		b.WriteString(styleBold.Render(fmt.Sprintf("Node: %s", host)))
		b.WriteString(fmt.Sprintf(" (%s) ", role))
		b.WriteString(styleHealthy.Render("ONLINE"))
		if r.Version != "" {
			b.WriteString(fmt.Sprintf(" v%s", r.Version))
		}
		b.WriteString("\n")
		b.WriteString(separator(width))
		b.WriteString("\n")
		// System Resources
		if r.System != nil {
			sys := r.System
			b.WriteString(styleBold.Render(" System"))
			b.WriteString("\n")
			b.WriteString(fmt.Sprintf(" CPU: %d cores, load %.1f / %.1f / %.1f\n",
				sys.CPUCount, sys.LoadAvg1, sys.LoadAvg5, sys.LoadAvg15))
			b.WriteString(fmt.Sprintf(" Memory: %s (%d / %d MB, %d MB avail)\n",
				colorPct(sys.MemUsePct), sys.MemUsedMB, sys.MemTotalMB, sys.MemAvailMB))
			b.WriteString(fmt.Sprintf(" Disk: %s (%s / %s, %s avail)\n",
				colorPct(sys.DiskUsePct), sys.DiskUsedGB, sys.DiskTotalGB, sys.DiskAvailGB))
			// Swap and OOM rows only appear when relevant.
			if sys.SwapTotalMB > 0 {
				b.WriteString(fmt.Sprintf(" Swap: %d / %d MB\n", sys.SwapUsedMB, sys.SwapTotalMB))
			}
			b.WriteString(fmt.Sprintf(" Uptime: %s\n", sys.UptimeSince))
			if sys.OOMKills > 0 {
				b.WriteString(fmt.Sprintf(" OOM: %s\n", styleCritical.Render(fmt.Sprintf("%d kills", sys.OOMKills))))
			}
		}
		// Services
		if r.Services != nil && len(r.Services.Services) > 0 {
			b.WriteString(styleBold.Render(" Services"))
			b.WriteString("\n")
			for _, svc := range r.Services.Services {
				// Color by systemd ActiveState: active=green, failed=red,
				// anything else (activating, inactive, ...) = yellow.
				stateStr := styleHealthy.Render(svc.ActiveState)
				if svc.ActiveState == "failed" {
					stateStr = styleCritical.Render("FAILED")
				} else if svc.ActiveState != "active" {
					stateStr = styleWarning.Render(svc.ActiveState)
				}
				extra := ""
				if svc.MemoryCurrentMB > 0 {
					extra += fmt.Sprintf(" mem=%dMB", svc.MemoryCurrentMB)
				}
				if svc.NRestarts > 0 {
					extra += fmt.Sprintf(" restarts=%d", svc.NRestarts)
				}
				if svc.RestartLoopRisk {
					extra += styleCritical.Render(" RESTART-LOOP")
				}
				b.WriteString(fmt.Sprintf(" %-28s %s%s\n", svc.Name, stateStr, extra))
			}
			if len(r.Services.FailedUnits) > 0 {
				b.WriteString(fmt.Sprintf(" Failed units: %s\n",
					styleCritical.Render(strings.Join(r.Services.FailedUnits, ", "))))
			}
		}
		// RQLite
		if r.RQLite != nil {
			rq := r.RQLite
			b.WriteString(styleBold.Render(" RQLite"))
			b.WriteString("\n")
			b.WriteString(fmt.Sprintf(" Responsive: %s Ready: %s Strong Read: %s\n",
				statusStr(rq.Responsive), statusStr(rq.Ready), statusStr(rq.StrongRead)))
			// Raft/store details are only meaningful when the node responded.
			if rq.Responsive {
				b.WriteString(fmt.Sprintf(" Raft: %s Leader: %s Term: %d Applied: %d\n",
					styleBold.Render(rq.RaftState), rq.LeaderAddr, rq.Term, rq.Applied))
				if rq.DBSize != "" {
					b.WriteString(fmt.Sprintf(" DB size: %s Peers: %d Goroutines: %d Heap: %dMB\n",
						rq.DBSize, rq.NumPeers, rq.Goroutines, rq.HeapMB))
				}
			}
		}
		// WireGuard
		if r.WireGuard != nil {
			wg := r.WireGuard
			b.WriteString(styleBold.Render(" WireGuard"))
			b.WriteString("\n")
			b.WriteString(fmt.Sprintf(" Interface: %s IP: %s Peers: %d\n",
				statusStr(wg.InterfaceUp), wg.WgIP, wg.PeerCount))
		}
		// Network
		if r.Network != nil {
			net := r.Network
			b.WriteString(styleBold.Render(" Network"))
			b.WriteString("\n")
			b.WriteString(fmt.Sprintf(" Internet: %s UFW: %s TCP est: %d retrans: %.1f%%\n",
				statusStr(net.InternetReachable), statusStr(net.UFWActive),
				net.TCPEstablished, net.TCPRetransRate))
		}
	}
	return b.String()
}

View File

@ -0,0 +1,183 @@
package tui
import (
"fmt"
"strings"
"github.com/DeBrosOfficial/network/pkg/cli/monitor"
)
// renderOverview renders the Overview tab: cluster summary, node table,
// and an alert summary with critical alerts listed inline.
func renderOverview(snap *monitor.ClusterSnapshot, width int) string {
	if snap == nil {
		return styleMuted.Render("Collecting cluster data...")
	}
	var b strings.Builder
	// -- Cluster Summary --
	b.WriteString(styleBold.Render("Cluster Summary"))
	b.WriteString("\n")
	b.WriteString(separator(width))
	b.WriteString("\n")
	healthy := snap.HealthyCount()
	total := snap.TotalCount()
	failed := total - healthy
	// Summary color: green when everything is up, yellow on partial
	// failure, red when no node at all is healthy.
	healthColor := styleHealthy
	if failed > 0 {
		healthColor = styleWarning
	}
	if healthy == 0 && total > 0 {
		healthColor = styleCritical
	}
	b.WriteString(fmt.Sprintf(" Environment: %s\n", styleBold.Render(snap.Environment)))
	b.WriteString(fmt.Sprintf(" Nodes: %s / %d\n", healthColor.Render(fmt.Sprintf("%d healthy", healthy)), total))
	if failed > 0 {
		b.WriteString(fmt.Sprintf(" Failed: %s\n", styleCritical.Render(fmt.Sprintf("%d", failed))))
	}
	// 1e6 ns == 1ms: collection time at millisecond resolution.
	b.WriteString(fmt.Sprintf(" Collect time: %s\n", styleMuted.Render(snap.Duration.Truncate(1e6).String())))
	b.WriteString("\n")
	// -- Node Table --
	b.WriteString(styleBold.Render("Nodes"))
	b.WriteString("\n")
	b.WriteString(separator(width))
	b.WriteString("\n")
	// Header row
	b.WriteString(fmt.Sprintf(" %-18s %-8s %-10s %-8s %-8s %-8s %-10s\n",
		headerStyle.Render("HOST"),
		headerStyle.Render("STATUS"),
		headerStyle.Render("ROLE"),
		headerStyle.Render("CPU"),
		headerStyle.Render("MEM%"),
		headerStyle.Render("DISK%"),
		headerStyle.Render("RQLITE"),
	))
	for _, cs := range snap.Nodes {
		// Unreachable node: one red FAIL row with a truncated error message.
		if cs.Error != nil {
			b.WriteString(fmt.Sprintf(" %-18s %s %s\n",
				cs.Node.Host,
				styleCritical.Render("FAIL"),
				styleMuted.Render(truncateStr(cs.Error.Error(), 40)),
			))
			continue
		}
		r := cs.Report
		if r == nil {
			continue
		}
		host := r.PublicIP
		if host == "" {
			host = r.Hostname
		}
		// cs.Error is nil and r is non-nil here (both handled above with
		// continue), so the node is healthy; the old conditional that
		// re-checked them could never take its FAIL branch.
		status := styleHealthy.Render("OK")
		role := cs.Node.Role
		if role == "" {
			role = "node"
		}
		cpuStr := "-"
		memStr := "-"
		diskStr := "-"
		if r.System != nil {
			cpuStr = fmt.Sprintf("%.1f", r.System.LoadAvg1)
			memStr = colorPct(r.System.MemUsePct)
			diskStr = colorPct(r.System.DiskUsePct)
		}
		rqliteStr := "-"
		if r.RQLite != nil {
			if r.RQLite.Responsive {
				rqliteStr = styleHealthy.Render(r.RQLite.RaftState)
			} else {
				rqliteStr = styleCritical.Render("DOWN")
			}
		}
		b.WriteString(fmt.Sprintf(" %-18s %-8s %-10s %-8s %-8s %-8s %-10s\n",
			host, status, role, cpuStr, memStr, diskStr, rqliteStr))
	}
	b.WriteString("\n")
	// -- Alert Summary --
	critCount, warnCount, infoCount := countAlertsBySeverity(snap.Alerts)
	b.WriteString(styleBold.Render("Alerts"))
	b.WriteString(fmt.Sprintf(" %s %s %s\n",
		styleCritical.Render(fmt.Sprintf("%d critical", critCount)),
		styleWarning.Render(fmt.Sprintf("%d warning", warnCount)),
		styleMuted.Render(fmt.Sprintf("%d info", infoCount)),
	))
	// Critical alerts are important enough to list inline on the overview.
	if critCount > 0 {
		b.WriteString("\n")
		for _, a := range snap.Alerts {
			if a.Severity == monitor.AlertCritical {
				b.WriteString(fmt.Sprintf(" %s [%s] %s: %s\n",
					styleCritical.Render("CRIT"),
					a.Subsystem,
					a.Node,
					a.Message,
				))
			}
		}
	}
	return b.String()
}
// colorPct colors a percentage label by threshold: red at 90 or above,
// yellow at 75 or above, green otherwise.
func colorPct(pct int) string {
	label := fmt.Sprintf("%d%%", pct)
	if pct >= 90 {
		return styleCritical.Render(label)
	}
	if pct >= 75 {
		return styleWarning.Render(label)
	}
	return styleHealthy.Render(label)
}
// countAlertsBySeverity tallies how many alerts fall into each severity
// bucket; severities outside the three known levels are not counted.
func countAlertsBySeverity(alerts []monitor.Alert) (crit, warn, info int) {
	for i := range alerts {
		switch alerts[i].Severity {
		case monitor.AlertCritical:
			crit++
		case monitor.AlertWarning:
			warn++
		case monitor.AlertInfo:
			info++
		}
	}
	return crit, warn, info
}
// truncateStr shortens s to at most maxLen characters, appending "..."
// when truncation occurs. It operates on runes rather than bytes so a
// multi-byte UTF-8 character is never split in half (the previous
// byte-based slice could produce invalid UTF-8 output).
func truncateStr(s string, maxLen int) string {
	runes := []rune(s)
	if len(runes) <= maxLen {
		return s
	}
	return string(runes[:maxLen]) + "..."
}
// separator returns a muted horizontal rule (U+2500) spanning width
// columns; non-positive widths fall back to 80.
func separator(width int) string {
	cols := width
	if cols <= 0 {
		cols = 80
	}
	return styleMuted.Render(strings.Repeat("\u2500", cols))
}

View File

@ -0,0 +1,133 @@
package tui
import (
"fmt"
"sort"
"strings"
"github.com/DeBrosOfficial/network/pkg/cli/monitor"
)
// renderServicesTab renders a cross-node service matrix: one row per
// service name, one column per healthy node, followed by any failed
// systemd units grouped by node.
func renderServicesTab(snap *monitor.ClusterSnapshot, width int) string {
	if snap == nil {
		return styleMuted.Render("Collecting cluster data...")
	}
	reports := snap.Healthy()
	if len(reports) == 0 {
		return styleMuted.Render("No healthy nodes to display.")
	}
	var b strings.Builder
	// Collect all unique service names across nodes
	svcSet := make(map[string]bool)
	for _, r := range reports {
		if r.Services == nil {
			continue
		}
		for _, svc := range r.Services.Services {
			svcSet[svc.Name] = true
		}
	}
	svcNames := make([]string, 0, len(svcSet))
	for name := range svcSet {
		svcNames = append(svcNames, name)
	}
	sort.Strings(svcNames)
	if len(svcNames) == 0 {
		return styleMuted.Render("No services found on any node.")
	}
	b.WriteString(styleBold.Render("Service Matrix"))
	b.WriteString("\n")
	b.WriteString(separator(width))
	b.WriteString("\n\n")
	// Header: service name + each node host (hosts truncated to 15 chars)
	header := fmt.Sprintf(" %-28s", headerStyle.Render("SERVICE"))
	for _, r := range reports {
		host := nodeHost(r)
		if len(host) > 15 {
			host = host[:15]
		}
		header += fmt.Sprintf(" %-17s", headerStyle.Render(host))
	}
	b.WriteString(header)
	b.WriteString("\n")
	// Build a lookup: host -> service name -> ServiceInfo
	type svcKey struct {
		host string
		name string
	}
	svcMap := make(map[svcKey]string) // status string
	for _, r := range reports {
		host := nodeHost(r)
		if r.Services == nil {
			continue
		}
		for _, svc := range r.Services.Services {
			// Map systemd ActiveState to a colored label; a restart loop
			// overrides whatever state the unit reports.
			var st string
			switch {
			case svc.ActiveState == "active":
				st = styleHealthy.Render("active")
			case svc.ActiveState == "failed":
				st = styleCritical.Render("FAILED")
			case svc.ActiveState == "":
				st = styleMuted.Render("n/a")
			default:
				st = styleWarning.Render(svc.ActiveState)
			}
			if svc.RestartLoopRisk {
				st = styleCritical.Render("LOOP!")
			}
			svcMap[svcKey{host, svc.Name}] = st
		}
	}
	// Rows: "-" marks a service absent on that node.
	for _, svcName := range svcNames {
		row := fmt.Sprintf(" %-28s", svcName)
		for _, r := range reports {
			host := nodeHost(r)
			st, ok := svcMap[svcKey{host, svcName}]
			if !ok {
				st = styleMuted.Render("-")
			}
			row += fmt.Sprintf(" %-17s", st)
		}
		b.WriteString(row)
		b.WriteString("\n")
	}
	// Failed units per node (section only shown when at least one exists)
	hasFailedUnits := false
	for _, r := range reports {
		if r.Services != nil && len(r.Services.FailedUnits) > 0 {
			hasFailedUnits = true
			break
		}
	}
	if hasFailedUnits {
		b.WriteString("\n")
		b.WriteString(styleBold.Render("Failed Systemd Units"))
		b.WriteString("\n")
		b.WriteString(separator(width))
		b.WriteString("\n")
		for _, r := range reports {
			if r.Services == nil || len(r.Services.FailedUnits) == 0 {
				continue
			}
			b.WriteString(fmt.Sprintf(" %s: %s\n",
				styleBold.Render(nodeHost(r)),
				styleCritical.Render(strings.Join(r.Services.FailedUnits, ", ")),
			))
		}
	}
	return b.String()
}

View File

@ -0,0 +1,58 @@
package tui
import (
"github.com/charmbracelet/lipgloss"
"github.com/DeBrosOfficial/network/pkg/cli/production/report"
)
var (
	// Base palette for the monitor TUI.
	colorGreen  = lipgloss.Color("#00ff00")
	colorRed    = lipgloss.Color("#ff0000")
	colorYellow = lipgloss.Color("#ffff00")
	colorMuted  = lipgloss.Color("#888888")
	colorWhite  = lipgloss.Color("#ffffff")
	colorBg     = lipgloss.Color("#1a1a2e") // NOTE(review): not referenced in the code shown here — confirm it is used elsewhere in the package
	// Semantic styles shared by all tab renderers.
	styleHealthy  = lipgloss.NewStyle().Foreground(colorGreen)
	styleWarning  = lipgloss.NewStyle().Foreground(colorYellow)
	styleCritical = lipgloss.NewStyle().Foreground(colorRed)
	styleMuted    = lipgloss.NewStyle().Foreground(colorMuted)
	styleBold     = lipgloss.NewStyle().Bold(true)
	// Tab bar and window chrome styles.
	activeTab   = lipgloss.NewStyle().Bold(true).Foreground(colorWhite).Background(lipgloss.Color("#333333")).Padding(0, 1)
	inactiveTab = lipgloss.NewStyle().Foreground(colorMuted).Padding(0, 1)
	headerStyle = lipgloss.NewStyle().Bold(true).Foreground(colorWhite)
	footerStyle = lipgloss.NewStyle().Foreground(colorMuted)
)
// statusStr returns a green "OK" when ok is true, red "DOWN" when false.
func statusStr(ok bool) string {
if ok {
return styleHealthy.Render("OK")
}
return styleCritical.Render("DOWN")
}
// severityStyle picks the lipgloss style for an alert severity string.
// "info" and any unknown severity both fall back to the muted style,
// exactly as before — the cases are simply merged here.
func severityStyle(s string) lipgloss.Style {
	if s == "critical" {
		return styleCritical
	}
	if s == "warning" {
		return styleWarning
	}
	return styleMuted
}
// nodeHost returns the preferred display host for a node report: the
// public IP when known, otherwise the hostname.
func nodeHost(r *report.NodeReport) string {
	host := r.Hostname
	if r.PublicIP != "" {
		host = r.PublicIP
	}
	return host
}

View File

@ -0,0 +1,47 @@
package tui
import "strings"
// renderTabBar renders the tab strip with the active tab highlighted and
// pads the result with spaces out to the terminal width. The visible
// length is measured on the ANSI-stripped string.
func renderTabBar(active int, width int) string {
	labels := make([]string, 0, len(tabNames))
	for i, name := range tabNames {
		style := inactiveTab
		if i == active {
			style = activeTab
		}
		labels = append(labels, style.Render(name))
	}
	bar := strings.Join(labels, styleMuted.Render(" | "))
	if width > 0 {
		if visible := len(stripAnsi(bar)); visible < width {
			bar += strings.Repeat(" ", width-visible)
		}
	}
	return bar
}
// stripAnsi removes ANSI escape sequences from s so its printable length
// can be measured. CSI sequences (ESC '[' ... final byte in 0x40-0x7E) are
// skipped per ECMA-48, which correctly handles terminators that are not
// letters, such as '~' or '@' — the previous letter-only scan kept
// discarding printable text after such a sequence until it hit the next
// ASCII letter. Any other escape sequence is skipped up to the first
// ASCII letter, matching the old behavior.
// Note: callers measuring len() of the result get bytes, so multi-byte
// runes still count as more than one column.
func stripAnsi(s string) string {
	out := make([]byte, 0, len(s))
	for i := 0; i < len(s); i++ {
		if s[i] != '\x1b' {
			out = append(out, s[i])
			continue
		}
		if i+1 < len(s) && s[i+1] == '[' {
			// CSI: skip parameter/intermediate bytes; the loop stops on the
			// final byte, which the outer i++ then consumes.
			i += 2
			for i < len(s) && (s[i] < 0x40 || s[i] > 0x7e) {
				i++
			}
			continue
		}
		// Non-CSI escape: skip until an ASCII letter terminates it.
		for i+1 < len(s) {
			i++
			if (s[i] >= 'a' && s[i] <= 'z') || (s[i] >= 'A' && s[i] <= 'Z') {
				break
			}
		}
	}
	return string(out)
}

View File

@ -0,0 +1,129 @@
package tui
import (
"fmt"
"strings"
"github.com/DeBrosOfficial/network/pkg/cli/monitor"
"github.com/DeBrosOfficial/network/pkg/cli/production/report"
)
// renderWGMesh renders the WireGuard mesh status tab: a per-node summary
// table (interface state, mesh IP, peer count, listen port) followed by
// per-node peer detail lines. Only healthy nodes are considered, so the
// expected full-mesh peer count is derived from reporting nodes only.
func renderWGMesh(snap *monitor.ClusterSnapshot, width int) string {
	if snap == nil {
		return styleMuted.Render("Collecting cluster data...")
	}
	reports := snap.Healthy()
	if len(reports) == 0 {
		return styleMuted.Render("No healthy nodes to display.")
	}
	var b strings.Builder
	// Mesh overview
	b.WriteString(styleBold.Render("WireGuard Mesh Overview"))
	b.WriteString("\n")
	b.WriteString(separator(width))
	b.WriteString("\n\n")
	// Summary header
	b.WriteString(fmt.Sprintf(" %-18s %-10s %-18s %-6s %-8s\n",
		headerStyle.Render("HOST"),
		headerStyle.Render("IFACE"),
		headerStyle.Render("WG IP"),
		headerStyle.Render("PEERS"),
		headerStyle.Render("PORT"),
	))
	wgNodes := 0
	for _, r := range reports {
		if r.WireGuard == nil {
			continue
		}
		wgNodes++
		wg := r.WireGuard
		ifaceStr := statusStr(wg.InterfaceUp)
		b.WriteString(fmt.Sprintf(" %-18s %-10s %-18s %-6d %-8d\n",
			nodeHost(r), ifaceStr, wg.WgIP, wg.PeerCount, wg.ListenPort))
	}
	if wgNodes == 0 {
		return styleMuted.Render("No nodes have WireGuard configured.")
	}
	// In a full mesh every node should peer with every other WG node.
	expectedPeers := wgNodes - 1
	// Per-node peer details
	b.WriteString("\n")
	b.WriteString(styleBold.Render("Peer Details"))
	b.WriteString("\n")
	b.WriteString(separator(width))
	b.WriteString("\n")
	for _, r := range reports {
		if r.WireGuard == nil || len(r.WireGuard.Peers) == 0 {
			continue
		}
		b.WriteString("\n")
		host := nodeHost(r)
		// Red when the node sees fewer peers than a full mesh requires.
		peerCountStr := fmt.Sprintf("%d/%d peers", len(r.WireGuard.Peers), expectedPeers)
		if len(r.WireGuard.Peers) < expectedPeers {
			peerCountStr = styleCritical.Render(peerCountStr)
		} else {
			peerCountStr = styleHealthy.Render(peerCountStr)
		}
		b.WriteString(fmt.Sprintf(" %s %s\n", styleBold.Render(host), peerCountStr))
		for _, p := range r.WireGuard.Peers {
			b.WriteString(renderPeerLine(p))
		}
	}
	return b.String()
}
// renderPeerLine formats one WireGuard peer as a single line: truncated
// public key, endpoint, handshake age (red "never" when no handshake has
// occurred, yellow when older than 180 seconds, green otherwise),
// transfer counters and allowed IPs.
func renderPeerLine(p report.WGPeerInfo) string {
	key := p.PublicKey
	if len(key) > 12 {
		key = key[:12] + "..."
	}
	// Handshake status
	hsStr := styleHealthy.Render(fmt.Sprintf("%ds ago", p.HandshakeAgeSec))
	if p.LatestHandshake == 0 {
		hsStr = styleCritical.Render("never")
	} else if p.HandshakeAgeSec > 180 {
		hsStr = styleWarning.Render(fmt.Sprintf("%ds ago", p.HandshakeAgeSec))
	}
	// Transfer
	rx := formatBytes(p.TransferRx)
	tx := formatBytes(p.TransferTx)
	return fmt.Sprintf(" key=%s endpoint=%-22s hs=%s rx=%s tx=%s ips=%s\n",
		styleMuted.Render(key),
		p.Endpoint,
		hsStr,
		rx, tx,
		p.AllowedIPs,
	)
}
// formatBytes renders a byte count with binary units (KB/MB/GB) and one
// decimal place; values below 1 KiB are printed as plain bytes.
func formatBytes(b int64) string {
	const (
		kb = int64(1) << 10
		mb = int64(1) << 20
		gb = int64(1) << 30
	)
	if b >= gb {
		return fmt.Sprintf("%.1fGB", float64(b)/float64(gb))
	}
	if b >= mb {
		return fmt.Sprintf("%.1fMB", float64(b)/float64(mb))
	}
	if b >= kb {
		return fmt.Sprintf("%.1fKB", float64(b)/float64(kb))
	}
	return fmt.Sprintf("%dB", b)
}

View File

@ -0,0 +1,97 @@
package report
import (
"context"
"os"
"regexp"
"strconv"
"strings"
"time"
)
// collectAnyone gathers Anyone Protocol relay/client health information.
// Every probe shells out with its own 4-second timeout and is best-effort:
// a failed command simply leaves the corresponding field at its zero value.
// NOTE: the deferred cancels inside the brace-scoped sections all run at
// function return, not at the end of each section; with 4s timeouts and a
// quick-returning function this is harmless.
func collectAnyone() *AnyoneReport {
	r := &AnyoneReport{}
	// 1. RelayActive: systemctl is-active orama-anyone-relay
	{
		ctx, cancel := context.WithTimeout(context.Background(), 4*time.Second)
		defer cancel()
		if out, err := runCmd(ctx, "systemctl", "is-active", "orama-anyone-relay"); err == nil {
			r.RelayActive = strings.TrimSpace(out) == "active"
		}
	}
	// 2. ClientActive: systemctl is-active orama-anyone-client
	{
		ctx, cancel := context.WithTimeout(context.Background(), 4*time.Second)
		defer cancel()
		if out, err := runCmd(ctx, "systemctl", "is-active", "orama-anyone-client"); err == nil {
			r.ClientActive = strings.TrimSpace(out) == "active"
		}
	}
	// 3. Mode: derive from active state (relay wins when both are active)
	if r.RelayActive {
		r.Mode = "relay"
	} else if r.ClientActive {
		r.Mode = "client"
	}
	// 4. ORPortListening, SocksListening, ControlListening: check ports in ss -tlnp
	// (9001/9050/9051 — presumably the anon daemon's Tor-derived defaults;
	// confirm against the deployment's anonrc)
	{
		ctx, cancel := context.WithTimeout(context.Background(), 4*time.Second)
		defer cancel()
		if out, err := runCmd(ctx, "ss", "-tlnp"); err == nil {
			r.ORPortListening = portIsListening(out, 9001)
			r.SocksListening = portIsListening(out, 9050)
			r.ControlListening = portIsListening(out, 9051)
		}
	}
	// 5. Bootstrapped / BootstrapPct: parse last "Bootstrapped" line from notices.log
	{
		ctx, cancel := context.WithTimeout(context.Background(), 4*time.Second)
		defer cancel()
		if out, err := runCmd(ctx, "bash", "-c",
			`grep "Bootstrapped" /var/log/anon/notices.log 2>/dev/null | tail -1`); err == nil {
			out = strings.TrimSpace(out)
			if out != "" {
				// Parse percentage from lines like:
				// "... Bootstrapped 100% (done): Done"
				// "... Bootstrapped 85%: Loading relay descriptors"
				re := regexp.MustCompile(`Bootstrapped\s+(\d+)%`)
				if m := re.FindStringSubmatch(out); len(m) >= 2 {
					if pct, err := strconv.Atoi(m[1]); err == nil {
						r.BootstrapPct = pct
						r.Bootstrapped = pct == 100
					}
				}
			}
		}
	}
	// 6. Fingerprint: read /var/lib/anon/fingerprint
	if data, err := os.ReadFile("/var/lib/anon/fingerprint"); err == nil {
		line := strings.TrimSpace(string(data))
		// The file may contain "nickname fingerprint" — extract just the fingerprint.
		fields := strings.Fields(line)
		if len(fields) >= 2 {
			r.Fingerprint = fields[1]
		} else if len(fields) == 1 {
			r.Fingerprint = fields[0]
		}
	}
	// 7. Nickname: extract from anonrc config
	{
		ctx, cancel := context.WithTimeout(context.Background(), 4*time.Second)
		defer cancel()
		if out, err := runCmd(ctx, "bash", "-c",
			`grep "^Nickname" /etc/anon/anonrc 2>/dev/null | awk '{print $2}'`); err == nil {
			r.Nickname = strings.TrimSpace(out)
		}
	}
	return r
}

View File

@ -0,0 +1,254 @@
package report
import (
"context"
"math"
"os"
"regexp"
"strconv"
"strings"
"time"
)
// collectDNS gathers CoreDNS, Caddy, and DNS resolution health information.
// Only called when /etc/coredns exists.
// Each probe shells out with its own 4-second timeout and is best-effort:
// a failed command leaves the corresponding field at its zero value,
// except the TLS day counters which default to -1 (failure state).
func collectDNS() *DNSReport {
	r := &DNSReport{}
	// Set TLS days to -1 by default (failure state).
	r.BaseTLSDaysLeft = -1
	r.WildTLSDaysLeft = -1
	// 1. CoreDNSActive: systemctl is-active coredns
	{
		ctx, cancel := context.WithTimeout(context.Background(), 4*time.Second)
		defer cancel()
		if out, err := runCmd(ctx, "systemctl", "is-active", "coredns"); err == nil {
			r.CoreDNSActive = strings.TrimSpace(out) == "active"
		}
	}
	// 2. CaddyActive: systemctl is-active caddy
	{
		ctx, cancel := context.WithTimeout(context.Background(), 4*time.Second)
		defer cancel()
		if out, err := runCmd(ctx, "systemctl", "is-active", "caddy"); err == nil {
			r.CaddyActive = strings.TrimSpace(out) == "active"
		}
	}
	// 3. Port53Bound: check :53 in ss -ulnp (UDP listeners).
	// The space/tab suffix avoids matching longer ports such as :5300.
	{
		ctx, cancel := context.WithTimeout(context.Background(), 4*time.Second)
		defer cancel()
		if out, err := runCmd(ctx, "ss", "-ulnp"); err == nil {
			r.Port53Bound = strings.Contains(out, ":53 ") || strings.Contains(out, ":53\t")
		}
	}
	// 4. Port80Bound and Port443Bound: check in ss -tlnp (TCP listeners)
	{
		ctx, cancel := context.WithTimeout(context.Background(), 4*time.Second)
		defer cancel()
		if out, err := runCmd(ctx, "ss", "-tlnp"); err == nil {
			r.Port80Bound = strings.Contains(out, ":80 ") || strings.Contains(out, ":80\t")
			r.Port443Bound = strings.Contains(out, ":443 ") || strings.Contains(out, ":443\t")
		}
	}
	// 5. CoreDNSMemMB: ps -C coredns -o rss= (RSS is reported in KB)
	{
		ctx, cancel := context.WithTimeout(context.Background(), 4*time.Second)
		defer cancel()
		if out, err := runCmd(ctx, "ps", "-C", "coredns", "-o", "rss=", "--no-headers"); err == nil {
			line := strings.TrimSpace(out)
			if line != "" {
				first := strings.Fields(line)[0]
				if kb, err := strconv.Atoi(first); err == nil {
					r.CoreDNSMemMB = kb / 1024
				}
			}
		}
	}
	// 6. CoreDNSRestarts: systemctl show coredns --property=NRestarts
	{
		ctx, cancel := context.WithTimeout(context.Background(), 4*time.Second)
		defer cancel()
		if out, err := runCmd(ctx, "systemctl", "show", "coredns", "--property=NRestarts"); err == nil {
			props := parseProperties(out)
			r.CoreDNSRestarts = parseInt(props["NRestarts"])
		}
	}
	// 7. LogErrors: grep errors from coredns journal (last 5 min);
	// the trailing "|| echo 0" keeps the command exit status zero when
	// grep finds no matches.
	{
		ctx, cancel := context.WithTimeout(context.Background(), 4*time.Second)
		defer cancel()
		if out, err := runCmd(ctx, "bash", "-c",
			`journalctl -u coredns --no-pager -n 100 --since "5 min ago" 2>/dev/null | grep -ciE "(error|ERR)" || echo 0`); err == nil {
			if n, err := strconv.Atoi(strings.TrimSpace(out)); err == nil {
				r.LogErrors = n
			}
		}
	}
	// 8. CorefileExists: check /etc/coredns/Corefile
	if _, err := os.Stat("/etc/coredns/Corefile"); err == nil {
		r.CorefileExists = true
	}
	// Parse domain from Corefile for DNS resolution tests.
	// Without a domain the resolution/TLS checks are skipped entirely.
	domain := parseDomain()
	if domain == "" {
		return r
	}
	// 9. SOAResolves: dig SOA
	{
		ctx, cancel := context.WithTimeout(context.Background(), 4*time.Second)
		defer cancel()
		if out, err := runCmd(ctx, "dig", "@127.0.0.1", "SOA", domain, "+short", "+time=2"); err == nil {
			r.SOAResolves = strings.TrimSpace(out) != ""
		}
	}
	// 10. NSResolves and NSRecordCount: dig NS (count non-empty result lines)
	{
		ctx, cancel := context.WithTimeout(context.Background(), 4*time.Second)
		defer cancel()
		if out, err := runCmd(ctx, "dig", "@127.0.0.1", "NS", domain, "+short", "+time=2"); err == nil {
			out = strings.TrimSpace(out)
			if out != "" {
				r.NSResolves = true
				lines := strings.Split(out, "\n")
				count := 0
				for _, l := range lines {
					if strings.TrimSpace(l) != "" {
						count++
					}
				}
				r.NSRecordCount = count
			}
		}
	}
	// 11. WildcardResolves: dig A test.<domain>
	{
		ctx, cancel := context.WithTimeout(context.Background(), 4*time.Second)
		defer cancel()
		if out, err := runCmd(ctx, "dig", "@127.0.0.1", "A", "test."+domain, "+short", "+time=2"); err == nil {
			r.WildcardResolves = strings.TrimSpace(out) != ""
		}
	}
	// 12. BaseAResolves: dig A <domain>
	{
		ctx, cancel := context.WithTimeout(context.Background(), 4*time.Second)
		defer cancel()
		if out, err := runCmd(ctx, "dig", "@127.0.0.1", "A", domain, "+short", "+time=2"); err == nil {
			r.BaseAResolves = strings.TrimSpace(out) != ""
		}
	}
	// 13. BaseTLSDaysLeft: check TLS cert expiry for base domain
	r.BaseTLSDaysLeft = checkTLSDaysLeft(domain, domain)
	// 14. WildTLSDaysLeft: check TLS cert expiry for wildcard
	r.WildTLSDaysLeft = checkTLSDaysLeft("*."+domain, domain)
	return r
}
// parseDomain reads /etc/coredns/Corefile and extracts the base domain.
// It looks for zone block declarations like "example.com {" or "*.example.com {"
// and returns the base domain (without wildcard prefix). Returns "" when the
// Corefile is missing or no domain-like zone declaration can be found.
func parseDomain() string {
	data, err := os.ReadFile("/etc/coredns/Corefile")
	if err != nil {
		return ""
	}
	content := string(data)
	// Look for domain patterns in the Corefile.
	// Common patterns:
	// example.com {
	// *.example.com {
	// example.com:53 {
	// We want to find a real domain, not "." (root zone).
	domainRe := regexp.MustCompile(`(?m)^\s*\*?\.?([a-zA-Z0-9][-a-zA-Z0-9]*\.[a-zA-Z0-9][-a-zA-Z0-9.]*[a-zA-Z])(?::\d+)?\s*\{`)
	matches := domainRe.FindStringSubmatch(content)
	if len(matches) >= 2 {
		return matches[1]
	}
	// Fallback: look for any line that looks like a domain block declaration.
	// Runs line by line in file order, so the first plausible zone wins.
	for _, line := range strings.Split(content, "\n") {
		line = strings.TrimSpace(line)
		if line == "" || strings.HasPrefix(line, "#") {
			continue
		}
		// Strip trailing "{" and port suffix.
		line = strings.TrimSuffix(line, "{")
		line = strings.TrimSpace(line)
		// Remove port if present (only when the suffix is fully numeric).
		if idx := strings.LastIndex(line, ":"); idx > 0 {
			if _, err := strconv.Atoi(line[idx+1:]); err == nil {
				line = line[:idx]
			}
		}
		// Strip wildcard prefix.
		line = strings.TrimPrefix(line, "*.")
		// Check if it looks like a domain (has at least one dot and no spaces).
		if strings.Contains(line, ".") && !strings.Contains(line, " ") && line != "." {
			return strings.TrimSpace(line)
		}
	}
	return ""
}
// checkTLSDaysLeft uses openssl to check the TLS certificate expiry date
// for a given SNI servername, connecting to localhost:443.
// Returns whole days until expiry, or -1 on any failure (openssl missing,
// no listener, unparsable date). The domain parameter is currently unused
// but kept so the existing call sites keep compiling.
// NOTE(review): servername is interpolated into a bash command line; it
// comes from the locally parsed Corefile, but confirm it can never carry
// shell metacharacters.
func checkTLSDaysLeft(servername, domain string) int {
	_ = domain // retained for interface compatibility; see doc comment
	ctx, cancel := context.WithTimeout(context.Background(), 4*time.Second)
	defer cancel()
	cmd := `echo | openssl s_client -servername ` + servername + ` -connect localhost:443 2>/dev/null | openssl x509 -noout -enddate 2>/dev/null`
	out, err := runCmd(ctx, "bash", "-c", cmd)
	if err != nil {
		return -1
	}
	// Output looks like: "notAfter=Mar 15 12:00:00 2025 GMT"
	out = strings.TrimSpace(out)
	if !strings.HasPrefix(out, "notAfter=") {
		return -1
	}
	dateStr := strings.TrimSpace(strings.TrimPrefix(out, "notAfter="))
	// OpenSSL prints the reference format "Jan 2 15:04:05 2006 GMT"; the
	// day-of-month may also appear zero-padded, so try both layouts.
	// (The original list contained the first layout twice.)
	layouts := []string{
		"Jan 2 15:04:05 2006 GMT",
		"Jan 02 15:04:05 2006 GMT",
	}
	for _, layout := range layouts {
		if t, err := time.Parse(layout, dateStr); err == nil {
			return int(math.Floor(time.Until(t).Hours() / 24))
		}
	}
	return -1
}

View File

@ -0,0 +1,63 @@
package report
import (
"context"
"encoding/json"
"io"
"net/http"
"time"
)
// collectGateway probes the main gateway health endpoint on localhost:6001
// and extracts the reported version plus per-subsystem status.
func collectGateway() *GatewayReport {
	report := &GatewayReport{}
	ctx, cancel := context.WithTimeout(context.Background(), 3*time.Second)
	defer cancel()
	req, err := http.NewRequestWithContext(ctx, http.MethodGet, "http://localhost:6001/v1/health", nil)
	if err != nil {
		return report
	}
	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		report.Responsive = false
		return report
	}
	defer resp.Body.Close()
	report.Responsive = true
	report.HTTPStatus = resp.StatusCode
	body, readErr := io.ReadAll(resp.Body)
	if readErr != nil {
		return report
	}
	// Expected payload shape:
	// {"status":"ok","version":"...","subsystems":{"rqlite":{"status":"ok","latency":"2ms"},...}}
	var payload struct {
		Status     string                     `json:"status"`
		Version    string                     `json:"version"`
		Subsystems map[string]json.RawMessage `json:"subsystems"`
	}
	if json.Unmarshal(body, &payload) != nil {
		return report
	}
	report.Version = payload.Version
	if len(payload.Subsystems) == 0 {
		return report
	}
	report.Subsystems = make(map[string]SubsystemHealth, len(payload.Subsystems))
	for name, raw := range payload.Subsystems {
		var sub SubsystemHealth
		if json.Unmarshal(raw, &sub) == nil {
			report.Subsystems[name] = sub
		}
	}
	return report
}

View File

@ -0,0 +1,148 @@
package report
import (
"bytes"
"context"
"encoding/json"
"io"
"net/http"
"os"
"strings"
"time"
)
// collectIPFS gathers IPFS daemon and cluster health information.
//
// It probes, in order: the systemd units for the daemon and cluster, the
// Kubo HTTP API on localhost:4501, the cluster API on localhost:9094, and
// the on-disk swarm key. Every probe is independent and best-effort; a
// failure leaves the corresponding field at its zero value.
func collectIPFS() *IPFSReport {
	r := &IPFSReport{}
	// 1. DaemonActive: systemctl is-active orama-ipfs
	{
		ctx, cancel := context.WithTimeout(context.Background(), 4*time.Second)
		defer cancel()
		if out, err := runCmd(ctx, "systemctl", "is-active", "orama-ipfs"); err == nil {
			r.DaemonActive = strings.TrimSpace(out) == "active"
		}
	}
	// 2. ClusterActive: systemctl is-active orama-ipfs-cluster
	{
		ctx, cancel := context.WithTimeout(context.Background(), 4*time.Second)
		defer cancel()
		if out, err := runCmd(ctx, "systemctl", "is-active", "orama-ipfs-cluster"); err == nil {
			r.ClusterActive = strings.TrimSpace(out) == "active"
		}
	}
	// 3. SwarmPeerCount: POST /api/v0/swarm/peers
	{
		body, err := ipfsPost("http://localhost:4501/api/v0/swarm/peers")
		if err == nil {
			var resp struct {
				Peers []interface{} `json:"Peers"`
			}
			if err := json.Unmarshal(body, &resp); err == nil {
				r.SwarmPeerCount = len(resp.Peers)
			}
		}
	}
	// 4. ClusterPeerCount: GET /peers
	{
		ctx, cancel := context.WithTimeout(context.Background(), 3*time.Second)
		defer cancel()
		if body, err := httpGet(ctx, "http://localhost:9094/peers"); err == nil {
			var peers []interface{}
			if err := json.Unmarshal(body, &peers); err == nil {
				r.ClusterPeerCount = len(peers)
			}
		}
	}
	// 5. RepoSizeBytes/RepoMaxBytes: POST /api/v0/repo/stat
	{
		body, err := ipfsPost("http://localhost:4501/api/v0/repo/stat")
		if err == nil {
			var resp struct {
				RepoSize   int64 `json:"RepoSize"`
				StorageMax int64 `json:"StorageMax"`
			}
			if err := json.Unmarshal(body, &resp); err == nil {
				r.RepoSizeBytes = resp.RepoSize
				r.RepoMaxBytes = resp.StorageMax
				if resp.StorageMax > 0 && resp.RepoSize > 0 {
					r.RepoUsePct = int(float64(resp.RepoSize) / float64(resp.StorageMax) * 100)
				}
			}
		}
	}
	// 6. KuboVersion: POST /api/v0/version
	{
		body, err := ipfsPost("http://localhost:4501/api/v0/version")
		if err == nil {
			var resp struct {
				Version string `json:"Version"`
			}
			if err := json.Unmarshal(body, &resp); err == nil {
				r.KuboVersion = resp.Version
			}
		}
	}
	// 7. ClusterVersion: GET /id
	{
		ctx, cancel := context.WithTimeout(context.Background(), 3*time.Second)
		defer cancel()
		if body, err := httpGet(ctx, "http://localhost:9094/id"); err == nil {
			var resp struct {
				Version string `json:"version"`
			}
			if err := json.Unmarshal(body, &resp); err == nil {
				r.ClusterVersion = resp.Version
			}
		}
	}
	// 8. HasSwarmKey: check file existence
	if _, err := os.Stat("/opt/orama/.orama/data/ipfs/repo/swarm.key"); err == nil {
		r.HasSwarmKey = true
	}
	// 9. BootstrapEmpty: POST /api/v0/bootstrap/list
	{
		body, err := ipfsPost("http://localhost:4501/api/v0/bootstrap/list")
		if err == nil {
			var resp struct {
				Peers []interface{} `json:"Peers"`
			}
			if err := json.Unmarshal(body, &resp); err == nil {
				// len() of a nil slice is 0, so one check covers both a
				// missing and an empty bootstrap list.
				r.BootstrapEmpty = len(resp.Peers) == 0
			} else {
				// Unparseable response: conservatively treat as empty.
				r.BootstrapEmpty = true
			}
		}
	}
	return r
}
// ipfsPost sends a POST request with an empty body to an IPFS API endpoint
// and returns the raw response bytes. Kubo requires POST for every API call;
// the whole round-trip is bounded by a 3-second timeout.
func ipfsPost(endpoint string) ([]byte, error) {
	ctx, cancel := context.WithTimeout(context.Background(), 3*time.Second)
	defer cancel()
	req, reqErr := http.NewRequestWithContext(ctx, http.MethodPost, endpoint, bytes.NewReader(nil))
	if reqErr != nil {
		return nil, reqErr
	}
	resp, doErr := http.DefaultClient.Do(req)
	if doErr != nil {
		return nil, doErr
	}
	defer resp.Body.Close()
	return io.ReadAll(resp.Body)
}

View File

@ -0,0 +1,205 @@
package report
import (
"context"
"encoding/json"
"fmt"
"io"
"net/http"
"os"
"path/filepath"
"regexp"
"strconv"
"strings"
"time"
)
// collectNamespaces discovers every deployed namespace on this node and
// builds a health report for each one's per-namespace services
// (RQLite, Olric, Gateway).
func collectNamespaces() []NamespaceReport {
	found := discoverNamespaces()
	if len(found) == 0 {
		return nil
	}
	reports := make([]NamespaceReport, 0, len(found))
	for _, ns := range found {
		reports = append(reports, collectNamespaceReport(ns))
	}
	return reports
}
// nsInfo pairs a discovered namespace name with the base port its RQLite
// HTTP API listens on. Derived services are probed at fixed offsets from
// this base (Olric at portBase+2, gateway at portBase+4).
type nsInfo struct {
	name     string
	portBase int
}
// discoverNamespaces enumerates deployed namespaces, first from systemd unit
// files (orama-deploy-<name>-rqlite.service) and then from the namespace data
// directory on disk. A namespace is reported only when its RQLite port base
// can be determined from a unit file.
func discoverNamespaces() []nsInfo {
	var found []nsInfo
	known := make(map[string]bool)
	// add records a namespace once, keeping it only if a port base is found.
	add := func(name, unitPath string) {
		if name == "" || known[name] {
			return
		}
		known[name] = true
		if base := parsePortBaseFromUnit(unitPath); base > 0 {
			found = append(found, nsInfo{name: name, portBase: base})
		}
	}
	// Pass 1: systemd unit files.
	units, _ := filepath.Glob("/etc/systemd/system/orama-deploy-*-rqlite.service")
	for _, unit := range units {
		base := filepath.Base(unit)
		// Unit names follow orama-deploy-<name>-rqlite.service.
		name := strings.TrimSuffix(strings.TrimPrefix(base, "orama-deploy-"), "-rqlite.service")
		add(name, unit)
	}
	// Pass 2: namespace directories not already seen via systemd.
	if entries, err := os.ReadDir("/opt/orama/.orama/data/namespaces"); err == nil {
		for _, entry := range entries {
			if !entry.IsDir() {
				continue
			}
			name := entry.Name()
			add(name, fmt.Sprintf("/etc/systemd/system/orama-deploy-%s-rqlite.service", name))
		}
	}
	return found
}
// Patterns for extracting a namespace port base from a systemd unit file.
// Compiled once at package scope instead of on every call.
var (
	unitHTTPAddrRe = regexp.MustCompile(`-http-addr\s+\S+:(\d+)`)
	unitAddrRe     = regexp.MustCompile(`(?:-addr|-http)\s+\S*:(\d+)`)
	unitPortBaseRe = regexp.MustCompile(`PORT_BASE=(\d+)`)
	unitEnvFileRe  = regexp.MustCompile(`EnvironmentFile=(.+)`)
)

// parsePortBaseFromUnit reads a systemd unit file and extracts the port base
// (the RQLite HTTP port) from ExecStart arguments or environment variables.
//
// Sources are tried in order of specificity:
//  1. "-http-addr host:PORT" in ExecStart
//  2. a port on a generic "-addr" / "-http" flag
//  3. "PORT_BASE=NNNN" inline in the unit file
//  4. "PORT_BASE=NNNN" inside a referenced EnvironmentFile
//
// Returns 0 when the file is unreadable or no port can be found.
func parsePortBaseFromUnit(unitPath string) int {
	data, err := os.ReadFile(unitPath)
	if err != nil {
		return 0
	}
	content := string(data)
	if port := firstPort(unitHTTPAddrRe, content); port > 0 {
		return port
	}
	if port := firstPort(unitAddrRe, content); port > 0 {
		return port
	}
	if port := firstPort(unitPortBaseRe, content); port > 0 {
		return port
	}
	// Fall back to a referenced EnvironmentFile; a leading "-" on the path
	// is systemd's "ignore if missing" marker and must be stripped.
	if m := unitEnvFileRe.FindStringSubmatch(content); len(m) >= 2 {
		envPath := strings.TrimPrefix(strings.TrimSpace(m[1]), "-")
		if envData, err := os.ReadFile(envPath); err == nil {
			return firstPort(unitPortBaseRe, string(envData))
		}
	}
	return 0
}

// firstPort returns the first captured decimal port of re in s, or 0 when
// there is no match or the capture does not parse as an int.
func firstPort(re *regexp.Regexp, s string) int {
	if m := re.FindStringSubmatch(s); len(m) >= 2 {
		if port, err := strconv.Atoi(m[1]); err == nil {
			return port
		}
	}
	return 0
}
// collectNamespaceReport checks the health of services for a single namespace.
//
// Services are reached at fixed offsets from the namespace port base:
// RQLite HTTP at portBase, Olric at portBase+2, gateway at portBase+4.
// Every probe is best-effort; a failure leaves the zero value in place.
func collectNamespaceReport(ns nsInfo) NamespaceReport {
	r := NamespaceReport{
		Name:     ns.name,
		PortBase: ns.portBase,
	}
	// 1. RQLiteUp + RQLiteState: GET http://localhost:<port_base>/status
	{
		ctx, cancel := context.WithTimeout(context.Background(), 3*time.Second)
		defer cancel() // deferred to function exit; acceptable for this short function
		url := fmt.Sprintf("http://localhost:%d/status", ns.portBase)
		if body, err := httpGet(ctx, url); err == nil {
			r.RQLiteUp = true
			var status map[string]interface{}
			if err := json.Unmarshal(body, &status); err == nil {
				// Raft role (leader/follower/...) lives at store.raft.state.
				r.RQLiteState = getNestedString(status, "store", "raft", "state")
			}
		}
	}
	// 2. RQLiteReady: GET http://localhost:<port_base>/readyz
	{
		ctx, cancel := context.WithTimeout(context.Background(), 3*time.Second)
		defer cancel()
		url := fmt.Sprintf("http://localhost:%d/readyz", ns.portBase)
		req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil)
		if err == nil {
			if resp, err := http.DefaultClient.Do(req); err == nil {
				// Drain before closing so the connection can be reused.
				io.Copy(io.Discard, resp.Body)
				resp.Body.Close()
				r.RQLiteReady = resp.StatusCode == http.StatusOK
			}
		}
	}
	// 3. OlricUp: check if port_base+2 is listening
	{
		ctx, cancel := context.WithTimeout(context.Background(), 4*time.Second)
		defer cancel()
		if out, err := runCmd(ctx, "ss", "-tlnp"); err == nil {
			r.OlricUp = portIsListening(out, ns.portBase+2)
		}
	}
	// 4. GatewayUp + GatewayStatus: GET http://localhost:<port_base+4>/v1/health
	{
		ctx, cancel := context.WithTimeout(context.Background(), 3*time.Second)
		defer cancel()
		url := fmt.Sprintf("http://localhost:%d/v1/health", ns.portBase+4)
		req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil)
		if err == nil {
			if resp, err := http.DefaultClient.Do(req); err == nil {
				io.Copy(io.Discard, resp.Body)
				resp.Body.Close()
				// Any HTTP response counts as "up"; the raw status code is
				// recorded so callers can tell healthy (200) from degraded.
				r.GatewayUp = true
				r.GatewayStatus = resp.StatusCode
			}
		}
	}
	return r
}

View File

@ -0,0 +1,253 @@
package report
import (
"context"
"os"
"regexp"
"sort"
"strconv"
"strings"
"time"
)
// collectNetwork gathers network connectivity, TCP stats, listening ports,
// and firewall status.
//
// All probes shell out to standard Linux tooling (ping, ip, ss, ufw) and are
// best-effort: a failed command leaves the corresponding fields at their
// zero values.
func collectNetwork() *NetworkReport {
	r := &NetworkReport{}
	// 1. InternetReachable: ping 8.8.8.8
	{
		ctx, cancel := context.WithTimeout(context.Background(), 4*time.Second)
		defer cancel()
		// Single packet, 2-second per-packet deadline; success implies
		// outbound ICMP reachability only.
		if _, err := runCmd(ctx, "ping", "-c", "1", "-W", "2", "8.8.8.8"); err == nil {
			r.InternetReachable = true
		}
	}
	// 2. DefaultRoute: ip route show default
	{
		ctx, cancel := context.WithTimeout(context.Background(), 4*time.Second)
		defer cancel()
		if out, err := runCmd(ctx, "ip", "route", "show", "default"); err == nil {
			r.DefaultRoute = strings.TrimSpace(out) != ""
		}
	}
	// 3. WGRouteExists: ip route show dev wg0
	{
		ctx, cancel := context.WithTimeout(context.Background(), 4*time.Second)
		defer cancel()
		if out, err := runCmd(ctx, "ip", "route", "show", "dev", "wg0"); err == nil {
			r.WGRouteExists = strings.TrimSpace(out) != ""
		}
	}
	// 4. TCPEstablished / TCPTimeWait: parse `ss -s`
	{
		ctx, cancel := context.WithTimeout(context.Background(), 4*time.Second)
		defer cancel()
		if out, err := runCmd(ctx, "ss", "-s"); err == nil {
			for _, line := range strings.Split(out, "\n") {
				lower := strings.ToLower(line)
				if strings.HasPrefix(lower, "tcp:") || strings.Contains(lower, "estab") {
					// Parse "estab N" and "timewait N" patterns from the line.
					// NOTE(review): if several lines match, the last one wins
					// and may zero earlier values; `ss -s` is expected to have
					// a single "TCP: ... (estab N, ... timewait N ...)" summary
					// line — confirm on the target distros.
					r.TCPEstablished = extractSSCount(line, "estab")
					r.TCPTimeWait = extractSSCount(line, "timewait")
				}
			}
		}
	}
	// 5. TCPRetransRate: read /proc/net/snmp
	{
		if data, err := os.ReadFile("/proc/net/snmp"); err == nil {
			r.TCPRetransRate = parseTCPRetransRate(string(data))
		}
	}
	// 6. ListeningPorts: ss -tlnp (TCP) + ss -ulnp (UDP)
	{
		// Deduplicate by "port/proto" across the two ss invocations.
		seen := make(map[string]bool)
		ctx1, cancel1 := context.WithTimeout(context.Background(), 4*time.Second)
		defer cancel1()
		if out, err := runCmd(ctx1, "ss", "-tlnp"); err == nil {
			for _, pi := range parseSSListening(out, "tcp") {
				key := strconv.Itoa(pi.Port) + "/" + pi.Proto
				if !seen[key] {
					seen[key] = true
					r.ListeningPorts = append(r.ListeningPorts, pi)
				}
			}
		}
		ctx2, cancel2 := context.WithTimeout(context.Background(), 4*time.Second)
		defer cancel2()
		if out, err := runCmd(ctx2, "ss", "-ulnp"); err == nil {
			for _, pi := range parseSSListening(out, "udp") {
				key := strconv.Itoa(pi.Port) + "/" + pi.Proto
				if !seen[key] {
					seen[key] = true
					r.ListeningPorts = append(r.ListeningPorts, pi)
				}
			}
		}
		// Sort by port number for consistent output.
		sort.Slice(r.ListeningPorts, func(i, j int) bool {
			if r.ListeningPorts[i].Port != r.ListeningPorts[j].Port {
				return r.ListeningPorts[i].Port < r.ListeningPorts[j].Port
			}
			return r.ListeningPorts[i].Proto < r.ListeningPorts[j].Proto
		})
	}
	// 7. UFWActive: ufw status
	{
		ctx, cancel := context.WithTimeout(context.Background(), 4*time.Second)
		defer cancel()
		if out, err := runCmd(ctx, "ufw", "status"); err == nil {
			r.UFWActive = strings.Contains(out, "Status: active")
		}
	}
	// 8. UFWRules: ufw status numbered (only worth running when active)
	if r.UFWActive {
		ctx, cancel := context.WithTimeout(context.Background(), 4*time.Second)
		defer cancel()
		if out, err := runCmd(ctx, "ufw", "status", "numbered"); err == nil {
			r.UFWRules = parseUFWRules(out)
		}
	}
	return r
}
// extractSSCount pulls the integer that follows keyword (e.g. "estab 42" or
// "timewait 7") out of a summary line produced by `ss -s`. Returns 0 when
// the keyword is absent or the number does not parse.
func extractSSCount(line, keyword string) int {
	re := regexp.MustCompile(keyword + `\s+(\d+)`)
	match := re.FindStringSubmatch(line)
	if len(match) < 2 {
		return 0
	}
	count, err := strconv.Atoi(match[1])
	if err != nil {
		return 0
	}
	return count
}
// parseTCPRetransRate computes the TCP retransmission percentage
// (RetransSegs / OutSegs * 100) from the contents of /proc/net/snmp.
//
// The file stores paired lines: a "Tcp:" header naming the columns followed
// by a "Tcp:" line with the values. Only the first valid pair is used;
// 0 is returned when no pair is found or OutSegs is zero.
func parseTCPRetransRate(data string) float64 {
	lines := strings.Split(data, "\n")
	for i := 0; i+1 < len(lines); i++ {
		headerLine, valueLine := lines[i], lines[i+1]
		if !strings.HasPrefix(headerLine, "Tcp:") || !strings.HasPrefix(valueLine, "Tcp:") {
			continue
		}
		cols := strings.Fields(headerLine)
		vals := strings.Fields(valueLine)
		if len(cols) != len(vals) {
			continue
		}
		var outSegs, retransSegs float64
		for col, name := range cols {
			v, err := strconv.ParseFloat(vals[col], 64)
			if err != nil {
				continue
			}
			switch name {
			case "OutSegs":
				outSegs = v
			case "RetransSegs":
				retransSegs = v
			}
		}
		if outSegs > 0 {
			return retransSegs / outSegs * 100
		}
		return 0
	}
	return 0
}
// ssProcessRe matches the owning process name inside the ss
// `users:(("name",pid=...))` column. Compiled once at package scope
// (the original recompiled it on every call).
var ssProcessRe = regexp.MustCompile(`users:\(\("([^"]+)"`)

// parseSSListening parses the output of `ss -tlnp` or `ss -ulnp` to extract
// port numbers and process names. proto ("tcp" or "udp") is stamped onto
// every returned entry. Header lines and malformed rows are skipped; the
// port is taken from the first field ending in a numeric ":PORT" suffix.
func parseSSListening(output, proto string) []PortInfo {
	var ports []PortInfo
	for _, line := range strings.Split(output, "\n") {
		line = strings.TrimSpace(line)
		// Skip header and empty lines.
		if line == "" || strings.HasPrefix(line, "State") || strings.HasPrefix(line, "Netid") {
			continue
		}
		fields := strings.Fields(line)
		if len(fields) < 4 {
			continue
		}
		// The local address:port is typically the 4th field (index 3) for ss -tlnp
		// or the 5th field (index 4) for some formats. We look for a field with ":PORT".
		localAddr := ""
		for _, f := range fields {
			if strings.Contains(f, ":") && !strings.HasPrefix(f, "users:") {
				// Could be *:port, 0.0.0.0:port, [::]:port, 127.0.0.1:port, etc.
				if idx := strings.LastIndex(f, ":"); idx >= 0 {
					portStr := f[idx+1:]
					if _, err := strconv.Atoi(portStr); err == nil {
						localAddr = f
						break
					}
				}
			}
		}
		if localAddr == "" {
			continue
		}
		idx := strings.LastIndex(localAddr, ":")
		if idx < 0 {
			continue
		}
		port, err := strconv.Atoi(localAddr[idx+1:])
		if err != nil {
			continue
		}
		process := ""
		if m := ssProcessRe.FindStringSubmatch(line); len(m) >= 2 {
			process = m[1]
		}
		ports = append(ports, PortInfo{
			Port:    port,
			Proto:   proto,
			Process: process,
		})
	}
	return ports
}
// parseUFWRules pulls the numbered rule lines out of `ufw status numbered`
// output. Rule lines are recognized by their leading "[ N]" marker; the
// Status/To/separator headers and blank lines are dropped.
func parseUFWRules(output string) []string {
	var rules []string
	for _, raw := range strings.Split(output, "\n") {
		trimmed := strings.TrimSpace(raw)
		if trimmed == "" {
			continue
		}
		if !strings.HasPrefix(trimmed, "[") || !strings.Contains(trimmed, "]") {
			continue
		}
		rules = append(rules, trimmed)
	}
	return rules
}

View File

@ -0,0 +1,150 @@
package report
import (
"context"
"encoding/json"
"strconv"
"strings"
"time"
)
// collectOlric gathers Olric distributed cache health information.
//
// All probes are best-effort: any command or HTTP failure leaves the
// corresponding field at its zero value. Port 3322 is treated as the
// memberlist port; port 3320 serves a JSON member/coordinator summary
// (presumably Olric's HTTP status endpoint — confirm against deployment).
func collectOlric() *OlricReport {
	r := &OlricReport{}
	// 1. ServiceActive: systemctl is-active orama-olric
	{
		ctx, cancel := context.WithTimeout(context.Background(), 4*time.Second)
		defer cancel()
		if out, err := runCmd(ctx, "systemctl", "is-active", "orama-olric"); err == nil {
			r.ServiceActive = strings.TrimSpace(out) == "active"
		}
	}
	// 2. MemberlistUp: check if port 3322 is listening
	{
		ctx, cancel := context.WithTimeout(context.Background(), 4*time.Second)
		defer cancel()
		if out, err := runCmd(ctx, "ss", "-tlnp"); err == nil {
			r.MemberlistUp = portIsListening(out, 3322)
		}
	}
	// 3. RestartCount: systemctl show NRestarts
	{
		ctx, cancel := context.WithTimeout(context.Background(), 4*time.Second)
		defer cancel()
		if out, err := runCmd(ctx, "systemctl", "show", "orama-olric", "--property=NRestarts"); err == nil {
			props := parseProperties(out)
			r.RestartCount = parseInt(props["NRestarts"])
		}
	}
	// 4. ProcessMemMB: ps -C olric-server -o rss=
	{
		ctx, cancel := context.WithTimeout(context.Background(), 4*time.Second)
		defer cancel()
		if out, err := runCmd(ctx, "ps", "-C", "olric-server", "-o", "rss=", "--no-headers"); err == nil {
			line := strings.TrimSpace(out)
			if line != "" {
				// May have multiple lines if multiple processes; take the first.
				first := strings.Fields(line)[0]
				if kb, err := strconv.Atoi(first); err == nil {
					// ps reports RSS in KiB; convert to MiB.
					r.ProcessMemMB = kb / 1024
				}
			}
		}
	}
	// 5. LogErrors: grep errors from journal.
	// The trailing `|| echo 0` keeps the pipeline's exit status zero when
	// grep -c finds no matches, so runCmd does not treat it as a failure.
	{
		ctx, cancel := context.WithTimeout(context.Background(), 4*time.Second)
		defer cancel()
		if out, err := runCmd(ctx, "bash", "-c",
			`journalctl -u orama-olric --no-pager -n 200 --since "1 hour ago" 2>/dev/null | grep -ciE "(error|ERR)" || echo 0`); err == nil {
			if n, err := strconv.Atoi(strings.TrimSpace(out)); err == nil {
				r.LogErrors = n
			}
		}
	}
	// 6. LogSuspects: grep suspect/marking failed/dead
	{
		ctx, cancel := context.WithTimeout(context.Background(), 4*time.Second)
		defer cancel()
		if out, err := runCmd(ctx, "bash", "-c",
			`journalctl -u orama-olric --no-pager -n 200 --since "1 hour ago" 2>/dev/null | grep -ciE "(suspect|marking.*(failed|dead))" || echo 0`); err == nil {
			if n, err := strconv.Atoi(strings.TrimSpace(out)); err == nil {
				r.LogSuspects = n
			}
		}
	}
	// 7. LogFlapping: grep memberlist join/leave
	{
		ctx, cancel := context.WithTimeout(context.Background(), 4*time.Second)
		defer cancel()
		if out, err := runCmd(ctx, "bash", "-c",
			`journalctl -u orama-olric --no-pager -n 200 --since "1 hour ago" 2>/dev/null | grep -ciE "(memberlist.*(join|leave))" || echo 0`); err == nil {
			if n, err := strconv.Atoi(strings.TrimSpace(out)); err == nil {
				r.LogFlapping = n
			}
		}
	}
	// 8. Member info: try HTTP GET to http://localhost:3320/
	{
		ctx, cancel := context.WithTimeout(context.Background(), 3*time.Second)
		defer cancel()
		if body, err := httpGet(ctx, "http://localhost:3320/"); err == nil {
			var info struct {
				Coordinator string `json:"coordinator"`
				Members     []struct {
					Name string `json:"name"`
				} `json:"members"`
				// Some Olric versions expose a flat member list or a different structure.
			}
			if err := json.Unmarshal(body, &info); err == nil {
				r.Coordinator = info.Coordinator
				r.MemberCount = len(info.Members)
				for _, m := range info.Members {
					r.Members = append(r.Members, m.Name)
				}
			}
			// Fallback: try to extract member count from a different JSON
			// layout where "members" is a flat array of strings.
			if r.MemberCount == 0 {
				var raw map[string]interface{}
				if err := json.Unmarshal(body, &raw); err == nil {
					if members, ok := raw["members"]; ok {
						if arr, ok := members.([]interface{}); ok {
							r.MemberCount = len(arr)
							for _, m := range arr {
								if s, ok := m.(string); ok {
									r.Members = append(r.Members, s)
								}
							}
						}
					}
					if coord, ok := raw["coordinator"].(string); ok && r.Coordinator == "" {
						r.Coordinator = coord
					}
				}
			}
		}
	}
	return r
}
// portIsListening reports whether the given port appears as a listening
// local address in `ss -tlnp`-style output.
//
// The match requires a whitespace-delimited field to END with ":<port>",
// so that e.g. port 3322 no longer falsely matches an address such as
// "0.0.0.0:33221" (the previous substring check had that false positive).
func portIsListening(ssOutput string, port int) bool {
	suffix := ":" + strconv.Itoa(port)
	for _, line := range strings.Split(ssOutput, "\n") {
		for _, field := range strings.Fields(line) {
			if strings.HasSuffix(field, suffix) {
				return true
			}
		}
	}
	return false
}

View File

@ -0,0 +1,89 @@
package report
import (
"context"
"strconv"
"strings"
"time"
)
// oramaProcessNames lists command substrings that identify orama-related processes.
// Matching is case-insensitive substring matching (see isOramaProcess), so
// e.g. "rqlite" also matches "rqlited".
var oramaProcessNames = []string{
	"orama", "rqlite", "olric", "ipfs", "caddy", "coredns",
}
// collectProcesses gathers zombie/orphan process info and panic counts from
// logs. A single `ps` invocation feeds both the zombie and the orphan scan.
func collectProcesses() *ProcessReport {
	report := &ProcessReport{}
	ctx, cancel := context.WithTimeout(context.Background(), 4*time.Second)
	defer cancel()
	psOut, psErr := runCmd(ctx, "ps", "-eo", "pid,ppid,state,comm", "--no-headers")
	if psErr == nil {
		for _, raw := range strings.Split(psOut, "\n") {
			raw = strings.TrimSpace(raw)
			if raw == "" {
				continue
			}
			cols := strings.Fields(raw)
			if len(cols) < 4 {
				continue
			}
			pid, _ := strconv.Atoi(cols[0])
			ppid, _ := strconv.Atoi(cols[1])
			state := cols[2]
			cmdline := strings.Join(cols[3:], " ")
			info := ProcessInfo{
				PID:     pid,
				PPID:    ppid,
				State:   state,
				Command: cmdline,
			}
			// Zombie: exited but not yet reaped ("Z" state).
			if state == "Z" {
				report.Zombies = append(report.Zombies, info)
			}
			// Orphan: reparented to init (PPID 1) while orama-related.
			if ppid == 1 && isOramaProcess(cmdline) {
				report.Orphans = append(report.Orphans, info)
			}
		}
	}
	report.ZombieCount = len(report.Zombies)
	report.OrphanCount = len(report.Orphans)
	// PanicCount: panics/fatals in the node journal over the last hour.
	{
		jctx, jcancel := context.WithTimeout(context.Background(), 4*time.Second)
		defer jcancel()
		out, err := runCmd(jctx, "bash", "-c",
			`journalctl -u orama-node --no-pager -n 500 --since "1 hour ago" 2>/dev/null | grep -ciE "(panic|fatal)" || echo 0`)
		if err == nil {
			if n, err := strconv.Atoi(strings.TrimSpace(out)); err == nil {
				report.PanicCount = n
			}
		}
	}
	return report
}
// isOramaProcess reports whether command mentions any orama-related process
// name from oramaProcessNames (case-insensitive substring match).
func isOramaProcess(command string) bool {
	haystack := strings.ToLower(command)
	for _, needle := range oramaProcessNames {
		if strings.Contains(haystack, needle) {
			return true
		}
	}
	return false
}

View File

@ -0,0 +1,165 @@
package report
import (
"context"
"encoding/json"
"fmt"
"io"
"net/http"
"os"
"os/exec"
"strings"
"sync"
"time"
)
// Handle is the main entry point for `orama node report`.
// It collects system, service, and component information in parallel,
// then outputs the full NodeReport as JSON to stdout.
//
// When jsonFlag is false the JSON is pretty-printed; when true it is
// compact (NOTE(review): double-check this polarity against the CLI flag
// definition). version is embedded verbatim in the report.
func Handle(jsonFlag bool, version string) error {
	start := time.Now()
	rpt := &NodeReport{
		Timestamp: start.UTC(),
		Version:   version,
	}
	// Hostname failure is non-fatal; the field just stays empty.
	if h, err := os.Hostname(); err == nil {
		rpt.Hostname = h
	}
	// Collector errors (panics) are appended under a mutex because the
	// collectors below run concurrently.
	var mu sync.Mutex
	addError := func(msg string) {
		mu.Lock()
		rpt.Errors = append(rpt.Errors, msg)
		mu.Unlock()
	}
	// safeGo launches a collector goroutine with panic recovery so that a
	// single broken collector cannot take down the whole report.
	safeGo := func(wg *sync.WaitGroup, name string, fn func()) {
		wg.Add(1)
		go func() {
			defer wg.Done()
			defer func() {
				if r := recover(); r != nil {
					addError(fmt.Sprintf("%s collector panicked: %v", name, r))
				}
			}()
			fn()
		}()
	}
	// Each collector writes to a distinct field of rpt, so the results
	// themselves need no locking.
	var wg sync.WaitGroup
	safeGo(&wg, "system", func() {
		rpt.System = collectSystem()
	})
	safeGo(&wg, "services", func() {
		rpt.Services = collectServices()
	})
	safeGo(&wg, "rqlite", func() {
		rpt.RQLite = collectRQLite()
	})
	safeGo(&wg, "olric", func() {
		rpt.Olric = collectOlric()
	})
	safeGo(&wg, "ipfs", func() {
		rpt.IPFS = collectIPFS()
	})
	safeGo(&wg, "gateway", func() {
		rpt.Gateway = collectGateway()
	})
	safeGo(&wg, "wireguard", func() {
		rpt.WireGuard = collectWireGuard()
	})
	safeGo(&wg, "dns", func() {
		// Only collect DNS info if this node runs CoreDNS.
		if _, err := os.Stat("/etc/coredns"); err == nil {
			rpt.DNS = collectDNS()
		}
	})
	safeGo(&wg, "anyone", func() {
		rpt.Anyone = collectAnyone()
	})
	safeGo(&wg, "network", func() {
		rpt.Network = collectNetwork()
	})
	safeGo(&wg, "processes", func() {
		rpt.Processes = collectProcesses()
	})
	safeGo(&wg, "namespaces", func() {
		rpt.Namespaces = collectNamespaces()
	})
	wg.Wait()
	// Populate top-level WireGuard IP from the WireGuard collector result.
	if rpt.WireGuard != nil && rpt.WireGuard.WgIP != "" {
		rpt.WGIP = rpt.WireGuard.WgIP
	}
	// Record how long the whole collection took, for observability.
	rpt.CollectMS = time.Since(start).Milliseconds()
	enc := json.NewEncoder(os.Stdout)
	if !jsonFlag {
		enc.SetIndent("", "  ")
	}
	return enc.Encode(rpt)
}
// ---------------------------------------------------------------------------
// Helpers
// ---------------------------------------------------------------------------

// runCmd runs an external command, bounded by a 4-second timeout layered on
// top of the caller's context, and returns its trimmed standard output.
// Only stdout is captured (exec.Cmd.Output); stderr is not included.
func runCmd(ctx context.Context, name string, args ...string) (string, error) {
	ctx, cancel := context.WithTimeout(ctx, 4*time.Second)
	defer cancel()
	out, err := exec.CommandContext(ctx, name, args...).Output()
	if err != nil {
		return "", fmt.Errorf("%s: %w", name, err)
	}
	return strings.TrimSpace(string(out)), nil
}
// httpGet fetches url with a GET request under a 3-second timeout (layered
// on the caller's context) and returns the response body bytes.
// A status code of 400 or above yields a non-nil error alongside the body,
// so callers can still inspect the payload.
func httpGet(ctx context.Context, url string) ([]byte, error) {
	ctx, cancel := context.WithTimeout(ctx, 3*time.Second)
	defer cancel()
	req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil)
	if err != nil {
		return nil, err
	}
	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		return nil, err
	}
	defer resp.Body.Close()
	data, err := io.ReadAll(resp.Body)
	if err != nil {
		return nil, err
	}
	if resp.StatusCode >= 400 {
		return data, fmt.Errorf("HTTP %d from %s", resp.StatusCode, url)
	}
	return data, nil
}

View File

@ -0,0 +1,260 @@
package report
import (
"bytes"
"context"
"encoding/json"
"fmt"
"io"
"net/http"
"strconv"
"time"
)
// rqliteBase is the base URL of the local RQLite HTTP API probed below.
const rqliteBase = "http://localhost:5001"
// collectRQLite queries the local RQLite HTTP API to build a health report.
//
// It reads /status for Raft and node metadata, /nodes for the cluster view,
// /readyz for readiness, issues a strong-consistency read probe, and scrapes
// error counters from /debug/vars. If /status is unreachable or unparseable
// the report is returned immediately with Responsive=false.
func collectRQLite() *RQLiteReport {
	r := &RQLiteReport{}
	// 1. GET /status — core Raft and node metadata.
	ctx, cancel := context.WithTimeout(context.Background(), 3*time.Second)
	defer cancel()
	statusBody, err := httpGet(ctx, rqliteBase+"/status")
	if err != nil {
		r.Responsive = false
		return r
	}
	var status map[string]interface{}
	if err := json.Unmarshal(statusBody, &status); err != nil {
		r.Responsive = false
		return r
	}
	r.Responsive = true
	// Extract fields from the nested status JSON.
	r.RaftState = getNestedString(status, "store", "raft", "state")
	r.LeaderAddr = getNestedString(status, "store", "leader", "addr")
	r.LeaderID = getNestedString(status, "store", "leader", "node_id")
	r.NodeID = getNestedString(status, "store", "node_id")
	r.Term = uint64(getNestedFloat(status, "store", "raft", "current_term"))
	r.Applied = uint64(getNestedFloat(status, "store", "raft", "applied_index"))
	r.Commit = uint64(getNestedFloat(status, "store", "raft", "commit_index"))
	r.FsmPending = uint64(getNestedFloat(status, "store", "raft", "fsm_pending"))
	r.LastContact = getNestedString(status, "store", "raft", "last_contact")
	r.Voter = getNestedBool(status, "store", "raft", "voter")
	r.DBSize = getNestedString(status, "store", "sqlite3", "db_size_friendly")
	r.Uptime = getNestedString(status, "http", "uptime")
	r.Version = getNestedString(status, "build", "version")
	r.Goroutines = int(getNestedFloat(status, "runtime", "num_goroutine"))
	// HeapMB: bytes → MB.
	heapBytes := getNestedFloat(status, "runtime", "memory", "heap_alloc")
	if heapBytes > 0 {
		r.HeapMB = int(heapBytes / (1024 * 1024))
	}
	// NumPeers may be a number or a string in the JSON; handle both.
	r.NumPeers = getNestedInt(status, "store", "raft", "num_peers")
	// 2. GET /nodes?nonvoters — cluster node list (includes non-voters).
	{
		ctx2, cancel2 := context.WithTimeout(context.Background(), 3*time.Second)
		defer cancel2()
		if body, err := httpGet(ctx2, rqliteBase+"/nodes?nonvoters"); err == nil {
			var rawNodes map[string]struct {
				Addr      string  `json:"addr"`
				Reachable bool    `json:"reachable"`
				Leader    bool    `json:"leader"`
				Voter     bool    `json:"voter"`
				Time      float64 `json:"time"`
				Error     string  `json:"error"`
			}
			if err := json.Unmarshal(body, &rawNodes); err == nil {
				r.Nodes = make(map[string]RQLiteNodeInfo, len(rawNodes))
				for id, n := range rawNodes {
					r.Nodes[id] = RQLiteNodeInfo{
						Reachable: n.Reachable,
						Leader:    n.Leader,
						Voter:     n.Voter,
						TimeMS:    n.Time * 1000, // seconds → milliseconds
						Error:     n.Error,
					}
				}
			}
		}
	}
	// 3. GET /readyz — readiness probe; 200 means ready.
	{
		ctx3, cancel3 := context.WithTimeout(context.Background(), 3*time.Second)
		defer cancel3()
		req, err := http.NewRequestWithContext(ctx3, http.MethodGet, rqliteBase+"/readyz", nil)
		if err == nil {
			if resp, err := http.DefaultClient.Do(req); err == nil {
				resp.Body.Close()
				r.Ready = resp.StatusCode == http.StatusOK
			}
		}
	}
	// 4. POST /db/query?level=strong — strong read test; exercises a
	// leader round-trip, so it fails when the cluster has no quorum.
	{
		ctx4, cancel4 := context.WithTimeout(context.Background(), 3*time.Second)
		defer cancel4()
		payload := []byte(`["SELECT 1"]`)
		req, err := http.NewRequestWithContext(ctx4, http.MethodPost, rqliteBase+"/db/query?level=strong", bytes.NewReader(payload))
		if err == nil {
			req.Header.Set("Content-Type", "application/json")
			if resp, err := http.DefaultClient.Do(req); err == nil {
				io.Copy(io.Discard, resp.Body)
				resp.Body.Close()
				r.StrongRead = resp.StatusCode == http.StatusOK
			}
		}
	}
	// 5. GET /debug/vars — error counters (expvar-style flat JSON map).
	{
		ctx5, cancel5 := context.WithTimeout(context.Background(), 3*time.Second)
		defer cancel5()
		if body, err := httpGet(ctx5, rqliteBase+"/debug/vars"); err == nil {
			var vars map[string]interface{}
			if err := json.Unmarshal(body, &vars); err == nil {
				r.DebugVars = &RQLiteDebugVarsReport{
					QueryErrors:      jsonUint64(vars, "api_query_errors"),
					ExecuteErrors:    jsonUint64(vars, "api_execute_errors"),
					RemoteExecErrors: jsonUint64(vars, "api_remote_exec_errors"),
					LeaderNotFound:   jsonUint64(vars, "store_leader_not_found"),
					SnapshotErrors:   jsonUint64(vars, "snapshot_errors"),
					ClientRetries:    jsonUint64(vars, "client_retries"),
					ClientTimeouts:   jsonUint64(vars, "client_timeouts"),
				}
			}
		}
	}
	return r
}
// ---------------------------------------------------------------------------
// Nested-map extraction helpers
// ---------------------------------------------------------------------------

// getNestedString returns the value at the nested key path rendered as a
// string: "" when the path is missing, fmt-formatted when the leaf exists
// but is not already a string.
func getNestedString(m map[string]interface{}, keys ...string) string {
	switch v := getNestedValue(m, keys...).(type) {
	case nil:
		return ""
	case string:
		return v
	default:
		return fmt.Sprintf("%v", v)
	}
}
// getNestedFloat returns the leaf at the key path as a float64. encoding/json
// decodes numbers into float64 when targeting interface{}, but json.Number
// and numeric strings are accepted too. Missing or non-numeric leaves yield 0.
func getNestedFloat(m map[string]interface{}, keys ...string) float64 {
	switch n := getNestedValue(m, keys...).(type) {
	case float64:
		return n
	case json.Number:
		if f, err := n.Float64(); err == nil {
			return f
		}
	case string:
		if f, err := strconv.ParseFloat(n, 64); err == nil {
			return f
		}
	}
	return 0
}
// getNestedBool returns the leaf at the key path as a bool; a missing path
// or a non-bool leaf yields false.
func getNestedBool(m map[string]interface{}, keys ...string) bool {
	b, ok := getNestedValue(m, keys...).(bool)
	return ok && b
}
// getNestedInt returns the leaf at the key path as an int, accepting
// float64, json.Number, and string forms (RQLite sometimes reports
// num_peers as a string). Missing or unconvertible leaves yield 0.
func getNestedInt(m map[string]interface{}, keys ...string) int {
	switch n := getNestedValue(m, keys...).(type) {
	case float64:
		return int(n)
	case json.Number:
		if i, err := n.Int64(); err == nil {
			return int(i)
		}
	case string:
		if i, err := strconv.Atoi(n); err == nil {
			return i
		}
	}
	return 0
}
// getNestedValue follows the given key path through nested
// map[string]interface{} values and returns the leaf, or nil when the
// path is empty, a key is absent, or an intermediate value is not a map.
func getNestedValue(m map[string]interface{}, keys ...string) interface{} {
	if len(keys) == 0 {
		return nil
	}
	var current interface{} = m
	for _, key := range keys {
		asMap, ok := current.(map[string]interface{})
		if !ok {
			return nil
		}
		if current, ok = asMap[key]; !ok {
			return nil
		}
	}
	return current
}
// jsonUint64 reads a top-level key from a flat map as a uint64 counter.
// It accepts float64 (the default encoding/json number type), json.Number,
// and decimal-string values. Returns 0 when the key is absent, the value
// is unparsable, or the value is negative — counters are never negative,
// and without the guard a negative float64/json.Number would wrap to a
// huge uint64.
func jsonUint64(m map[string]interface{}, key string) uint64 {
	v, ok := m[key]
	if !ok {
		return 0
	}
	switch n := v.(type) {
	case float64:
		if n < 0 {
			return 0
		}
		return uint64(n)
	case json.Number:
		// Int64 then range-check; a negative counter is treated as absent.
		if i, err := n.Int64(); err == nil && i >= 0 {
			return uint64(i)
		}
	case string:
		// ParseUint already rejects negative strings.
		if i, err := strconv.ParseUint(n, 10, 64); err == nil {
			return i
		}
	}
	return 0
}

View File

@ -0,0 +1,201 @@
package report
import (
"context"
"path/filepath"
"strconv"
"strings"
"time"
)
// coreServices lists the systemd units that make up an Orama node's core
// stack. Each unit is probed individually by collectServices; dynamically
// deployed namespace units are discovered separately.
var coreServices = []string{
	"orama-node",
	"orama-gateway",
	"orama-olric",
	"orama-ipfs",
	"orama-ipfs-cluster",
	"orama-anyone-relay",
	"orama-anyone-client",
	"coredns",
	"caddy",
	"wg-quick@wg0",
}
// collectServices probes every core unit plus any discovered
// orama-deploy-* namespace unit, and records units systemd reports
// as failed. The whole collection shares one 30-second deadline.
func collectServices() *ServicesReport {
	ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
	defer cancel()
	out := &ServicesReport{}
	// Fixed core stack first, then dynamically deployed namespace units.
	for _, unit := range coreServices {
		out.Services = append(out.Services, collectServiceInfo(ctx, unit))
	}
	for _, unit := range discoverNamespaceServices() {
		out.Services = append(out.Services, collectServiceInfo(ctx, unit))
	}
	out.FailedUnits = collectFailedUnits(ctx)
	return out
}
// collectServiceInfo gathers state, restart count, uptime, memory, CPU
// time and main PID for one systemd unit via a single `systemctl show`
// invocation, plus a separate `systemctl is-enabled` check.
func collectServiceInfo(ctx context.Context, name string) ServiceInfo {
	info := ServiceInfo{Name: name}
	out, err := runCmd(ctx, "systemctl", "show", name,
		"--property=ActiveState,SubState,NRestarts,ActiveEnterTimestamp,MemoryCurrent,CPUUsageNSec,MainPID")
	if err != nil {
		// systemctl unavailable or unit unknown: report unknown state.
		info.ActiveState = "unknown"
		info.SubState = "unknown"
		return info
	}
	props := parseProperties(out)
	info.ActiveState = props["ActiveState"]
	info.SubState = props["SubState"]
	info.NRestarts = parseInt(props["NRestarts"])
	info.MainPID = parseInt(props["MainPID"])
	info.MemoryCurrentMB = parseMemoryMB(props["MemoryCurrent"])
	info.CPUUsageNSec = parseInt64(props["CPUUsageNSec"])
	// Uptime is derived from when the unit last entered the active state.
	if ts := props["ActiveEnterTimestamp"]; ts != "" && ts != "n/a" {
		info.ActiveSinceSec = parseActiveSince(ts)
	}
	if enabled, enErr := runCmd(ctx, "systemctl", "is-enabled", name); enErr == nil {
		info.Enabled = strings.TrimSpace(enabled) == "enabled"
	}
	// Restart-loop heuristic: more than 3 restarts while the current run
	// is younger than 5 minutes.
	info.RestartLoopRisk = info.NRestarts > 3 && info.ActiveSinceSec > 0 && info.ActiveSinceSec < 300
	return info
}
// parseProperties turns `systemctl show` output ("Key=Value" per line)
// into a map. Blank lines and lines without '=' are skipped; only the
// first '=' splits, so values may themselves contain '='.
func parseProperties(output string) map[string]string {
	props := make(map[string]string)
	for _, raw := range strings.Split(output, "\n") {
		trimmed := strings.TrimSpace(raw)
		if trimmed == "" {
			continue
		}
		if eq := strings.IndexByte(trimmed, '='); eq >= 0 {
			props[trimmed[:eq]] = trimmed[eq+1:]
		}
	}
	return props
}
// parseMemoryMB converts a systemd MemoryCurrent value (bytes) to whole
// megabytes. Returns 0 for "", "[not set]", "infinity", unparsable input,
// or the max-uint64 sentinel — older systemd versions report "no value"
// as (uint64)(-1) rather than "[not set]", which would otherwise produce
// an absurd memory figure here.
func parseMemoryMB(s string) int {
	s = strings.TrimSpace(s)
	if s == "" || s == "[not set]" || s == "infinity" {
		return 0
	}
	bytes, err := strconv.ParseUint(s, 10, 64)
	if err != nil {
		return 0
	}
	// (uint64)(-1) is systemd's legacy "unset" marker, not a real size.
	if bytes == ^uint64(0) {
		return 0
	}
	return int(bytes / (1024 * 1024))
}
// parseActiveSince converts a systemctl ActiveEnterTimestamp such as
// "Fri 2024-01-05 10:30:00 UTC" into seconds elapsed since that instant.
// Unparsable or future timestamps yield 0.
// NOTE(review): time.Parse resolves unknown zone abbreviations to a zero
// offset, so elapsed time may be skewed on non-UTC locales — confirm
// nodes run with UTC system time.
func parseActiveSince(ts string) int64 {
	ts = strings.TrimSpace(ts)
	for _, layout := range []string{
		"Mon 2006-01-02 15:04:05 MST",
		"Mon 2006-01-02 15:04:05 -0700",
	} {
		parsed, err := time.Parse(layout, ts)
		if err != nil {
			continue
		}
		if elapsed := int64(time.Since(parsed).Seconds()); elapsed > 0 {
			return elapsed
		}
		return 0
	}
	return 0
}
// parseInt parses a systemctl property value as an int, treating "",
// "[not set]", and unparsable input as 0.
func parseInt(s string) int {
	trimmed := strings.TrimSpace(s)
	if trimmed == "" || trimmed == "[not set]" {
		return 0
	}
	n, _ := strconv.Atoi(trimmed)
	return n
}
// parseInt64 parses a systemctl property value as an int64, treating "",
// "[not set]", and unparsable input as 0.
func parseInt64(s string) int64 {
	trimmed := strings.TrimSpace(s)
	if trimmed == "" || trimmed == "[not set]" {
		return 0
	}
	n, _ := strconv.ParseInt(trimmed, 10, 64)
	return n
}
// collectFailedUnits returns the unit names reported by
// `systemctl --failed`, or nil when the command fails or nothing failed.
func collectFailedUnits(ctx context.Context) []string {
	out, err := runCmd(ctx, "systemctl", "--failed", "--no-legend", "--no-pager")
	if err != nil {
		return nil
	}
	var units []string
	for _, raw := range strings.Split(out, "\n") {
		fields := strings.Fields(strings.TrimSpace(raw))
		if len(fields) == 0 {
			continue
		}
		// The first column can carry a status bullet ("●" or "*"); strip it.
		if unit := strings.TrimLeft(fields[0], "●* "); unit != "" {
			units = append(units, unit)
		}
	}
	return units
}
// discoverNamespaceServices returns the unit names (without the .service
// suffix) of all orama-deploy-* units installed under /etc/systemd/system,
// or nil when none exist.
func discoverNamespaceServices() []string {
	matches, err := filepath.Glob("/etc/systemd/system/orama-deploy-*.service")
	if err != nil || len(matches) == 0 {
		return nil
	}
	services := make([]string, 0, len(matches))
	for _, match := range matches {
		services = append(services, strings.TrimSuffix(filepath.Base(match), ".service"))
	}
	return services
}

View File

@ -0,0 +1,200 @@
package report
import (
"context"
"os"
"strconv"
"strings"
"time"
)
// collectSystem gathers system-level metrics using local commands and /proc files.
// Every source is best-effort: on any read/parse/exec failure the
// corresponding fields simply keep their zero values. Each external command
// runs under its own 4-second timeout so one hung tool cannot stall the
// report. The deferred cancels all fire at function return, which is
// acceptable for a short-lived collector like this one.
func collectSystem() *SystemReport {
	r := &SystemReport{}
	// 1. Uptime seconds: read /proc/uptime, parse first field
	if data, err := os.ReadFile("/proc/uptime"); err == nil {
		fields := strings.Fields(string(data))
		if len(fields) >= 1 {
			if f, err := strconv.ParseFloat(fields[0], 64); err == nil {
				r.UptimeSeconds = int64(f)
			}
		}
	}
	// 2. Uptime since: run `uptime -s`
	{
		ctx, cancel := context.WithTimeout(context.Background(), 4*time.Second)
		defer cancel()
		if out, err := runCmd(ctx, "uptime", "-s"); err == nil {
			r.UptimeSince = strings.TrimSpace(out)
		}
	}
	// 3. CPU count: run `nproc`
	{
		ctx, cancel := context.WithTimeout(context.Background(), 4*time.Second)
		defer cancel()
		if out, err := runCmd(ctx, "nproc"); err == nil {
			if n, err := strconv.Atoi(strings.TrimSpace(out)); err == nil {
				r.CPUCount = n
			}
		}
	}
	// 4. Load averages: read /proc/loadavg, parse first 3 fields
	if data, err := os.ReadFile("/proc/loadavg"); err == nil {
		fields := strings.Fields(string(data))
		if len(fields) >= 3 {
			if f, err := strconv.ParseFloat(fields[0], 64); err == nil {
				r.LoadAvg1 = f
			}
			if f, err := strconv.ParseFloat(fields[1], 64); err == nil {
				r.LoadAvg5 = f
			}
			if f, err := strconv.ParseFloat(fields[2], 64); err == nil {
				r.LoadAvg15 = f
			}
		}
	}
	// 5 & 6. Memory and swap: run `free -m`, parse Mem: and Swap: lines
	{
		ctx, cancel := context.WithTimeout(context.Background(), 4*time.Second)
		defer cancel()
		if out, err := runCmd(ctx, "free", "-m"); err == nil {
			for _, line := range strings.Split(out, "\n") {
				fields := strings.Fields(line)
				if len(fields) >= 4 && fields[0] == "Mem:" {
					// Mem: total used free shared buff/cache available
					if n, err := strconv.Atoi(fields[1]); err == nil {
						r.MemTotalMB = n
					}
					if n, err := strconv.Atoi(fields[2]); err == nil {
						r.MemUsedMB = n
					}
					if n, err := strconv.Atoi(fields[3]); err == nil {
						r.MemFreeMB = n
					}
					if len(fields) >= 7 {
						if n, err := strconv.Atoi(fields[6]); err == nil {
							r.MemAvailMB = n
						}
					}
					// Usage percent derived from the "available" column.
					// NOTE(review): if that column is missing (older `free`
					// output), MemAvailMB stays 0 and this degrades to
					// ~100% — confirm that is acceptable.
					if r.MemTotalMB > 0 {
						r.MemUsePct = (r.MemTotalMB - r.MemAvailMB) * 100 / r.MemTotalMB
					}
				}
				if len(fields) >= 3 && fields[0] == "Swap:" {
					if n, err := strconv.Atoi(fields[1]); err == nil {
						r.SwapTotalMB = n
					}
					if n, err := strconv.Atoi(fields[2]); err == nil {
						r.SwapUsedMB = n
					}
				}
			}
		}
	}
	// 7. Disk usage: run `df -h /` and `df -h /opt/orama`, use whichever has higher usage
	{
		// diskInfo holds one parsed `df -h` data row; sizes stay as the
		// human-readable strings df printed (e.g. "42G").
		type diskInfo struct {
			total string
			used string
			avail string
			usePct int
		}
		// parseDf extracts the first data row (second line) of df output.
		parseDf := func(out string) *diskInfo {
			lines := strings.Split(out, "\n")
			if len(lines) < 2 {
				return nil
			}
			fields := strings.Fields(lines[1])
			if len(fields) < 5 {
				return nil
			}
			pctStr := strings.TrimSuffix(fields[4], "%")
			pct, err := strconv.Atoi(pctStr)
			if err != nil {
				return nil
			}
			return &diskInfo{
				total: fields[1],
				used: fields[2],
				avail: fields[3],
				usePct: pct,
			}
		}
		ctx1, cancel1 := context.WithTimeout(context.Background(), 4*time.Second)
		defer cancel1()
		rootDisk := (*diskInfo)(nil)
		if out, err := runCmd(ctx1, "df", "-h", "/"); err == nil {
			rootDisk = parseDf(out)
		}
		ctx2, cancel2 := context.WithTimeout(context.Background(), 4*time.Second)
		defer cancel2()
		optDisk := (*diskInfo)(nil)
		if out, err := runCmd(ctx2, "df", "-h", "/opt/orama"); err == nil {
			optDisk = parseDf(out)
		}
		// Report the fuller of the two filesystems; if both paths live on
		// the same filesystem the numbers match anyway.
		best := rootDisk
		if optDisk != nil && (best == nil || optDisk.usePct > best.usePct) {
			best = optDisk
		}
		if best != nil {
			r.DiskTotalGB = best.total
			r.DiskUsedGB = best.used
			r.DiskAvailGB = best.avail
			r.DiskUsePct = best.usePct
		}
	}
	// 8. Inode usage: run `df -i /`, parse Use% from second line
	{
		ctx, cancel := context.WithTimeout(context.Background(), 4*time.Second)
		defer cancel()
		if out, err := runCmd(ctx, "df", "-i", "/"); err == nil {
			lines := strings.Split(out, "\n")
			if len(lines) >= 2 {
				fields := strings.Fields(lines[1])
				if len(fields) >= 5 {
					pctStr := strings.TrimSuffix(fields[4], "%")
					if n, err := strconv.Atoi(pctStr); err == nil {
						r.InodePct = n
					}
				}
			}
		}
	}
	// 9. OOM kills: run `dmesg 2>/dev/null | grep -ci 'out of memory'` via bash -c
	// NOTE(review): `grep -c` exits non-zero when the count is 0, so runCmd
	// presumably returns an error then and OOMKills stays 0 — confirm runCmd
	// treats a non-zero exit status as an error.
	{
		ctx, cancel := context.WithTimeout(context.Background(), 4*time.Second)
		defer cancel()
		if out, err := runCmd(ctx, "bash", "-c", "dmesg 2>/dev/null | grep -ci 'out of memory'"); err == nil {
			if n, err := strconv.Atoi(strings.TrimSpace(out)); err == nil {
				r.OOMKills = n
			}
		}
		// On error, OOMKills stays 0 (zero value)
	}
	// 10. Kernel version: run `uname -r`
	{
		ctx, cancel := context.WithTimeout(context.Background(), 4*time.Second)
		defer cancel()
		if out, err := runCmd(ctx, "uname", "-r"); err == nil {
			r.KernelVersion = strings.TrimSpace(out)
		}
	}
	// 11. Current unix timestamp
	r.TimeUnix = time.Now().Unix()
	return r
}

View File

@ -0,0 +1,275 @@
package report
import "time"
// NodeReport is the top-level JSON output of `orama node report --json`.
// Section fields are pointers tagged omitempty so that a failed collector
// simply omits its section from the JSON rather than emitting zeroed data.
type NodeReport struct {
	Timestamp time.Time `json:"timestamp"`
	Hostname string `json:"hostname"`
	PublicIP string `json:"public_ip,omitempty"`
	WGIP string `json:"wireguard_ip,omitempty"`
	Version string `json:"version"`
	CollectMS int64 `json:"collect_ms"` // collection duration in ms (presumably wall clock; set by caller)
	Errors []string `json:"errors,omitempty"` // non-fatal errors accumulated during collection
	System *SystemReport `json:"system"`
	Services *ServicesReport `json:"services"`
	RQLite *RQLiteReport `json:"rqlite,omitempty"`
	Olric *OlricReport `json:"olric,omitempty"`
	IPFS *IPFSReport `json:"ipfs,omitempty"`
	Gateway *GatewayReport `json:"gateway,omitempty"`
	WireGuard *WireGuardReport `json:"wireguard,omitempty"`
	DNS *DNSReport `json:"dns,omitempty"`
	Anyone *AnyoneReport `json:"anyone,omitempty"`
	Network *NetworkReport `json:"network"`
	Processes *ProcessReport `json:"processes"`
	Namespaces []NamespaceReport `json:"namespaces,omitempty"`
}
// --- System ---
// SystemReport holds host-level metrics gathered by collectSystem from
// /proc files and standard tools (uptime, free, df, uname).
type SystemReport struct {
	UptimeSeconds int64 `json:"uptime_seconds"`
	UptimeSince string `json:"uptime_since"`
	CPUCount int `json:"cpu_count"`
	LoadAvg1 float64 `json:"load_avg_1"`
	LoadAvg5 float64 `json:"load_avg_5"`
	LoadAvg15 float64 `json:"load_avg_15"`
	MemTotalMB int `json:"mem_total_mb"`
	MemUsedMB int `json:"mem_used_mb"`
	MemFreeMB int `json:"mem_free_mb"`
	MemAvailMB int `json:"mem_available_mb"`
	MemUsePct int `json:"mem_use_pct"`
	SwapTotalMB int `json:"swap_total_mb"`
	SwapUsedMB int `json:"swap_used_mb"`
	// Disk sizes are kept as the human-readable strings printed by `df -h`
	// (e.g. "42G"), not numeric gigabytes.
	DiskTotalGB string `json:"disk_total_gb"`
	DiskUsedGB string `json:"disk_used_gb"`
	DiskAvailGB string `json:"disk_avail_gb"`
	DiskUsePct int `json:"disk_use_pct"`
	InodePct int `json:"inode_use_pct"`
	OOMKills int `json:"oom_kills"`
	KernelVersion string `json:"kernel_version"`
	TimeUnix int64 `json:"time_unix"`
}
// --- Systemd Services ---
// ServicesReport aggregates per-unit status for core and namespace units
// plus any units systemd reports as failed.
type ServicesReport struct {
	Services []ServiceInfo `json:"services"`
	FailedUnits []string `json:"failed_units,omitempty"`
}
// ServiceInfo describes one systemd unit as reported by `systemctl show`
// and `systemctl is-enabled`.
type ServiceInfo struct {
	Name string `json:"name"`
	ActiveState string `json:"active_state"`
	SubState string `json:"sub_state"`
	Enabled bool `json:"enabled"`
	NRestarts int `json:"n_restarts"`
	ActiveSinceSec int64 `json:"active_since_sec"` // seconds since the unit entered active state
	MemoryCurrentMB int `json:"memory_current_mb"`
	CPUUsageNSec int64 `json:"cpu_usage_nsec"`
	MainPID int `json:"main_pid"`
	// RestartLoopRisk is set when the unit restarted >3 times and the
	// current run is younger than 5 minutes.
	RestartLoopRisk bool `json:"restart_loop_risk"`
}
// --- RQLite ---
// RQLiteReport captures the local RQLite node's health, Raft state, and
// cluster membership view.
type RQLiteReport struct {
	Responsive bool `json:"responsive"`
	Ready bool `json:"ready"`
	StrongRead bool `json:"strong_read"`
	RaftState string `json:"raft_state,omitempty"`
	LeaderAddr string `json:"leader_addr,omitempty"`
	LeaderID string `json:"leader_id,omitempty"`
	NodeID string `json:"node_id,omitempty"`
	Term uint64 `json:"term,omitempty"`
	Applied uint64 `json:"applied_index,omitempty"`
	Commit uint64 `json:"commit_index,omitempty"`
	FsmPending uint64 `json:"fsm_pending,omitempty"`
	LastContact string `json:"last_contact,omitempty"`
	NumPeers int `json:"num_peers,omitempty"`
	Voter bool `json:"voter,omitempty"`
	DBSize string `json:"db_size,omitempty"`
	Uptime string `json:"uptime,omitempty"`
	Version string `json:"version,omitempty"`
	Goroutines int `json:"goroutines,omitempty"`
	HeapMB int `json:"heap_mb,omitempty"`
	Nodes map[string]RQLiteNodeInfo `json:"nodes,omitempty"` // keyed by node, per-peer reachability
	DebugVars *RQLiteDebugVarsReport `json:"debug_vars,omitempty"`
}
// RQLiteNodeInfo is the local node's view of one cluster peer.
type RQLiteNodeInfo struct {
	Reachable bool `json:"reachable"`
	Leader bool `json:"leader"`
	Voter bool `json:"voter"`
	TimeMS float64 `json:"time_ms"`
	Error string `json:"error,omitempty"`
}
// RQLiteDebugVarsReport holds error/retry counters extracted from the
// RQLite /debug/vars endpoint.
type RQLiteDebugVarsReport struct {
	QueryErrors uint64 `json:"query_errors"`
	ExecuteErrors uint64 `json:"execute_errors"`
	RemoteExecErrors uint64 `json:"remote_exec_errors"`
	LeaderNotFound uint64 `json:"leader_not_found"`
	SnapshotErrors uint64 `json:"snapshot_errors"`
	ClientRetries uint64 `json:"client_retries"`
	ClientTimeouts uint64 `json:"client_timeouts"`
}
// --- Olric ---
// OlricReport captures the Olric cache service state, memberlist view,
// and recent log-derived health counters (suffixed with their window).
type OlricReport struct {
	ServiceActive bool `json:"service_active"`
	MemberlistUp bool `json:"memberlist_up"`
	MemberCount int `json:"member_count,omitempty"`
	Members []string `json:"members,omitempty"`
	Coordinator string `json:"coordinator,omitempty"`
	ProcessMemMB int `json:"process_mem_mb"`
	RestartCount int `json:"restart_count"`
	LogErrors int `json:"log_errors_1h"`
	LogSuspects int `json:"log_suspects_1h"`
	LogFlapping int `json:"log_flapping_1h"`
}
// --- IPFS ---
// IPFSReport captures Kubo daemon and IPFS Cluster status, repo usage,
// and private-network configuration indicators.
type IPFSReport struct {
	DaemonActive bool `json:"daemon_active"`
	ClusterActive bool `json:"cluster_active"`
	SwarmPeerCount int `json:"swarm_peer_count"`
	ClusterPeerCount int `json:"cluster_peer_count"`
	ClusterErrors int `json:"cluster_errors"`
	RepoSizeBytes int64 `json:"repo_size_bytes"`
	RepoMaxBytes int64 `json:"repo_max_bytes"`
	RepoUsePct int `json:"repo_use_pct"`
	KuboVersion string `json:"kubo_version,omitempty"`
	ClusterVersion string `json:"cluster_version,omitempty"`
	HasSwarmKey bool `json:"has_swarm_key"`
	BootstrapEmpty bool `json:"bootstrap_empty"`
}
// --- Gateway ---
// GatewayReport captures the gateway's HTTP health-check response and the
// per-subsystem statuses it reports.
type GatewayReport struct {
	Responsive bool `json:"responsive"`
	HTTPStatus int `json:"http_status,omitempty"`
	Version string `json:"version,omitempty"`
	Subsystems map[string]SubsystemHealth `json:"subsystems,omitempty"` // keyed by subsystem name
}
// SubsystemHealth is one subsystem's entry in the gateway health response.
type SubsystemHealth struct {
	Status string `json:"status"`
	Latency string `json:"latency,omitempty"`
	Error string `json:"error,omitempty"`
}
// --- WireGuard ---
// WireGuardReport captures wg0 interface state, configuration checks, and
// per-peer statistics collected by collectWireGuard.
type WireGuardReport struct {
	InterfaceUp bool `json:"interface_up"`
	ServiceActive bool `json:"service_active"`
	WgIP string `json:"wg_ip,omitempty"`
	ListenPort int `json:"listen_port,omitempty"`
	PeerCount int `json:"peer_count"`
	MTU int `json:"mtu,omitempty"`
	ConfigExists bool `json:"config_exists"`
	ConfigPerms string `json:"config_perms,omitempty"` // octal string from `stat -c %a`, e.g. "600"
	Peers []WGPeerInfo `json:"peers,omitempty"`
}
// WGPeerInfo is one peer row from `wg show wg0 dump`.
type WGPeerInfo struct {
	PublicKey string `json:"public_key"`
	Endpoint string `json:"endpoint,omitempty"`
	AllowedIPs string `json:"allowed_ips"`
	LatestHandshake int64 `json:"latest_handshake"` // unix seconds; 0 = never
	HandshakeAgeSec int64 `json:"handshake_age_sec"`
	TransferRx int64 `json:"transfer_rx_bytes"`
	TransferTx int64 `json:"transfer_tx_bytes"`
	Keepalive int `json:"keepalive,omitempty"`
}
// --- DNS ---
// DNSReport captures CoreDNS/Caddy service state, port bindings, resolution
// checks, and TLS certificate expiry for the node's DNS stack.
type DNSReport struct {
	CoreDNSActive bool `json:"coredns_active"`
	CaddyActive bool `json:"caddy_active"`
	Port53Bound bool `json:"port_53_bound"`
	Port80Bound bool `json:"port_80_bound"`
	Port443Bound bool `json:"port_443_bound"`
	CoreDNSMemMB int `json:"coredns_mem_mb"`
	CoreDNSRestarts int `json:"coredns_restarts"`
	LogErrors int `json:"log_errors_5m"`
	CorefileExists bool `json:"corefile_exists"`
	SOAResolves bool `json:"soa_resolves"`
	NSResolves bool `json:"ns_resolves"`
	NSRecordCount int `json:"ns_record_count"`
	WildcardResolves bool `json:"wildcard_resolves"`
	BaseAResolves bool `json:"base_a_resolves"`
	BaseTLSDaysLeft int `json:"base_tls_days_left"`
	WildTLSDaysLeft int `json:"wild_tls_days_left"`
}
// --- Anyone ---
// AnyoneReport captures the Anyone Protocol relay/client status, listening
// ports, and bootstrap progress.
type AnyoneReport struct {
	RelayActive bool `json:"relay_active"`
	ClientActive bool `json:"client_active"`
	Mode string `json:"mode,omitempty"`
	ORPortListening bool `json:"orport_listening"`
	SocksListening bool `json:"socks_listening"`
	ControlListening bool `json:"control_listening"`
	Bootstrapped bool `json:"bootstrapped"`
	BootstrapPct int `json:"bootstrap_pct"`
	Fingerprint string `json:"fingerprint,omitempty"`
	Nickname string `json:"nickname,omitempty"`
}
// --- Network ---
// NetworkReport captures host connectivity, TCP connection statistics,
// listening ports, and firewall state.
type NetworkReport struct {
	InternetReachable bool `json:"internet_reachable"`
	DefaultRoute bool `json:"default_route"`
	WGRouteExists bool `json:"wg_route_exists"`
	TCPEstablished int `json:"tcp_established"`
	TCPTimeWait int `json:"tcp_time_wait"`
	TCPRetransRate float64 `json:"tcp_retrans_pct"`
	ListeningPorts []PortInfo `json:"listening_ports"`
	UFWActive bool `json:"ufw_active"`
	UFWRules []string `json:"ufw_rules,omitempty"`
}
// PortInfo describes one listening socket.
type PortInfo struct {
	Port int `json:"port"`
	Proto string `json:"proto"`
	Process string `json:"process,omitempty"`
}
// --- Processes ---
// ProcessReport captures zombie/orphan process counts and recent panic
// occurrences (the _1h suffix marks the lookback window).
type ProcessReport struct {
	ZombieCount int `json:"zombie_count"`
	Zombies []ProcessInfo `json:"zombies,omitempty"`
	OrphanCount int `json:"orphan_count"`
	Orphans []ProcessInfo `json:"orphans,omitempty"`
	PanicCount int `json:"panic_count_1h"`
}
// ProcessInfo identifies one process of interest.
type ProcessInfo struct {
	PID int `json:"pid"`
	PPID int `json:"ppid"`
	State string `json:"state"`
	Command string `json:"command"`
}
// --- Namespaces ---
// NamespaceReport summarizes the per-namespace service trio (RQLite,
// Olric, gateway) for one deployed namespace.
type NamespaceReport struct {
	Name string `json:"name"`
	PortBase int `json:"port_base"`
	RQLiteUp bool `json:"rqlite_up"`
	RQLiteState string `json:"rqlite_state,omitempty"`
	RQLiteReady bool `json:"rqlite_ready"`
	OlricUp bool `json:"olric_up"`
	GatewayUp bool `json:"gateway_up"`
	GatewayStatus int `json:"gateway_status,omitempty"`
}

View File

@ -0,0 +1,163 @@
package report
import (
"context"
"os"
"strconv"
"strings"
"time"
)
// collectWireGuard gathers WireGuard interface status, peer information,
// and configuration details using local commands and sysfs.
// All probes are best-effort: fields keep their zero values when a command
// fails, and each external command runs under a 4-second timeout.
func collectWireGuard() *WireGuardReport {
	r := &WireGuardReport{}
	// 1. ServiceActive: check if wg-quick@wg0 systemd service is active
	{
		ctx, cancel := context.WithTimeout(context.Background(), 4*time.Second)
		defer cancel()
		if out, err := runCmd(ctx, "systemctl", "is-active", "wg-quick@wg0"); err == nil {
			r.ServiceActive = strings.TrimSpace(out) == "active"
		}
	}
	// 2. InterfaceUp: check if /sys/class/net/wg0 exists
	if _, err := os.Stat("/sys/class/net/wg0"); err == nil {
		r.InterfaceUp = true
	}
	// If interface is not up, return partial data early.
	if !r.InterfaceUp {
		// Still check config existence even if interface is down.
		if _, err := os.Stat("/etc/wireguard/wg0.conf"); err == nil {
			r.ConfigExists = true
			ctx, cancel := context.WithTimeout(context.Background(), 4*time.Second)
			defer cancel()
			// Octal permission string (e.g. "600") for config audit.
			if out, err := runCmd(ctx, "stat", "-c", "%a", "/etc/wireguard/wg0.conf"); err == nil {
				r.ConfigPerms = strings.TrimSpace(out)
			}
		}
		return r
	}
	// 3. WgIP: extract IP from `ip -4 addr show wg0`
	{
		ctx, cancel := context.WithTimeout(context.Background(), 4*time.Second)
		defer cancel()
		if out, err := runCmd(ctx, "ip", "-4", "addr", "show", "wg0"); err == nil {
			for _, line := range strings.Split(out, "\n") {
				line = strings.TrimSpace(line)
				if strings.HasPrefix(line, "inet ") {
					// Line format: "inet X.X.X.X/Y scope ..."
					fields := strings.Fields(line)
					if len(fields) >= 2 {
						// Extract just the IP, strip the /prefix
						ip := fields[1]
						if idx := strings.Index(ip, "/"); idx != -1 {
							ip = ip[:idx]
						}
						r.WgIP = ip
					}
					// Only the first inet line is used.
					break
				}
			}
		}
	}
	// 4. MTU: read /sys/class/net/wg0/mtu
	if data, err := os.ReadFile("/sys/class/net/wg0/mtu"); err == nil {
		if n, err := strconv.Atoi(strings.TrimSpace(string(data))); err == nil {
			r.MTU = n
		}
	}
	// 5. ListenPort: parse from `wg show wg0 listen-port`
	{
		ctx, cancel := context.WithTimeout(context.Background(), 4*time.Second)
		defer cancel()
		if out, err := runCmd(ctx, "wg", "show", "wg0", "listen-port"); err == nil {
			if n, err := strconv.Atoi(strings.TrimSpace(out)); err == nil {
				r.ListenPort = n
			}
		}
	}
	// 6. ConfigExists: check if /etc/wireguard/wg0.conf exists
	if _, err := os.Stat("/etc/wireguard/wg0.conf"); err == nil {
		r.ConfigExists = true
	}
	// 7. ConfigPerms: run `stat -c '%a' /etc/wireguard/wg0.conf`
	if r.ConfigExists {
		ctx, cancel := context.WithTimeout(context.Background(), 4*time.Second)
		defer cancel()
		if out, err := runCmd(ctx, "stat", "-c", "%a", "/etc/wireguard/wg0.conf"); err == nil {
			r.ConfigPerms = strings.TrimSpace(out)
		}
	}
	// 8. Peers: run `wg show wg0 dump` and parse peer lines
	// Line 1: interface (private_key, public_key, listen_port, fwmark)
	// Line 2+: peers (public_key, preshared_key, endpoint, allowed_ips,
	//          latest_handshake, transfer_rx, transfer_tx, persistent_keepalive)
	{
		ctx, cancel := context.WithTimeout(context.Background(), 4*time.Second)
		defer cancel()
		if out, err := runCmd(ctx, "wg", "show", "wg0", "dump"); err == nil {
			lines := strings.Split(out, "\n")
			now := time.Now().Unix()
			for i, line := range lines {
				if i == 0 {
					// Skip interface line
					continue
				}
				line = strings.TrimSpace(line)
				if line == "" {
					continue
				}
				// Fields are tab-separated; anything narrower is malformed.
				fields := strings.Split(line, "\t")
				if len(fields) < 8 {
					continue
				}
				// NOTE(review): wg prints "(none)" for a peer with no
				// endpoint; it is stored verbatim here — confirm consumers
				// handle that value.
				peer := WGPeerInfo{
					PublicKey: fields[0],
					Endpoint: fields[2],
					AllowedIPs: fields[3],
				}
				// LatestHandshake: unix timestamp (0 = never)
				if ts, err := strconv.ParseInt(fields[4], 10, 64); err == nil {
					peer.LatestHandshake = ts
					if ts > 0 {
						peer.HandshakeAgeSec = now - ts
					}
				}
				// TransferRx
				if n, err := strconv.ParseInt(fields[5], 10, 64); err == nil {
					peer.TransferRx = n
				}
				// TransferTx
				if n, err := strconv.ParseInt(fields[6], 10, 64); err == nil {
					peer.TransferTx = n
				}
				// PersistentKeepalive: "off" means disabled (Keepalive stays 0).
				if fields[7] != "off" {
					if n, err := strconv.Atoi(fields[7]); err == nil {
						peer.Keepalive = n
					}
				}
				r.Peers = append(r.Peers, peer)
			}
			r.PeerCount = len(r.Peers)
		}
	}
	return r
}