mirror of
https://github.com/DeBrosOfficial/orama.git
synced 2026-03-17 03:33:01 +00:00
Created new monitoring CLI interface for nodes, namespaces, and cluster
This commit is contained in:
parent
88ba08fcba
commit
1e38fc2861
65
README.md
65
README.md
@ -130,11 +130,11 @@ orama deploy go <path> --name myapp # Go binaries (must have /health e
|
||||
orama deploy nodejs <path> --name myapp # Node.js apps (must have /health endpoint)
|
||||
|
||||
# Manage deployments
|
||||
orama deployments list # List all deployments
|
||||
orama deployments get <name> # Get deployment details
|
||||
orama deployments logs <name> --follow # View logs
|
||||
orama deployments delete <name> # Delete deployment
|
||||
orama deployments rollback <name> --version 1 # Rollback to version
|
||||
orama app list # List all deployments
|
||||
orama app get <name> # Get deployment details
|
||||
orama app logs <name> --follow # View logs
|
||||
orama app delete <name> # Delete deployment
|
||||
orama app rollback <name> --version 1 # Rollback to version
|
||||
```
|
||||
|
||||
### SQLite Databases
|
||||
@ -147,28 +147,12 @@ orama db backup <name> # Backup to IPFS
|
||||
orama db backups <name> # List backups
|
||||
```
|
||||
|
||||
### Network Status
|
||||
### Environment Management
|
||||
|
||||
```bash
|
||||
orama health # Cluster health check
|
||||
orama peers # List connected peers
|
||||
orama status # Network status
|
||||
```
|
||||
|
||||
### RQLite Operations
|
||||
|
||||
```bash
|
||||
orama query "SELECT * FROM users"
|
||||
orama query "CREATE TABLE users (id INTEGER PRIMARY KEY)"
|
||||
orama transaction --file ops.json
|
||||
```
|
||||
|
||||
### Pub/Sub
|
||||
|
||||
```bash
|
||||
orama pubsub publish <topic> <message>
|
||||
orama pubsub subscribe <topic> 30s
|
||||
orama pubsub topics
|
||||
orama env list # List available environments
|
||||
orama env current # Show active environment
|
||||
orama env use <name> # Switch environment
|
||||
```
|
||||
|
||||
## Serverless Functions (WASM)
|
||||
@ -267,14 +251,14 @@ Orama Network integrates with the [Anyone Protocol](https://anyone.io) for anony
|
||||
|
||||
```bash
|
||||
# Install as relay operator (earn rewards)
|
||||
sudo orama install --vps-ip <IP> --domain <domain> \
|
||||
sudo orama node install --vps-ip <IP> --domain <domain> \
|
||||
--anyone-relay \
|
||||
--anyone-nickname "MyRelay" \
|
||||
--anyone-contact "operator@email.com" \
|
||||
--anyone-wallet "0x1234...abcd"
|
||||
|
||||
# With exit relay (legal implications apply)
|
||||
sudo orama install --vps-ip <IP> --domain <domain> \
|
||||
sudo orama node install --vps-ip <IP> --domain <domain> \
|
||||
--anyone-relay \
|
||||
--anyone-exit \
|
||||
--anyone-nickname "MyExitRelay" \
|
||||
@ -282,7 +266,7 @@ sudo orama install --vps-ip <IP> --domain <domain> \
|
||||
--anyone-wallet "0x1234...abcd"
|
||||
|
||||
# Migrate existing Anyone installation
|
||||
sudo orama install --vps-ip <IP> --domain <domain> \
|
||||
sudo orama node install --vps-ip <IP> --domain <domain> \
|
||||
--anyone-relay \
|
||||
--anyone-migrate \
|
||||
--anyone-nickname "MyRelay" \
|
||||
@ -317,31 +301,34 @@ go install github.com/DeBrosOfficial/network/cmd/cli@latest
|
||||
**Setup (after installation):**
|
||||
|
||||
```bash
|
||||
sudo orama install --interactive
|
||||
sudo orama node install --interactive
|
||||
```
|
||||
|
||||
### Service Management
|
||||
|
||||
```bash
|
||||
# Status
|
||||
orama status
|
||||
sudo orama node status
|
||||
|
||||
# Control services
|
||||
sudo orama start
|
||||
sudo orama stop
|
||||
sudo orama restart
|
||||
sudo orama node start
|
||||
sudo orama node stop
|
||||
sudo orama node restart
|
||||
|
||||
# Diagnose issues
|
||||
sudo orama node doctor
|
||||
|
||||
# View logs
|
||||
orama logs node --follow
|
||||
orama logs gateway --follow
|
||||
orama logs ipfs --follow
|
||||
orama node logs node --follow
|
||||
orama node logs gateway --follow
|
||||
orama node logs ipfs --follow
|
||||
```
|
||||
|
||||
### Upgrade
|
||||
|
||||
```bash
|
||||
# Upgrade to latest version
|
||||
sudo orama upgrade --interactive
|
||||
sudo orama node upgrade --restart
|
||||
```
|
||||
|
||||
## Configuration
|
||||
@ -397,9 +384,9 @@ rqlite -H localhost -p 5001
|
||||
|
||||
```bash
|
||||
# Production reset (⚠️ DESTROYS DATA)
|
||||
sudo orama uninstall
|
||||
sudo orama node uninstall
|
||||
sudo rm -rf /opt/orama/.orama
|
||||
sudo orama install
|
||||
sudo orama node install
|
||||
```
|
||||
|
||||
## HTTP Gateway API
|
||||
|
||||
@ -13,6 +13,7 @@ import (
|
||||
deploycmd "github.com/DeBrosOfficial/network/pkg/cli/cmd/deploy"
|
||||
"github.com/DeBrosOfficial/network/pkg/cli/cmd/envcmd"
|
||||
"github.com/DeBrosOfficial/network/pkg/cli/cmd/inspectcmd"
|
||||
"github.com/DeBrosOfficial/network/pkg/cli/cmd/monitorcmd"
|
||||
"github.com/DeBrosOfficial/network/pkg/cli/cmd/namespacecmd"
|
||||
"github.com/DeBrosOfficial/network/pkg/cli/cmd/node"
|
||||
)
|
||||
@ -75,6 +76,9 @@ and interacting with the Orama distributed network.`,
|
||||
// Inspect command
|
||||
rootCmd.AddCommand(inspectcmd.Cmd)
|
||||
|
||||
// Monitor command
|
||||
rootCmd.AddCommand(monitorcmd.Cmd)
|
||||
|
||||
return rootCmd
|
||||
}
|
||||
|
||||
|
||||
200
pkg/cli/cmd/monitorcmd/monitor.go
Normal file
200
pkg/cli/cmd/monitorcmd/monitor.go
Normal file
@ -0,0 +1,200 @@
|
||||
package monitorcmd
|
||||
|
||||
import (
	"context"
	"io"
	"os"
	"time"

	"github.com/DeBrosOfficial/network/pkg/cli/monitor"
	"github.com/DeBrosOfficial/network/pkg/cli/monitor/display"
	"github.com/DeBrosOfficial/network/pkg/cli/monitor/tui"
	"github.com/spf13/cobra"
)
|
||||
|
||||
// Cmd is the root monitor command.
|
||||
var Cmd = &cobra.Command{
|
||||
Use: "monitor",
|
||||
Short: "Monitor cluster health from your local machine",
|
||||
Long: `SSH into cluster nodes and display real-time health data.
|
||||
Runs 'orama node report --json' on each node and aggregates results.
|
||||
|
||||
Without a subcommand, launches the interactive TUI.`,
|
||||
RunE: runLive,
|
||||
}
|
||||
|
||||
// Shared persistent flags.
|
||||
var (
|
||||
flagEnv string
|
||||
flagJSON bool
|
||||
flagNode string
|
||||
flagConfig string
|
||||
)
|
||||
|
||||
func init() {
|
||||
Cmd.PersistentFlags().StringVar(&flagEnv, "env", "", "Environment: devnet, testnet, mainnet (required)")
|
||||
Cmd.PersistentFlags().BoolVar(&flagJSON, "json", false, "Machine-readable JSON output")
|
||||
Cmd.PersistentFlags().StringVar(&flagNode, "node", "", "Filter to specific node host/IP")
|
||||
Cmd.PersistentFlags().StringVar(&flagConfig, "config", "scripts/remote-nodes.conf", "Path to remote-nodes.conf")
|
||||
Cmd.MarkPersistentFlagRequired("env")
|
||||
|
||||
Cmd.AddCommand(liveCmd)
|
||||
Cmd.AddCommand(clusterCmd)
|
||||
Cmd.AddCommand(nodeCmd)
|
||||
Cmd.AddCommand(serviceCmd)
|
||||
Cmd.AddCommand(meshCmd)
|
||||
Cmd.AddCommand(dnsCmd)
|
||||
Cmd.AddCommand(namespacesCmd)
|
||||
Cmd.AddCommand(alertsCmd)
|
||||
Cmd.AddCommand(reportCmd)
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Subcommands
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
var liveCmd = &cobra.Command{
|
||||
Use: "live",
|
||||
Short: "Interactive TUI monitor",
|
||||
RunE: runLive,
|
||||
}
|
||||
|
||||
var clusterCmd = &cobra.Command{
|
||||
Use: "cluster",
|
||||
Short: "Cluster overview (one-shot)",
|
||||
RunE: func(cmd *cobra.Command, args []string) error {
|
||||
snap, err := collectSnapshot()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if flagJSON {
|
||||
return display.ClusterJSON(snap, os.Stdout)
|
||||
}
|
||||
return display.ClusterTable(snap, os.Stdout)
|
||||
},
|
||||
}
|
||||
|
||||
var nodeCmd = &cobra.Command{
|
||||
Use: "node",
|
||||
Short: "Per-node health details (one-shot)",
|
||||
RunE: func(cmd *cobra.Command, args []string) error {
|
||||
snap, err := collectSnapshot()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if flagJSON {
|
||||
return display.NodeJSON(snap, os.Stdout)
|
||||
}
|
||||
return display.NodeTable(snap, os.Stdout)
|
||||
},
|
||||
}
|
||||
|
||||
var serviceCmd = &cobra.Command{
|
||||
Use: "service",
|
||||
Short: "Service status across the cluster (one-shot)",
|
||||
RunE: func(cmd *cobra.Command, args []string) error {
|
||||
snap, err := collectSnapshot()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if flagJSON {
|
||||
return display.ServiceJSON(snap, os.Stdout)
|
||||
}
|
||||
return display.ServiceTable(snap, os.Stdout)
|
||||
},
|
||||
}
|
||||
|
||||
var meshCmd = &cobra.Command{
|
||||
Use: "mesh",
|
||||
Short: "Mesh connectivity status (one-shot)",
|
||||
RunE: func(cmd *cobra.Command, args []string) error {
|
||||
snap, err := collectSnapshot()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if flagJSON {
|
||||
return display.MeshJSON(snap, os.Stdout)
|
||||
}
|
||||
return display.MeshTable(snap, os.Stdout)
|
||||
},
|
||||
}
|
||||
|
||||
var dnsCmd = &cobra.Command{
|
||||
Use: "dns",
|
||||
Short: "DNS health overview (one-shot)",
|
||||
RunE: func(cmd *cobra.Command, args []string) error {
|
||||
snap, err := collectSnapshot()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if flagJSON {
|
||||
return display.DNSJSON(snap, os.Stdout)
|
||||
}
|
||||
return display.DNSTable(snap, os.Stdout)
|
||||
},
|
||||
}
|
||||
|
||||
var namespacesCmd = &cobra.Command{
|
||||
Use: "namespaces",
|
||||
Short: "Namespace usage summary (one-shot)",
|
||||
RunE: func(cmd *cobra.Command, args []string) error {
|
||||
snap, err := collectSnapshot()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if flagJSON {
|
||||
return display.NamespacesJSON(snap, os.Stdout)
|
||||
}
|
||||
return display.NamespacesTable(snap, os.Stdout)
|
||||
},
|
||||
}
|
||||
|
||||
var alertsCmd = &cobra.Command{
|
||||
Use: "alerts",
|
||||
Short: "Active alerts and warnings (one-shot)",
|
||||
RunE: func(cmd *cobra.Command, args []string) error {
|
||||
snap, err := collectSnapshot()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if flagJSON {
|
||||
return display.AlertsJSON(snap, os.Stdout)
|
||||
}
|
||||
return display.AlertsTable(snap, os.Stdout)
|
||||
},
|
||||
}
|
||||
|
||||
var reportCmd = &cobra.Command{
|
||||
Use: "report",
|
||||
Short: "Full cluster report (JSON)",
|
||||
RunE: func(cmd *cobra.Command, args []string) error {
|
||||
snap, err := collectSnapshot()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return display.FullReport(snap, os.Stdout)
|
||||
},
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Helpers
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
func collectSnapshot() (*monitor.ClusterSnapshot, error) {
|
||||
cfg := newConfig()
|
||||
return monitor.CollectOnce(context.Background(), cfg)
|
||||
}
|
||||
|
||||
func newConfig() monitor.CollectorConfig {
|
||||
return monitor.CollectorConfig{
|
||||
ConfigPath: flagConfig,
|
||||
Env: flagEnv,
|
||||
NodeFilter: flagNode,
|
||||
Timeout: 30 * time.Second,
|
||||
}
|
||||
}
|
||||
|
||||
func runLive(cmd *cobra.Command, args []string) error {
|
||||
cfg := newConfig()
|
||||
return tui.Run(cfg)
|
||||
}
|
||||
|
||||
@ -25,4 +25,5 @@ func init() {
|
||||
Cmd.AddCommand(inviteCmd)
|
||||
Cmd.AddCommand(migrateCmd)
|
||||
Cmd.AddCommand(doctorCmd)
|
||||
Cmd.AddCommand(reportCmd)
|
||||
}
|
||||
|
||||
22
pkg/cli/cmd/node/report.go
Normal file
22
pkg/cli/cmd/node/report.go
Normal file
@ -0,0 +1,22 @@
|
||||
package node
|
||||
|
||||
import (
|
||||
"github.com/DeBrosOfficial/network/pkg/cli/production/report"
|
||||
"github.com/spf13/cobra"
|
||||
)
|
||||
|
||||
var reportCmd = &cobra.Command{
|
||||
Use: "report",
|
||||
Short: "Output comprehensive node health data as JSON",
|
||||
Long: `Collect all system and service data from this node and output
|
||||
as a single JSON blob. Designed to be called by 'orama monitor' over SSH.
|
||||
Requires root privileges for full data collection.`,
|
||||
RunE: func(cmd *cobra.Command, args []string) error {
|
||||
jsonFlag, _ := cmd.Flags().GetBool("json")
|
||||
return report.Handle(jsonFlag, "")
|
||||
},
|
||||
}
|
||||
|
||||
func init() {
|
||||
reportCmd.Flags().Bool("json", true, "Output as JSON (default)")
|
||||
}
|
||||
454
pkg/cli/monitor/alerts.go
Normal file
454
pkg/cli/monitor/alerts.go
Normal file
@ -0,0 +1,454 @@
|
||||
package monitor
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/DeBrosOfficial/network/pkg/cli/production/report"
|
||||
)
|
||||
|
||||
// AlertSeverity represents the severity of an alert.
|
||||
type AlertSeverity string
|
||||
|
||||
const (
|
||||
AlertCritical AlertSeverity = "critical"
|
||||
AlertWarning AlertSeverity = "warning"
|
||||
AlertInfo AlertSeverity = "info"
|
||||
)
|
||||
|
||||
// Alert represents a detected issue.
|
||||
type Alert struct {
|
||||
Severity AlertSeverity `json:"severity"`
|
||||
Subsystem string `json:"subsystem"`
|
||||
Node string `json:"node"`
|
||||
Message string `json:"message"`
|
||||
}
|
||||
|
||||
// DeriveAlerts scans a ClusterSnapshot and produces alerts.
|
||||
func DeriveAlerts(snap *ClusterSnapshot) []Alert {
|
||||
var alerts []Alert
|
||||
|
||||
// Collection failures
|
||||
for _, cs := range snap.Nodes {
|
||||
if cs.Error != nil {
|
||||
alerts = append(alerts, Alert{
|
||||
Severity: AlertCritical,
|
||||
Subsystem: "ssh",
|
||||
Node: cs.Node.Host,
|
||||
Message: fmt.Sprintf("Collection failed: %v", cs.Error),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
reports := snap.Healthy()
|
||||
if len(reports) == 0 {
|
||||
return alerts
|
||||
}
|
||||
|
||||
// Cross-node: RQLite leader
|
||||
alerts = append(alerts, checkRQLiteLeader(reports)...)
|
||||
|
||||
// Cross-node: Raft term consistency
|
||||
alerts = append(alerts, checkRaftTermConsistency(reports)...)
|
||||
|
||||
// Cross-node: Applied index lag
|
||||
alerts = append(alerts, checkAppliedIndexLag(reports)...)
|
||||
|
||||
// Cross-node: WireGuard peer symmetry
|
||||
alerts = append(alerts, checkWGPeerSymmetry(reports)...)
|
||||
|
||||
// Cross-node: Clock skew
|
||||
alerts = append(alerts, checkClockSkew(reports)...)
|
||||
|
||||
// Cross-node: Binary version
|
||||
alerts = append(alerts, checkBinaryVersion(reports)...)
|
||||
|
||||
// Per-node checks
|
||||
for _, r := range reports {
|
||||
host := nodeHost(r)
|
||||
alerts = append(alerts, checkNodeRQLite(r, host)...)
|
||||
alerts = append(alerts, checkNodeWireGuard(r, host)...)
|
||||
alerts = append(alerts, checkNodeSystem(r, host)...)
|
||||
alerts = append(alerts, checkNodeServices(r, host)...)
|
||||
alerts = append(alerts, checkNodeDNS(r, host)...)
|
||||
alerts = append(alerts, checkNodeAnyone(r, host)...)
|
||||
alerts = append(alerts, checkNodeProcesses(r, host)...)
|
||||
alerts = append(alerts, checkNodeNamespaces(r, host)...)
|
||||
alerts = append(alerts, checkNodeNetwork(r, host)...)
|
||||
}
|
||||
|
||||
return alerts
|
||||
}
|
||||
|
||||
func nodeHost(r *report.NodeReport) string {
|
||||
if r.PublicIP != "" {
|
||||
return r.PublicIP
|
||||
}
|
||||
return r.Hostname
|
||||
}
|
||||
|
||||
// --- Cross-node checks ---
|
||||
|
||||
func checkRQLiteLeader(reports []*report.NodeReport) []Alert {
|
||||
var alerts []Alert
|
||||
leaders := 0
|
||||
leaderAddrs := map[string]bool{}
|
||||
for _, r := range reports {
|
||||
if r.RQLite != nil && r.RQLite.RaftState == "Leader" {
|
||||
leaders++
|
||||
}
|
||||
if r.RQLite != nil && r.RQLite.LeaderAddr != "" {
|
||||
leaderAddrs[r.RQLite.LeaderAddr] = true
|
||||
}
|
||||
}
|
||||
|
||||
if leaders == 0 {
|
||||
alerts = append(alerts, Alert{AlertCritical, "rqlite", "cluster", "No RQLite leader found"})
|
||||
} else if leaders > 1 {
|
||||
alerts = append(alerts, Alert{AlertCritical, "rqlite", "cluster",
|
||||
fmt.Sprintf("Split brain: %d leaders detected", leaders)})
|
||||
}
|
||||
|
||||
if len(leaderAddrs) > 1 {
|
||||
alerts = append(alerts, Alert{AlertWarning, "rqlite", "cluster",
|
||||
fmt.Sprintf("Leader disagreement: nodes report %d different leader addresses", len(leaderAddrs))})
|
||||
}
|
||||
|
||||
return alerts
|
||||
}
|
||||
|
||||
func checkRaftTermConsistency(reports []*report.NodeReport) []Alert {
|
||||
var minTerm, maxTerm uint64
|
||||
first := true
|
||||
for _, r := range reports {
|
||||
if r.RQLite == nil || !r.RQLite.Responsive {
|
||||
continue
|
||||
}
|
||||
if first {
|
||||
minTerm = r.RQLite.Term
|
||||
maxTerm = r.RQLite.Term
|
||||
first = true
|
||||
}
|
||||
if r.RQLite.Term < minTerm {
|
||||
minTerm = r.RQLite.Term
|
||||
}
|
||||
if r.RQLite.Term > maxTerm {
|
||||
maxTerm = r.RQLite.Term
|
||||
}
|
||||
first = false
|
||||
}
|
||||
if maxTerm-minTerm > 1 {
|
||||
return []Alert{{AlertWarning, "rqlite", "cluster",
|
||||
fmt.Sprintf("Raft term inconsistency: min=%d, max=%d (delta=%d)", minTerm, maxTerm, maxTerm-minTerm)}}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func checkAppliedIndexLag(reports []*report.NodeReport) []Alert {
|
||||
var maxApplied uint64
|
||||
for _, r := range reports {
|
||||
if r.RQLite != nil && r.RQLite.Applied > maxApplied {
|
||||
maxApplied = r.RQLite.Applied
|
||||
}
|
||||
}
|
||||
|
||||
var alerts []Alert
|
||||
for _, r := range reports {
|
||||
if r.RQLite == nil || !r.RQLite.Responsive {
|
||||
continue
|
||||
}
|
||||
lag := maxApplied - r.RQLite.Applied
|
||||
if lag > 100 {
|
||||
alerts = append(alerts, Alert{AlertWarning, "rqlite", nodeHost(r),
|
||||
fmt.Sprintf("Applied index lag: %d behind leader (local=%d, max=%d)", lag, r.RQLite.Applied, maxApplied)})
|
||||
}
|
||||
}
|
||||
return alerts
|
||||
}
|
||||
|
||||
func checkWGPeerSymmetry(reports []*report.NodeReport) []Alert {
|
||||
// Build map: wg_ip -> set of peer public keys
|
||||
type nodeInfo struct {
|
||||
host string
|
||||
wgIP string
|
||||
peerKeys map[string]bool
|
||||
}
|
||||
var nodes []nodeInfo
|
||||
for _, r := range reports {
|
||||
if r.WireGuard == nil || !r.WireGuard.InterfaceUp {
|
||||
continue
|
||||
}
|
||||
ni := nodeInfo{host: nodeHost(r), wgIP: r.WireGuard.WgIP, peerKeys: map[string]bool{}}
|
||||
for _, p := range r.WireGuard.Peers {
|
||||
ni.peerKeys[p.PublicKey] = true
|
||||
}
|
||||
nodes = append(nodes, ni)
|
||||
}
|
||||
|
||||
// For WG peer symmetry, we check peer counts match (N-1 peers expected)
|
||||
var alerts []Alert
|
||||
expectedPeers := len(nodes) - 1
|
||||
for _, ni := range nodes {
|
||||
if len(ni.peerKeys) < expectedPeers {
|
||||
alerts = append(alerts, Alert{AlertCritical, "wireguard", ni.host,
|
||||
fmt.Sprintf("WG peer count mismatch: has %d peers, expected %d", len(ni.peerKeys), expectedPeers)})
|
||||
}
|
||||
}
|
||||
|
||||
return alerts
|
||||
}
|
||||
|
||||
func checkClockSkew(reports []*report.NodeReport) []Alert {
|
||||
var times []struct {
|
||||
host string
|
||||
t int64
|
||||
}
|
||||
for _, r := range reports {
|
||||
if r.System != nil && r.System.TimeUnix > 0 {
|
||||
times = append(times, struct {
|
||||
host string
|
||||
t int64
|
||||
}{nodeHost(r), r.System.TimeUnix})
|
||||
}
|
||||
}
|
||||
if len(times) < 2 {
|
||||
return nil
|
||||
}
|
||||
|
||||
var minT, maxT int64 = times[0].t, times[0].t
|
||||
var minHost, maxHost string = times[0].host, times[0].host
|
||||
for _, t := range times[1:] {
|
||||
if t.t < minT {
|
||||
minT = t.t
|
||||
minHost = t.host
|
||||
}
|
||||
if t.t > maxT {
|
||||
maxT = t.t
|
||||
maxHost = t.host
|
||||
}
|
||||
}
|
||||
|
||||
delta := maxT - minT
|
||||
if delta > 5 {
|
||||
return []Alert{{AlertWarning, "system", "cluster",
|
||||
fmt.Sprintf("Clock skew: %ds between %s and %s", delta, minHost, maxHost)}}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func checkBinaryVersion(reports []*report.NodeReport) []Alert {
|
||||
versions := map[string][]string{} // version -> list of hosts
|
||||
for _, r := range reports {
|
||||
v := r.Version
|
||||
if v == "" {
|
||||
v = "unknown"
|
||||
}
|
||||
versions[v] = append(versions[v], nodeHost(r))
|
||||
}
|
||||
if len(versions) > 1 {
|
||||
msg := "Binary version mismatch:"
|
||||
for v, hosts := range versions {
|
||||
msg += fmt.Sprintf(" %s=%v", v, hosts)
|
||||
}
|
||||
return []Alert{{AlertWarning, "system", "cluster", msg}}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// --- Per-node checks ---
|
||||
|
||||
func checkNodeRQLite(r *report.NodeReport, host string) []Alert {
|
||||
if r.RQLite == nil {
|
||||
return nil
|
||||
}
|
||||
var alerts []Alert
|
||||
if !r.RQLite.Responsive {
|
||||
alerts = append(alerts, Alert{AlertCritical, "rqlite", host, "RQLite not responding"})
|
||||
}
|
||||
if r.RQLite.Responsive && !r.RQLite.Ready {
|
||||
alerts = append(alerts, Alert{AlertWarning, "rqlite", host, "RQLite not ready (/readyz failed)"})
|
||||
}
|
||||
if r.RQLite.Responsive && !r.RQLite.StrongRead {
|
||||
alerts = append(alerts, Alert{AlertWarning, "rqlite", host, "Strong read failed"})
|
||||
}
|
||||
return alerts
|
||||
}
|
||||
|
||||
func checkNodeWireGuard(r *report.NodeReport, host string) []Alert {
|
||||
if r.WireGuard == nil {
|
||||
return nil
|
||||
}
|
||||
var alerts []Alert
|
||||
if !r.WireGuard.InterfaceUp {
|
||||
alerts = append(alerts, Alert{AlertCritical, "wireguard", host, "WireGuard interface down"})
|
||||
return alerts
|
||||
}
|
||||
for _, p := range r.WireGuard.Peers {
|
||||
if p.HandshakeAgeSec > 180 && p.LatestHandshake > 0 {
|
||||
alerts = append(alerts, Alert{AlertWarning, "wireguard", host,
|
||||
fmt.Sprintf("Stale WG handshake with peer %s: %ds ago", truncateKey(p.PublicKey), p.HandshakeAgeSec)})
|
||||
}
|
||||
if p.LatestHandshake == 0 {
|
||||
alerts = append(alerts, Alert{AlertCritical, "wireguard", host,
|
||||
fmt.Sprintf("WG peer %s has never handshaked", truncateKey(p.PublicKey))})
|
||||
}
|
||||
}
|
||||
return alerts
|
||||
}
|
||||
|
||||
func checkNodeSystem(r *report.NodeReport, host string) []Alert {
|
||||
if r.System == nil {
|
||||
return nil
|
||||
}
|
||||
var alerts []Alert
|
||||
if r.System.MemUsePct > 90 {
|
||||
alerts = append(alerts, Alert{AlertWarning, "system", host,
|
||||
fmt.Sprintf("Memory at %d%%", r.System.MemUsePct)})
|
||||
}
|
||||
if r.System.DiskUsePct > 85 {
|
||||
alerts = append(alerts, Alert{AlertWarning, "system", host,
|
||||
fmt.Sprintf("Disk at %d%%", r.System.DiskUsePct)})
|
||||
}
|
||||
if r.System.OOMKills > 0 {
|
||||
alerts = append(alerts, Alert{AlertCritical, "system", host,
|
||||
fmt.Sprintf("%d OOM kills detected", r.System.OOMKills)})
|
||||
}
|
||||
if r.System.SwapUsedMB > 0 && r.System.SwapTotalMB > 0 {
|
||||
pct := r.System.SwapUsedMB * 100 / r.System.SwapTotalMB
|
||||
if pct > 30 {
|
||||
alerts = append(alerts, Alert{AlertInfo, "system", host,
|
||||
fmt.Sprintf("Swap usage at %d%%", pct)})
|
||||
}
|
||||
}
|
||||
// High load
|
||||
if r.System.CPUCount > 0 {
|
||||
loadRatio := r.System.LoadAvg1 / float64(r.System.CPUCount)
|
||||
if loadRatio > 2.0 {
|
||||
alerts = append(alerts, Alert{AlertWarning, "system", host,
|
||||
fmt.Sprintf("High load: %.1f (%.1fx CPU count)", r.System.LoadAvg1, loadRatio)})
|
||||
}
|
||||
}
|
||||
return alerts
|
||||
}
|
||||
|
||||
func checkNodeServices(r *report.NodeReport, host string) []Alert {
|
||||
if r.Services == nil {
|
||||
return nil
|
||||
}
|
||||
var alerts []Alert
|
||||
for _, svc := range r.Services.Services {
|
||||
if svc.ActiveState == "failed" {
|
||||
alerts = append(alerts, Alert{AlertCritical, "service", host,
|
||||
fmt.Sprintf("Service %s is FAILED", svc.Name)})
|
||||
} else if svc.ActiveState != "active" && svc.ActiveState != "" && svc.ActiveState != "unknown" {
|
||||
alerts = append(alerts, Alert{AlertWarning, "service", host,
|
||||
fmt.Sprintf("Service %s is %s", svc.Name, svc.ActiveState)})
|
||||
}
|
||||
if svc.RestartLoopRisk {
|
||||
alerts = append(alerts, Alert{AlertCritical, "service", host,
|
||||
fmt.Sprintf("Service %s restart loop: %d restarts, active for %ds", svc.Name, svc.NRestarts, svc.ActiveSinceSec)})
|
||||
}
|
||||
}
|
||||
for _, unit := range r.Services.FailedUnits {
|
||||
alerts = append(alerts, Alert{AlertWarning, "service", host,
|
||||
fmt.Sprintf("Failed systemd unit: %s", unit)})
|
||||
}
|
||||
return alerts
|
||||
}
|
||||
|
||||
func checkNodeDNS(r *report.NodeReport, host string) []Alert {
|
||||
if r.DNS == nil {
|
||||
return nil
|
||||
}
|
||||
var alerts []Alert
|
||||
if !r.DNS.CoreDNSActive {
|
||||
alerts = append(alerts, Alert{AlertCritical, "dns", host, "CoreDNS is down"})
|
||||
}
|
||||
if !r.DNS.CaddyActive {
|
||||
alerts = append(alerts, Alert{AlertCritical, "dns", host, "Caddy is down"})
|
||||
}
|
||||
if r.DNS.BaseTLSDaysLeft >= 0 && r.DNS.BaseTLSDaysLeft < 14 {
|
||||
alerts = append(alerts, Alert{AlertWarning, "dns", host,
|
||||
fmt.Sprintf("Base TLS cert expires in %d days", r.DNS.BaseTLSDaysLeft)})
|
||||
}
|
||||
if r.DNS.WildTLSDaysLeft >= 0 && r.DNS.WildTLSDaysLeft < 14 {
|
||||
alerts = append(alerts, Alert{AlertWarning, "dns", host,
|
||||
fmt.Sprintf("Wildcard TLS cert expires in %d days", r.DNS.WildTLSDaysLeft)})
|
||||
}
|
||||
if r.DNS.CoreDNSActive && !r.DNS.SOAResolves {
|
||||
alerts = append(alerts, Alert{AlertWarning, "dns", host, "SOA record not resolving"})
|
||||
}
|
||||
return alerts
|
||||
}
|
||||
|
||||
func checkNodeAnyone(r *report.NodeReport, host string) []Alert {
|
||||
if r.Anyone == nil {
|
||||
return nil
|
||||
}
|
||||
var alerts []Alert
|
||||
if (r.Anyone.RelayActive || r.Anyone.ClientActive) && !r.Anyone.Bootstrapped {
|
||||
alerts = append(alerts, Alert{AlertWarning, "anyone", host,
|
||||
fmt.Sprintf("Anyone bootstrap at %d%%", r.Anyone.BootstrapPct)})
|
||||
}
|
||||
return alerts
|
||||
}
|
||||
|
||||
func checkNodeProcesses(r *report.NodeReport, host string) []Alert {
|
||||
if r.Processes == nil {
|
||||
return nil
|
||||
}
|
||||
var alerts []Alert
|
||||
if r.Processes.ZombieCount > 0 {
|
||||
alerts = append(alerts, Alert{AlertInfo, "system", host,
|
||||
fmt.Sprintf("%d zombie processes", r.Processes.ZombieCount)})
|
||||
}
|
||||
if r.Processes.OrphanCount > 0 {
|
||||
alerts = append(alerts, Alert{AlertInfo, "system", host,
|
||||
fmt.Sprintf("%d orphan orama processes", r.Processes.OrphanCount)})
|
||||
}
|
||||
if r.Processes.PanicCount > 0 {
|
||||
alerts = append(alerts, Alert{AlertCritical, "system", host,
|
||||
fmt.Sprintf("%d panic/fatal in orama-node logs (1h)", r.Processes.PanicCount)})
|
||||
}
|
||||
return alerts
|
||||
}
|
||||
|
||||
func checkNodeNamespaces(r *report.NodeReport, host string) []Alert {
|
||||
var alerts []Alert
|
||||
for _, ns := range r.Namespaces {
|
||||
if !ns.GatewayUp {
|
||||
alerts = append(alerts, Alert{AlertWarning, "namespace", host,
|
||||
fmt.Sprintf("Namespace %s gateway down", ns.Name)})
|
||||
}
|
||||
if !ns.RQLiteUp {
|
||||
alerts = append(alerts, Alert{AlertWarning, "namespace", host,
|
||||
fmt.Sprintf("Namespace %s RQLite down", ns.Name)})
|
||||
}
|
||||
}
|
||||
return alerts
|
||||
}
|
||||
|
||||
func checkNodeNetwork(r *report.NodeReport, host string) []Alert {
|
||||
if r.Network == nil {
|
||||
return nil
|
||||
}
|
||||
var alerts []Alert
|
||||
if !r.Network.UFWActive {
|
||||
alerts = append(alerts, Alert{AlertCritical, "network", host, "UFW firewall is inactive"})
|
||||
}
|
||||
if !r.Network.InternetReachable {
|
||||
alerts = append(alerts, Alert{AlertWarning, "network", host, "Internet not reachable (ping 8.8.8.8 failed)"})
|
||||
}
|
||||
if r.Network.TCPRetransRate > 5.0 {
|
||||
alerts = append(alerts, Alert{AlertWarning, "network", host,
|
||||
fmt.Sprintf("High TCP retransmission rate: %.1f%%", r.Network.TCPRetransRate)})
|
||||
}
|
||||
return alerts
|
||||
}
|
||||
|
||||
// truncateKey shortens a WireGuard public key to its first 8 characters
// plus an ellipsis for compact display; shorter keys pass through intact.
func truncateKey(key string) string {
	const keep = 8
	if len(key) <= keep {
		return key
	}
	return key[:keep] + "..."
}
|
||||
|
||||
115
pkg/cli/monitor/collector.go
Normal file
115
pkg/cli/monitor/collector.go
Normal file
@ -0,0 +1,115 @@
|
||||
package monitor
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/DeBrosOfficial/network/pkg/cli/production/report"
|
||||
"github.com/DeBrosOfficial/network/pkg/inspector"
|
||||
)
|
||||
|
||||
// CollectorConfig holds configuration for the collection pipeline.
|
||||
type CollectorConfig struct {
|
||||
ConfigPath string
|
||||
Env string
|
||||
NodeFilter string
|
||||
Timeout time.Duration
|
||||
}
|
||||
|
||||
// CollectOnce runs `sudo orama node report --json` on all matching nodes
|
||||
// in parallel and returns a ClusterSnapshot.
|
||||
func CollectOnce(ctx context.Context, cfg CollectorConfig) (*ClusterSnapshot, error) {
|
||||
nodes, err := inspector.LoadNodes(cfg.ConfigPath)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("load nodes: %w", err)
|
||||
}
|
||||
nodes = inspector.FilterByEnv(nodes, cfg.Env)
|
||||
if cfg.NodeFilter != "" {
|
||||
nodes = filterByHost(nodes, cfg.NodeFilter)
|
||||
}
|
||||
if len(nodes) == 0 {
|
||||
return nil, fmt.Errorf("no nodes found for env %q", cfg.Env)
|
||||
}
|
||||
|
||||
timeout := cfg.Timeout
|
||||
if timeout == 0 {
|
||||
timeout = 30 * time.Second
|
||||
}
|
||||
|
||||
start := time.Now()
|
||||
snap := &ClusterSnapshot{
|
||||
Environment: cfg.Env,
|
||||
CollectedAt: start,
|
||||
Nodes: make([]CollectionStatus, len(nodes)),
|
||||
}
|
||||
|
||||
var wg sync.WaitGroup
|
||||
for i, node := range nodes {
|
||||
wg.Add(1)
|
||||
go func(idx int, n inspector.Node) {
|
||||
defer wg.Done()
|
||||
snap.Nodes[idx] = collectNodeReport(ctx, n, timeout)
|
||||
}(i, node)
|
||||
}
|
||||
wg.Wait()
|
||||
|
||||
snap.Duration = time.Since(start)
|
||||
snap.Alerts = DeriveAlerts(snap)
|
||||
|
||||
return snap, nil
|
||||
}
|
||||
|
||||
// collectNodeReport SSHes into a single node and parses the JSON report.
|
||||
func collectNodeReport(ctx context.Context, node inspector.Node, timeout time.Duration) CollectionStatus {
|
||||
nodeCtx, cancel := context.WithTimeout(ctx, timeout)
|
||||
defer cancel()
|
||||
|
||||
start := time.Now()
|
||||
result := inspector.RunSSH(nodeCtx, node, "sudo orama node report --json")
|
||||
|
||||
cs := CollectionStatus{
|
||||
Node: node,
|
||||
Duration: time.Since(start),
|
||||
Retries: result.Retries,
|
||||
}
|
||||
|
||||
if !result.OK() {
|
||||
cs.Error = fmt.Errorf("SSH failed (exit %d): %s", result.ExitCode, truncate(result.Stderr, 200))
|
||||
return cs
|
||||
}
|
||||
|
||||
var rpt report.NodeReport
|
||||
if err := json.Unmarshal([]byte(result.Stdout), &rpt); err != nil {
|
||||
cs.Error = fmt.Errorf("parse report JSON: %w (first 200 bytes: %s)", err, truncate(result.Stdout, 200))
|
||||
return cs
|
||||
}
|
||||
|
||||
// Enrich with node metadata from remote-nodes.conf
|
||||
if rpt.Hostname == "" {
|
||||
rpt.Hostname = node.Host
|
||||
}
|
||||
rpt.PublicIP = node.Host
|
||||
|
||||
cs.Report = &rpt
|
||||
return cs
|
||||
}
|
||||
|
||||
func filterByHost(nodes []inspector.Node, host string) []inspector.Node {
|
||||
var filtered []inspector.Node
|
||||
for _, n := range nodes {
|
||||
if n.Host == host {
|
||||
filtered = append(filtered, n)
|
||||
}
|
||||
}
|
||||
return filtered
|
||||
}
|
||||
|
||||
// truncate caps s at maxLen bytes, appending "..." when it was cut.
func truncate(s string, maxLen int) string {
	if len(s) > maxLen {
		return s[:maxLen] + "..."
	}
	return s
}
|
||||
64
pkg/cli/monitor/display/alerts.go
Normal file
64
pkg/cli/monitor/display/alerts.go
Normal file
@ -0,0 +1,64 @@
|
||||
package display
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io"
|
||||
"sort"
|
||||
"strings"
|
||||
|
||||
"github.com/DeBrosOfficial/network/pkg/cli/monitor"
|
||||
)
|
||||
|
||||
// AlertsTable prints alerts sorted by severity to w.
//
// Output: a bold title with critical/warning counts, a double-line rule,
// then one line per alert ("[CRIT]/[WARN]/[INFO] node subsystem message").
// Cluster-wide alerts (empty Node) are labeled "cluster". Always returns nil.
func AlertsTable(snap *monitor.ClusterSnapshot, w io.Writer) error {
	critCount, warnCount := countAlerts(snap.Alerts)

	fmt.Fprintf(w, "%s\n", styleBold.Render(
		fmt.Sprintf("Alerts \u2014 %s (%d critical, %d warning)",
			snap.Environment, critCount, warnCount)))
	fmt.Fprintln(w, strings.Repeat("\u2550", 44))
	fmt.Fprintln(w)

	if len(snap.Alerts) == 0 {
		fmt.Fprintln(w, styleGreen.Render(" No alerts"))
		return nil
	}

	// Sort by severity: critical first, then warning, then info.
	// Work on a copy so the caller's snapshot is not reordered.
	sorted := make([]monitor.Alert, len(snap.Alerts))
	copy(sorted, snap.Alerts)
	sort.Slice(sorted, func(i, j int) bool {
		return severityRank(sorted[i].Severity) < severityRank(sorted[j].Severity)
	})

	for _, a := range sorted {
		tag := severityTag(a.Severity)
		node := a.Node
		if node == "" {
			// Alert is not tied to a single node; attribute it to the cluster.
			node = "cluster"
		}
		fmt.Fprintf(w, "%s %-18s %-12s %s\n",
			tag, node, a.Subsystem, a.Message)
	}

	return nil
}
|
||||
|
||||
// AlertsJSON writes alerts as JSON.
|
||||
func AlertsJSON(snap *monitor.ClusterSnapshot, w io.Writer) error {
|
||||
return writeJSON(w, snap.Alerts)
|
||||
}
|
||||
|
||||
// severityRank returns a sort rank for severity (lower = higher priority).
|
||||
func severityRank(s monitor.AlertSeverity) int {
|
||||
switch s {
|
||||
case monitor.AlertCritical:
|
||||
return 0
|
||||
case monitor.AlertWarning:
|
||||
return 1
|
||||
case monitor.AlertInfo:
|
||||
return 2
|
||||
default:
|
||||
return 3
|
||||
}
|
||||
}
|
||||
204
pkg/cli/monitor/display/cluster.go
Normal file
204
pkg/cli/monitor/display/cluster.go
Normal file
@ -0,0 +1,204 @@
|
||||
package display
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io"
|
||||
"strings"
|
||||
|
||||
"github.com/DeBrosOfficial/network/pkg/cli/monitor"
|
||||
)
|
||||
|
||||
// ClusterTable prints a cluster overview table to w.
//
// Layout: a bold title with node count and collection duration, a column
// header, one row per reachable node (memory/disk percent, RQLite raft
// state, WireGuard up/down icon, active/total service count), then any
// unreachable nodes, then a critical/warning alert summary. Always
// returns nil.
func ClusterTable(snap *monitor.ClusterSnapshot, w io.Writer) error {
	dur := snap.Duration.Seconds()
	fmt.Fprintf(w, "%s\n", styleBold.Render(
		fmt.Sprintf("Cluster Overview \u2014 %s (%d nodes, collected in %.1fs)",
			snap.Environment, snap.TotalCount(), dur)))
	fmt.Fprintln(w, strings.Repeat("\u2550", 60))
	fmt.Fprintln(w)

	// Header
	fmt.Fprintf(w, "%-18s %-12s %-6s %-6s %-11s %-5s %s\n",
		styleHeader.Render("NODE"),
		styleHeader.Render("ROLE"),
		styleHeader.Render("MEM"),
		styleHeader.Render("DISK"),
		styleHeader.Render("RQLITE"),
		styleHeader.Render("WG"),
		styleHeader.Render("SERVICES"))
	fmt.Fprintln(w, separator(70))

	// Healthy nodes: any node with an error or missing report is handled
	// in the "unreachable" section below.
	for _, cs := range snap.Nodes {
		if cs.Error != nil {
			continue
		}
		r := cs.Report
		if r == nil {
			continue
		}

		host := cs.Node.Host
		role := cs.Node.Role

		// Memory % ("--" when the report has no system section)
		memStr := "--"
		if r.System != nil {
			memStr = fmt.Sprintf("%d%%", r.System.MemUsePct)
		}

		// Disk %
		diskStr := "--"
		if r.System != nil {
			diskStr = fmt.Sprintf("%d%%", r.System.DiskUsePct)
		}

		// RQLite state: raft state when responsive, red DOWN when
		// configured but unresponsive, "--" when not configured.
		rqliteStr := "--"
		if r.RQLite != nil && r.RQLite.Responsive {
			rqliteStr = r.RQLite.RaftState
		} else if r.RQLite != nil {
			rqliteStr = styleRed.Render("DOWN")
		}

		// WireGuard
		wgStr := statusIcon(r.WireGuard != nil && r.WireGuard.InterfaceUp)

		// Services: active/total
		svcStr := "--"
		if r.Services != nil {
			active := 0
			total := len(r.Services.Services)
			for _, svc := range r.Services.Services {
				if svc.ActiveState == "active" {
					active++
				}
			}
			svcStr = fmt.Sprintf("%d/%d", active, total)
		}

		fmt.Fprintf(w, "%-18s %-12s %-6s %-6s %-11s %-5s %s\n",
			host, role, memStr, diskStr, rqliteStr, wgStr, svcStr)
	}

	// Unreachable nodes
	failed := snap.Failed()
	if len(failed) > 0 {
		fmt.Fprintln(w)
		for _, cs := range failed {
			fmt.Fprintf(w, "%-18s %-12s %s\n",
				styleRed.Render(cs.Node.Host),
				cs.Node.Role,
				styleRed.Render("UNREACHABLE"))
		}
	}

	// Alerts summary: counts, then one line per critical/warning alert
	// (info alerts are omitted from this view).
	critCount, warnCount := countAlerts(snap.Alerts)
	fmt.Fprintln(w)
	fmt.Fprintf(w, "Alerts: %s critical, %s warning\n",
		alertCountStr(critCount, monitor.AlertCritical),
		alertCountStr(warnCount, monitor.AlertWarning))

	for _, a := range snap.Alerts {
		if a.Severity == monitor.AlertCritical || a.Severity == monitor.AlertWarning {
			tag := severityTag(a.Severity)
			fmt.Fprintf(w, " %s %s: %s\n", tag, a.Node, a.Message)
		}
	}

	return nil
}
|
||||
|
||||
// ClusterJSON writes the cluster snapshot as JSON: one flat entry per
// node mirroring the columns of ClusterTable (host, role, mem/disk
// percent, rqlite state, WireGuard flag, "active/total" service string,
// and an overall status of "ok" or "unreachable").
func ClusterJSON(snap *monitor.ClusterSnapshot, w io.Writer) error {
	type clusterEntry struct {
		Host     string `json:"host"`
		Role     string `json:"role"`
		MemPct   int    `json:"mem_pct"`
		DiskPct  int    `json:"disk_pct"`
		RQLite   string `json:"rqlite_state"`
		WGUp     bool   `json:"wg_up"`
		Services string `json:"services"`
		Status   string `json:"status"`
		Error    string `json:"error,omitempty"`
	}

	var entries []clusterEntry
	for _, cs := range snap.Nodes {
		e := clusterEntry{
			Host: cs.Node.Host,
			Role: cs.Node.Role,
		}
		// Collection failed: record the error and move on.
		if cs.Error != nil {
			e.Status = "unreachable"
			e.Error = cs.Error.Error()
			entries = append(entries, e)
			continue
		}
		r := cs.Report
		// No error but also no report: still unreachable, just without detail.
		if r == nil {
			e.Status = "unreachable"
			entries = append(entries, e)
			continue
		}
		e.Status = "ok"
		if r.System != nil {
			e.MemPct = r.System.MemUsePct
			e.DiskPct = r.System.DiskUsePct
		}
		// RQLite state is only reported when the instance is responsive;
		// otherwise the field stays empty.
		if r.RQLite != nil && r.RQLite.Responsive {
			e.RQLite = r.RQLite.RaftState
		}
		e.WGUp = r.WireGuard != nil && r.WireGuard.InterfaceUp
		if r.Services != nil {
			active := 0
			total := len(r.Services.Services)
			for _, svc := range r.Services.Services {
				if svc.ActiveState == "active" {
					active++
				}
			}
			e.Services = fmt.Sprintf("%d/%d", active, total)
		}
		entries = append(entries, e)
	}

	return writeJSON(w, entries)
}
|
||||
|
||||
// countAlerts returns the number of critical and warning alerts.
|
||||
func countAlerts(alerts []monitor.Alert) (crit, warn int) {
|
||||
for _, a := range alerts {
|
||||
switch a.Severity {
|
||||
case monitor.AlertCritical:
|
||||
crit++
|
||||
case monitor.AlertWarning:
|
||||
warn++
|
||||
}
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// severityTag returns a colored tag like [CRIT], [WARN], [INFO].
|
||||
func severityTag(s monitor.AlertSeverity) string {
|
||||
switch s {
|
||||
case monitor.AlertCritical:
|
||||
return styleRed.Render("[CRIT]")
|
||||
case monitor.AlertWarning:
|
||||
return styleYellow.Render("[WARN]")
|
||||
case monitor.AlertInfo:
|
||||
return styleMuted.Render("[INFO]")
|
||||
default:
|
||||
return styleMuted.Render("[????]")
|
||||
}
|
||||
}
|
||||
|
||||
// alertCountStr renders the count with appropriate color.
|
||||
func alertCountStr(count int, sev monitor.AlertSeverity) string {
|
||||
s := fmt.Sprintf("%d", count)
|
||||
if count > 0 {
|
||||
return severityColor(sev).Render(s)
|
||||
}
|
||||
return s
|
||||
}
|
||||
129
pkg/cli/monitor/display/dns.go
Normal file
129
pkg/cli/monitor/display/dns.go
Normal file
@ -0,0 +1,129 @@
|
||||
package display
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io"
|
||||
"strings"
|
||||
|
||||
"github.com/DeBrosOfficial/network/pkg/cli/monitor"
|
||||
)
|
||||
|
||||
// DNSTable prints DNS status for nameserver nodes to w.
//
// Only nodes for which Node.IsNameserver() is true are shown. Each row
// reports service activity (CoreDNS, Caddy), resolution checks (SOA, NS,
// wildcard), and TLS certificate days remaining for the base and wildcard
// certs. Unreachable nameservers get a red UNREACHABLE row; reachable
// ones without DNS data get a muted note. Always returns nil.
func DNSTable(snap *monitor.ClusterSnapshot, w io.Writer) error {
	fmt.Fprintf(w, "%s\n", styleBold.Render(
		fmt.Sprintf("DNS Status \u2014 %s", snap.Environment)))
	fmt.Fprintln(w, strings.Repeat("\u2550", 22))
	fmt.Fprintln(w)

	// Header
	fmt.Fprintf(w, "%-18s %-9s %-7s %-5s %-5s %-10s %-10s %s\n",
		styleHeader.Render("NODE"),
		styleHeader.Render("COREDNS"),
		styleHeader.Render("CADDY"),
		styleHeader.Render("SOA"),
		styleHeader.Render("NS"),
		styleHeader.Render("WILDCARD"),
		styleHeader.Render("BASE TLS"),
		styleHeader.Render("WILD TLS"))
	fmt.Fprintln(w, separator(78))

	// found tracks whether any nameserver node exists at all.
	found := false
	for _, cs := range snap.Nodes {
		// Only show nameserver nodes
		if !cs.Node.IsNameserver() {
			continue
		}
		found = true

		if cs.Error != nil || cs.Report == nil {
			fmt.Fprintf(w, "%-18s %s\n",
				styleRed.Render(cs.Node.Host),
				styleRed.Render("UNREACHABLE"))
			continue
		}

		r := cs.Report
		if r.DNS == nil {
			fmt.Fprintf(w, "%-18s %s\n",
				cs.Node.Host,
				styleMuted.Render("no DNS data"))
			continue
		}

		dns := r.DNS
		fmt.Fprintf(w, "%-18s %-9s %-7s %-5s %-5s %-10s %-10s %s\n",
			cs.Node.Host,
			statusIcon(dns.CoreDNSActive),
			statusIcon(dns.CaddyActive),
			statusIcon(dns.SOAResolves),
			statusIcon(dns.NSResolves),
			statusIcon(dns.WildcardResolves),
			tlsDaysStr(dns.BaseTLSDaysLeft),
			tlsDaysStr(dns.WildTLSDaysLeft))
	}

	if !found {
		fmt.Fprintln(w, styleMuted.Render(" No nameserver nodes found"))
	}

	return nil
}
|
||||
|
||||
// DNSJSON writes DNS status as JSON.
|
||||
func DNSJSON(snap *monitor.ClusterSnapshot, w io.Writer) error {
|
||||
type dnsEntry struct {
|
||||
Host string `json:"host"`
|
||||
CoreDNSActive bool `json:"coredns_active"`
|
||||
CaddyActive bool `json:"caddy_active"`
|
||||
SOAResolves bool `json:"soa_resolves"`
|
||||
NSResolves bool `json:"ns_resolves"`
|
||||
WildcardResolves bool `json:"wildcard_resolves"`
|
||||
BaseTLSDaysLeft int `json:"base_tls_days_left"`
|
||||
WildTLSDaysLeft int `json:"wild_tls_days_left"`
|
||||
Error string `json:"error,omitempty"`
|
||||
}
|
||||
|
||||
var entries []dnsEntry
|
||||
for _, cs := range snap.Nodes {
|
||||
if !cs.Node.IsNameserver() {
|
||||
continue
|
||||
}
|
||||
e := dnsEntry{Host: cs.Node.Host}
|
||||
if cs.Error != nil {
|
||||
e.Error = cs.Error.Error()
|
||||
entries = append(entries, e)
|
||||
continue
|
||||
}
|
||||
if cs.Report == nil || cs.Report.DNS == nil {
|
||||
entries = append(entries, e)
|
||||
continue
|
||||
}
|
||||
dns := cs.Report.DNS
|
||||
e.CoreDNSActive = dns.CoreDNSActive
|
||||
e.CaddyActive = dns.CaddyActive
|
||||
e.SOAResolves = dns.SOAResolves
|
||||
e.NSResolves = dns.NSResolves
|
||||
e.WildcardResolves = dns.WildcardResolves
|
||||
e.BaseTLSDaysLeft = dns.BaseTLSDaysLeft
|
||||
e.WildTLSDaysLeft = dns.WildTLSDaysLeft
|
||||
entries = append(entries, e)
|
||||
}
|
||||
|
||||
return writeJSON(w, entries)
|
||||
}
|
||||
|
||||
// tlsDaysStr formats TLS days left with appropriate coloring.
|
||||
func tlsDaysStr(days int) string {
|
||||
if days < 0 {
|
||||
return styleMuted.Render("--")
|
||||
}
|
||||
s := fmt.Sprintf("%d days", days)
|
||||
switch {
|
||||
case days < 7:
|
||||
return styleRed.Render(s)
|
||||
case days < 30:
|
||||
return styleYellow.Render(s)
|
||||
default:
|
||||
return styleGreen.Render(s)
|
||||
}
|
||||
}
|
||||
194
pkg/cli/monitor/display/mesh.go
Normal file
194
pkg/cli/monitor/display/mesh.go
Normal file
@ -0,0 +1,194 @@
|
||||
package display
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io"
|
||||
"strings"
|
||||
|
||||
"github.com/DeBrosOfficial/network/pkg/cli/monitor"
|
||||
)
|
||||
|
||||
// MeshTable prints WireGuard mesh status to w.
|
||||
func MeshTable(snap *monitor.ClusterSnapshot, w io.Writer) error {
|
||||
fmt.Fprintf(w, "%s\n", styleBold.Render(
|
||||
fmt.Sprintf("WireGuard Mesh \u2014 %s", snap.Environment)))
|
||||
fmt.Fprintln(w, strings.Repeat("\u2550", 28))
|
||||
fmt.Fprintln(w)
|
||||
|
||||
// Header
|
||||
fmt.Fprintf(w, "%-18s %-12s %-7s %-7s %s\n",
|
||||
styleHeader.Render("NODE"),
|
||||
styleHeader.Render("WG IP"),
|
||||
styleHeader.Render("PORT"),
|
||||
styleHeader.Render("PEERS"),
|
||||
styleHeader.Render("STATUS"))
|
||||
fmt.Fprintln(w, separator(54))
|
||||
|
||||
// Collect mesh info for peer details
|
||||
type meshNode struct {
|
||||
host string
|
||||
wgIP string
|
||||
port int
|
||||
peers int
|
||||
total int
|
||||
healthy bool
|
||||
}
|
||||
var meshNodes []meshNode
|
||||
|
||||
expectedPeers := snap.HealthyCount() - 1
|
||||
|
||||
for _, cs := range snap.Nodes {
|
||||
if cs.Error != nil || cs.Report == nil {
|
||||
continue
|
||||
}
|
||||
r := cs.Report
|
||||
if r.WireGuard == nil {
|
||||
fmt.Fprintf(w, "%-18s %s\n", cs.Node.Host, styleMuted.Render("no WireGuard"))
|
||||
continue
|
||||
}
|
||||
|
||||
wg := r.WireGuard
|
||||
peerCount := wg.PeerCount
|
||||
allOK := wg.InterfaceUp
|
||||
if allOK {
|
||||
for _, p := range wg.Peers {
|
||||
if p.LatestHandshake == 0 || p.HandshakeAgeSec > 180 {
|
||||
allOK = false
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
mn := meshNode{
|
||||
host: cs.Node.Host,
|
||||
wgIP: wg.WgIP,
|
||||
port: wg.ListenPort,
|
||||
peers: peerCount,
|
||||
total: expectedPeers,
|
||||
healthy: allOK,
|
||||
}
|
||||
meshNodes = append(meshNodes, mn)
|
||||
|
||||
peerStr := fmt.Sprintf("%d/%d", peerCount, expectedPeers)
|
||||
statusStr := statusIcon(allOK)
|
||||
if !wg.InterfaceUp {
|
||||
statusStr = styleRed.Render("DOWN")
|
||||
}
|
||||
|
||||
fmt.Fprintf(w, "%-18s %-12s %-7d %-7s %s\n",
|
||||
cs.Node.Host, wg.WgIP, wg.ListenPort, peerStr, statusStr)
|
||||
}
|
||||
|
||||
// Peer details
|
||||
fmt.Fprintln(w)
|
||||
fmt.Fprintln(w, styleBold.Render("Peer Details:"))
|
||||
|
||||
for _, cs := range snap.Nodes {
|
||||
if cs.Error != nil || cs.Report == nil || cs.Report.WireGuard == nil {
|
||||
continue
|
||||
}
|
||||
wg := cs.Report.WireGuard
|
||||
if !wg.InterfaceUp {
|
||||
continue
|
||||
}
|
||||
localIP := wg.WgIP
|
||||
for _, p := range wg.Peers {
|
||||
hsAge := formatDuration(p.HandshakeAgeSec)
|
||||
rx := formatBytes(p.TransferRx)
|
||||
tx := formatBytes(p.TransferTx)
|
||||
|
||||
peerIP := p.AllowedIPs
|
||||
// Strip CIDR if present
|
||||
if idx := strings.Index(peerIP, "/"); idx > 0 {
|
||||
peerIP = peerIP[:idx]
|
||||
}
|
||||
|
||||
hsColor := styleGreen
|
||||
if p.LatestHandshake == 0 {
|
||||
hsAge = "never"
|
||||
hsColor = styleRed
|
||||
} else if p.HandshakeAgeSec > 180 {
|
||||
hsColor = styleYellow
|
||||
}
|
||||
|
||||
fmt.Fprintf(w, " %s \u2194 %s: handshake %s, rx: %s, tx: %s\n",
|
||||
localIP, peerIP, hsColor.Render(hsAge), rx, tx)
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// MeshJSON writes the WireGuard mesh as JSON.
|
||||
func MeshJSON(snap *monitor.ClusterSnapshot, w io.Writer) error {
|
||||
type peerEntry struct {
|
||||
AllowedIPs string `json:"allowed_ips"`
|
||||
HandshakeAgeSec int64 `json:"handshake_age_sec"`
|
||||
TransferRxBytes int64 `json:"transfer_rx_bytes"`
|
||||
TransferTxBytes int64 `json:"transfer_tx_bytes"`
|
||||
}
|
||||
type meshEntry struct {
|
||||
Host string `json:"host"`
|
||||
WgIP string `json:"wg_ip"`
|
||||
ListenPort int `json:"listen_port"`
|
||||
PeerCount int `json:"peer_count"`
|
||||
Up bool `json:"up"`
|
||||
Peers []peerEntry `json:"peers,omitempty"`
|
||||
}
|
||||
|
||||
var entries []meshEntry
|
||||
for _, cs := range snap.Nodes {
|
||||
if cs.Error != nil || cs.Report == nil || cs.Report.WireGuard == nil {
|
||||
continue
|
||||
}
|
||||
wg := cs.Report.WireGuard
|
||||
e := meshEntry{
|
||||
Host: cs.Node.Host,
|
||||
WgIP: wg.WgIP,
|
||||
ListenPort: wg.ListenPort,
|
||||
PeerCount: wg.PeerCount,
|
||||
Up: wg.InterfaceUp,
|
||||
}
|
||||
for _, p := range wg.Peers {
|
||||
e.Peers = append(e.Peers, peerEntry{
|
||||
AllowedIPs: p.AllowedIPs,
|
||||
HandshakeAgeSec: p.HandshakeAgeSec,
|
||||
TransferRxBytes: p.TransferRx,
|
||||
TransferTxBytes: p.TransferTx,
|
||||
})
|
||||
}
|
||||
entries = append(entries, e)
|
||||
}
|
||||
|
||||
return writeJSON(w, entries)
|
||||
}
|
||||
|
||||
// formatDuration formats a second count as a coarse "N{s,m,h} ago" string.
func formatDuration(sec int64) string {
	switch {
	case sec < 60:
		return fmt.Sprintf("%ds ago", sec)
	case sec < 3600:
		return fmt.Sprintf("%dm ago", sec/60)
	default:
		return fmt.Sprintf("%dh ago", sec/3600)
	}
}
|
||||
|
||||
// formatBytes formats a byte count with one decimal in the largest unit
// (GB, MB, KB) that fits, or as plain bytes below 1 KiB.
func formatBytes(b int64) string {
	const (
		kb int64 = 1024
		mb       = 1024 * kb
		gb       = 1024 * mb
	)
	if b >= gb {
		return fmt.Sprintf("%.1fGB", float64(b)/float64(gb))
	}
	if b >= mb {
		return fmt.Sprintf("%.1fMB", float64(b)/float64(mb))
	}
	if b >= kb {
		return fmt.Sprintf("%.1fKB", float64(b)/float64(kb))
	}
	return fmt.Sprintf("%dB", b)
}
|
||||
114
pkg/cli/monitor/display/namespaces.go
Normal file
114
pkg/cli/monitor/display/namespaces.go
Normal file
@ -0,0 +1,114 @@
|
||||
package display
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io"
|
||||
"sort"
|
||||
"strings"
|
||||
|
||||
"github.com/DeBrosOfficial/network/pkg/cli/monitor"
|
||||
)
|
||||
|
||||
// NamespacesTable prints per-namespace health across nodes to w.
|
||||
func NamespacesTable(snap *monitor.ClusterSnapshot, w io.Writer) error {
|
||||
fmt.Fprintf(w, "%s\n", styleBold.Render(
|
||||
fmt.Sprintf("Namespace Health \u2014 %s", snap.Environment)))
|
||||
fmt.Fprintln(w, strings.Repeat("\u2550", 28))
|
||||
fmt.Fprintln(w)
|
||||
|
||||
// Collect all namespace entries across nodes
|
||||
type nsRow struct {
|
||||
namespace string
|
||||
host string
|
||||
rqlite string
|
||||
olric string
|
||||
gateway string
|
||||
}
|
||||
|
||||
var rows []nsRow
|
||||
nsNames := map[string]bool{}
|
||||
|
||||
for _, cs := range snap.Nodes {
|
||||
if cs.Error != nil || cs.Report == nil {
|
||||
continue
|
||||
}
|
||||
for _, ns := range cs.Report.Namespaces {
|
||||
nsNames[ns.Name] = true
|
||||
|
||||
rqliteStr := statusIcon(ns.RQLiteUp)
|
||||
if ns.RQLiteUp && ns.RQLiteState != "" {
|
||||
rqliteStr = ns.RQLiteState
|
||||
}
|
||||
|
||||
rows = append(rows, nsRow{
|
||||
namespace: ns.Name,
|
||||
host: cs.Node.Host,
|
||||
rqlite: rqliteStr,
|
||||
olric: statusIcon(ns.OlricUp),
|
||||
gateway: statusIcon(ns.GatewayUp),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
if len(rows) == 0 {
|
||||
fmt.Fprintln(w, styleMuted.Render(" No namespaces found"))
|
||||
return nil
|
||||
}
|
||||
|
||||
// Sort by namespace name, then host
|
||||
sort.Slice(rows, func(i, j int) bool {
|
||||
if rows[i].namespace != rows[j].namespace {
|
||||
return rows[i].namespace < rows[j].namespace
|
||||
}
|
||||
return rows[i].host < rows[j].host
|
||||
})
|
||||
|
||||
// Header
|
||||
fmt.Fprintf(w, "%-13s %-18s %-11s %-7s %s\n",
|
||||
styleHeader.Render("NAMESPACE"),
|
||||
styleHeader.Render("NODE"),
|
||||
styleHeader.Render("RQLITE"),
|
||||
styleHeader.Render("OLRIC"),
|
||||
styleHeader.Render("GATEWAY"))
|
||||
fmt.Fprintln(w, separator(58))
|
||||
|
||||
for _, r := range rows {
|
||||
fmt.Fprintf(w, "%-13s %-18s %-11s %-7s %s\n",
|
||||
r.namespace, r.host, r.rqlite, r.olric, r.gateway)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// NamespacesJSON writes namespace health as JSON.
|
||||
func NamespacesJSON(snap *monitor.ClusterSnapshot, w io.Writer) error {
|
||||
type nsEntry struct {
|
||||
Namespace string `json:"namespace"`
|
||||
Host string `json:"host"`
|
||||
RQLiteUp bool `json:"rqlite_up"`
|
||||
RQLiteState string `json:"rqlite_state,omitempty"`
|
||||
OlricUp bool `json:"olric_up"`
|
||||
GatewayUp bool `json:"gateway_up"`
|
||||
GatewayStatus int `json:"gateway_status,omitempty"`
|
||||
}
|
||||
|
||||
var entries []nsEntry
|
||||
for _, cs := range snap.Nodes {
|
||||
if cs.Error != nil || cs.Report == nil {
|
||||
continue
|
||||
}
|
||||
for _, ns := range cs.Report.Namespaces {
|
||||
entries = append(entries, nsEntry{
|
||||
Namespace: ns.Name,
|
||||
Host: cs.Node.Host,
|
||||
RQLiteUp: ns.RQLiteUp,
|
||||
RQLiteState: ns.RQLiteState,
|
||||
OlricUp: ns.OlricUp,
|
||||
GatewayUp: ns.GatewayUp,
|
||||
GatewayStatus: ns.GatewayStatus,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
return writeJSON(w, entries)
|
||||
}
|
||||
167
pkg/cli/monitor/display/node.go
Normal file
167
pkg/cli/monitor/display/node.go
Normal file
@ -0,0 +1,167 @@
|
||||
package display
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io"
|
||||
|
||||
"github.com/DeBrosOfficial/network/pkg/cli/monitor"
|
||||
)
|
||||
|
||||
// NodeTable prints detailed per-node information to w.
//
// Each node gets a header line plus one line per subsystem (System,
// RQLite, WireGuard, Olric, IPFS, Anyone); subsystems missing from the
// report are shown as "not configured". Unreachable nodes get a red
// two-line block instead. Blocks are separated by a blank line. Always
// returns nil.
func NodeTable(snap *monitor.ClusterSnapshot, w io.Writer) error {
	for i, cs := range snap.Nodes {
		if i > 0 {
			fmt.Fprintln(w)
		}

		host := cs.Node.Host
		role := cs.Node.Role

		// Collection failed entirely for this node.
		if cs.Error != nil {
			fmt.Fprintf(w, "%s (%s)\n", styleRed.Render("Node: "+host), role)
			fmt.Fprintf(w, " %s\n", styleRed.Render(fmt.Sprintf("UNREACHABLE: %v", cs.Error)))
			continue
		}

		r := cs.Report
		if r == nil {
			fmt.Fprintf(w, "%s (%s)\n", styleRed.Render("Node: "+host), role)
			fmt.Fprintf(w, " %s\n", styleRed.Render("No report available"))
			continue
		}

		fmt.Fprintf(w, "%s\n", styleBold.Render(fmt.Sprintf("Node: %s (%s)", host, role)))

		// System
		if r.System != nil {
			sys := r.System
			fmt.Fprintf(w, " System: CPU %d | Load %.2f | Mem %d%% (%d/%d MB) | Disk %d%%\n",
				sys.CPUCount, sys.LoadAvg1, sys.MemUsePct, sys.MemUsedMB, sys.MemTotalMB, sys.DiskUsePct)
		} else {
			fmt.Fprintln(w, " System: "+styleMuted.Render("no data"))
		}

		// RQLite
		if r.RQLite != nil {
			rq := r.RQLite
			readyStr := styleRed.Render("Not Ready")
			if rq.Ready {
				readyStr = styleGreen.Render("Ready")
			}
			if rq.Responsive {
				fmt.Fprintf(w, " RQLite: %s | Term %d | Applied %d | Peers %d | %s\n",
					rq.RaftState, rq.Term, rq.Applied, rq.NumPeers, readyStr)
			} else {
				fmt.Fprintf(w, " RQLite: %s\n", styleRed.Render("NOT RESPONDING"))
			}
		} else {
			fmt.Fprintln(w, " RQLite: "+styleMuted.Render("not configured"))
		}

		// WireGuard
		if r.WireGuard != nil {
			wg := r.WireGuard
			if wg.InterfaceUp {
				// Check handshakes: any never-seen or stale (>180s) peer
				// flips the handshake health icon.
				hsOK := true
				for _, p := range wg.Peers {
					if p.LatestHandshake == 0 || p.HandshakeAgeSec > 180 {
						hsOK = false
						break
					}
				}
				hsStr := statusIcon(hsOK)
				fmt.Fprintf(w, " WireGuard: UP | %s | %d peers | handshakes %s\n",
					wg.WgIP, wg.PeerCount, hsStr)
			} else {
				fmt.Fprintf(w, " WireGuard: %s\n", styleRed.Render("DOWN"))
			}
		} else {
			fmt.Fprintln(w, " WireGuard: "+styleMuted.Render("not configured"))
		}

		// Olric
		if r.Olric != nil {
			ol := r.Olric
			stateStr := styleRed.Render("inactive")
			if ol.ServiceActive {
				stateStr = styleGreen.Render("active")
			}
			fmt.Fprintf(w, " Olric: %s | %d members\n", stateStr, ol.MemberCount)
		} else {
			fmt.Fprintln(w, " Olric: "+styleMuted.Render("not configured"))
		}

		// IPFS
		if r.IPFS != nil {
			ipfs := r.IPFS
			daemonStr := styleRed.Render("inactive")
			if ipfs.DaemonActive {
				daemonStr = styleGreen.Render("active")
			}
			clusterStr := styleRed.Render("DOWN")
			if ipfs.ClusterActive {
				clusterStr = styleGreen.Render("OK")
			}
			fmt.Fprintf(w, " IPFS: %s | %d swarm peers | cluster %s\n",
				daemonStr, ipfs.SwarmPeerCount, clusterStr)
		} else {
			fmt.Fprintln(w, " IPFS: "+styleMuted.Render("not configured"))
		}

		// Anyone
		if r.Anyone != nil {
			an := r.Anyone
			// Prefer the reported mode; otherwise derive it from which
			// service flag is active.
			mode := an.Mode
			if mode == "" {
				if an.RelayActive {
					mode = "relay"
				} else if an.ClientActive {
					mode = "client"
				} else {
					mode = "inactive"
				}
			}
			bootStr := styleRed.Render("not bootstrapped")
			if an.Bootstrapped {
				bootStr = styleGreen.Render("bootstrapped")
			}
			fmt.Fprintf(w, " Anyone: %s | %s\n", mode, bootStr)
		} else {
			fmt.Fprintln(w, " Anyone: "+styleMuted.Render("not configured"))
		}
	}

	return nil
}
|
||||
|
||||
// NodeJSON writes the node details as JSON.
|
||||
func NodeJSON(snap *monitor.ClusterSnapshot, w io.Writer) error {
|
||||
type nodeDetail struct {
|
||||
Host string `json:"host"`
|
||||
Role string `json:"role"`
|
||||
Status string `json:"status"`
|
||||
Error string `json:"error,omitempty"`
|
||||
Report interface{} `json:"report,omitempty"`
|
||||
}
|
||||
|
||||
var entries []nodeDetail
|
||||
for _, cs := range snap.Nodes {
|
||||
e := nodeDetail{
|
||||
Host: cs.Node.Host,
|
||||
Role: cs.Node.Role,
|
||||
}
|
||||
if cs.Error != nil {
|
||||
e.Status = "unreachable"
|
||||
e.Error = cs.Error.Error()
|
||||
} else if cs.Report != nil {
|
||||
e.Status = "ok"
|
||||
e.Report = cs.Report
|
||||
} else {
|
||||
e.Status = "unknown"
|
||||
}
|
||||
entries = append(entries, e)
|
||||
}
|
||||
|
||||
return writeJSON(w, entries)
|
||||
}
|
||||
182
pkg/cli/monitor/display/report.go
Normal file
182
pkg/cli/monitor/display/report.go
Normal file
@ -0,0 +1,182 @@
|
||||
package display
|
||||
|
||||
import (
|
||||
"io"
|
||||
"time"
|
||||
|
||||
"github.com/DeBrosOfficial/network/pkg/cli/monitor"
|
||||
"github.com/DeBrosOfficial/network/pkg/cli/production/report"
|
||||
)
|
||||
|
||||
// fullReport is the top-level schema of the LLM-optimized JSON report
// emitted by FullReport: collection metadata, a cluster-wide summary,
// the full alert list, and one entry per node.
type fullReport struct {
	// Meta describes the collection run itself.
	Meta struct {
		Environment  string    `json:"environment"`
		CollectedAt  time.Time `json:"collected_at"`
		DurationSec  float64   `json:"duration_seconds"`
		NodeCount    int       `json:"node_count"`
		HealthyCount int       `json:"healthy_count"`
		FailedCount  int       `json:"failed_count"`
	} `json:"meta"`
	// Summary holds the cluster-wide rollups computed by the helpers in
	// this file (findRQLiteLeader, computeQuorumStatus, etc.).
	Summary struct {
		RQLiteLeader   string `json:"rqlite_leader"`
		RQLiteQuorum   string `json:"rqlite_quorum"`
		WGMeshStatus   string `json:"wg_mesh_status"`
		ServiceHealth  string `json:"service_health"`
		CriticalAlerts int    `json:"critical_alerts"`
		WarningAlerts  int    `json:"warning_alerts"`
	} `json:"summary"`
	Alerts []monitor.Alert `json:"alerts"`
	Nodes  []nodeEntry     `json:"nodes"`
}

// nodeEntry is one node's slice of the full report: identity, an overall
// status, and either the raw report or the collection error.
type nodeEntry struct {
	Host   string             `json:"host"`
	Role   string             `json:"role"`
	Status string             `json:"status"` // "ok", "unreachable", "degraded"
	Report *report.NodeReport `json:"report,omitempty"`
	Error  string             `json:"error,omitempty"`
}
|
||||
|
||||
// FullReport outputs the LLM-optimized JSON report to w.
//
// It assembles collection metadata, cluster-wide summary rollups, the
// raw alert list, and per-node entries. A reachable node that is the
// subject of any critical alert is marked "degraded" rather than "ok".
func FullReport(snap *monitor.ClusterSnapshot, w io.Writer) error {
	fr := fullReport{}

	// Meta
	fr.Meta.Environment = snap.Environment
	fr.Meta.CollectedAt = snap.CollectedAt
	fr.Meta.DurationSec = snap.Duration.Seconds()
	fr.Meta.NodeCount = snap.TotalCount()
	fr.Meta.HealthyCount = snap.HealthyCount()
	fr.Meta.FailedCount = len(snap.Failed())

	// Summary
	fr.Summary.RQLiteLeader = findRQLiteLeader(snap)
	fr.Summary.RQLiteQuorum = computeQuorumStatus(snap)
	fr.Summary.WGMeshStatus = computeWGMeshStatus(snap)
	fr.Summary.ServiceHealth = computeServiceHealth(snap)

	crit, warn := countAlerts(snap.Alerts)
	fr.Summary.CriticalAlerts = crit
	fr.Summary.WarningAlerts = warn

	// Alerts
	fr.Alerts = snap.Alerts

	// Build set of hosts with critical alerts for "degraded" detection
	// (cluster-scoped alerts do not degrade any individual node).
	criticalHosts := map[string]bool{}
	for _, a := range snap.Alerts {
		if a.Severity == monitor.AlertCritical && a.Node != "" && a.Node != "cluster" {
			criticalHosts[a.Node] = true
		}
	}

	// Nodes
	for _, cs := range snap.Nodes {
		ne := nodeEntry{
			Host: cs.Node.Host,
			Role: cs.Node.Role,
		}
		if cs.Error != nil {
			ne.Status = "unreachable"
			ne.Error = cs.Error.Error()
		} else if cs.Report != nil {
			if criticalHosts[cs.Node.Host] {
				ne.Status = "degraded"
			} else {
				ne.Status = "ok"
			}
			ne.Report = cs.Report
		} else {
			// No error recorded but no report either: treat as unreachable.
			ne.Status = "unreachable"
		}
		fr.Nodes = append(fr.Nodes, ne)
	}

	return writeJSON(w, fr)
}
|
||||
|
||||
// findRQLiteLeader returns the host of the RQLite leader, or "none".
|
||||
func findRQLiteLeader(snap *monitor.ClusterSnapshot) string {
|
||||
for _, cs := range snap.Nodes {
|
||||
if cs.Report != nil && cs.Report.RQLite != nil && cs.Report.RQLite.RaftState == "Leader" {
|
||||
return cs.Node.Host
|
||||
}
|
||||
}
|
||||
return "none"
|
||||
}
|
||||
|
||||
// computeQuorumStatus returns "ok", "degraded", or "lost".
|
||||
func computeQuorumStatus(snap *monitor.ClusterSnapshot) string {
|
||||
total := 0
|
||||
responsive := 0
|
||||
for _, cs := range snap.Nodes {
|
||||
if cs.Report != nil && cs.Report.RQLite != nil {
|
||||
total++
|
||||
if cs.Report.RQLite.Responsive {
|
||||
responsive++
|
||||
}
|
||||
}
|
||||
}
|
||||
if total == 0 {
|
||||
return "unknown"
|
||||
}
|
||||
quorum := (total / 2) + 1
|
||||
if responsive >= quorum {
|
||||
return "ok"
|
||||
}
|
||||
if responsive > 0 {
|
||||
return "degraded"
|
||||
}
|
||||
return "lost"
|
||||
}
|
||||
|
||||
// computeWGMeshStatus returns "ok", "degraded", or "down".
|
||||
func computeWGMeshStatus(snap *monitor.ClusterSnapshot) string {
|
||||
totalWG := 0
|
||||
upCount := 0
|
||||
for _, cs := range snap.Nodes {
|
||||
if cs.Report != nil && cs.Report.WireGuard != nil {
|
||||
totalWG++
|
||||
if cs.Report.WireGuard.InterfaceUp {
|
||||
upCount++
|
||||
}
|
||||
}
|
||||
}
|
||||
if totalWG == 0 {
|
||||
return "unknown"
|
||||
}
|
||||
if upCount == totalWG {
|
||||
return "ok"
|
||||
}
|
||||
if upCount > 0 {
|
||||
return "degraded"
|
||||
}
|
||||
return "down"
|
||||
}
|
||||
|
||||
// computeServiceHealth returns "ok", "degraded", or "critical".
|
||||
func computeServiceHealth(snap *monitor.ClusterSnapshot) string {
|
||||
totalSvc := 0
|
||||
failedSvc := 0
|
||||
for _, cs := range snap.Nodes {
|
||||
if cs.Report == nil || cs.Report.Services == nil {
|
||||
continue
|
||||
}
|
||||
for _, svc := range cs.Report.Services.Services {
|
||||
totalSvc++
|
||||
if svc.ActiveState == "failed" {
|
||||
failedSvc++
|
||||
}
|
||||
}
|
||||
}
|
||||
if totalSvc == 0 {
|
||||
return "unknown"
|
||||
}
|
||||
if failedSvc == 0 {
|
||||
return "ok"
|
||||
}
|
||||
if failedSvc < totalSvc/2 {
|
||||
return "degraded"
|
||||
}
|
||||
return "critical"
|
||||
}
|
||||
131
pkg/cli/monitor/display/service.go
Normal file
131
pkg/cli/monitor/display/service.go
Normal file
@ -0,0 +1,131 @@
|
||||
package display
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io"
|
||||
"sort"
|
||||
"strings"
|
||||
|
||||
"github.com/DeBrosOfficial/network/pkg/cli/monitor"
|
||||
)
|
||||
|
||||
// ServiceTable prints a cross-node service status matrix to w.
//
// Rows are the union of all systemd service names seen across reachable
// nodes (sorted); columns are the nodes (in snapshot order), headed by a
// shortened form of each host address. Cells show the colored
// active-state, or a muted "--" when that node does not run the service.
// Always returns nil.
//
// NOTE(review): the %-22s / %-12s padding is applied to already-rendered
// (styled) strings; if the style emits ANSI escape codes their bytes
// count toward the width, which would skew column alignment on color
// terminals — confirm against the style definitions.
func ServiceTable(snap *monitor.ClusterSnapshot, w io.Writer) error {
	fmt.Fprintf(w, "%s\n", styleBold.Render(
		fmt.Sprintf("Service Status Matrix \u2014 %s", snap.Environment)))
	fmt.Fprintln(w, strings.Repeat("\u2550", 36))
	fmt.Fprintln(w)

	// Collect all service names and build per-host maps
	type hostServices struct {
		host     string
		shortIP  string
		services map[string]string // name -> active_state
	}

	var hosts []hostServices
	serviceSet := map[string]bool{}

	for _, cs := range snap.Nodes {
		if cs.Error != nil || cs.Report == nil || cs.Report.Services == nil {
			continue
		}
		hs := hostServices{
			host:     cs.Node.Host,
			shortIP:  shortIP(cs.Node.Host),
			services: make(map[string]string),
		}
		for _, svc := range cs.Report.Services.Services {
			hs.services[svc.Name] = svc.ActiveState
			serviceSet[svc.Name] = true
		}
		hosts = append(hosts, hs)
	}

	// Sort service names (map iteration order is random)
	var svcNames []string
	for name := range serviceSet {
		svcNames = append(svcNames, name)
	}
	sort.Strings(svcNames)

	if len(hosts) == 0 || len(svcNames) == 0 {
		fmt.Fprintln(w, styleMuted.Render(" No service data available"))
		return nil
	}

	// Header: SERVICE + each host short IP
	hdr := fmt.Sprintf("%-22s", styleHeader.Render("SERVICE"))
	for _, h := range hosts {
		hdr += fmt.Sprintf("%-12s", styleHeader.Render(h.shortIP))
	}
	fmt.Fprintln(w, hdr)
	fmt.Fprintln(w, separator(22+12*len(hosts)))

	// Rows
	for _, name := range svcNames {
		row := fmt.Sprintf("%-22s", name)
		for _, h := range hosts {
			state, ok := h.services[name]
			if !ok {
				row += fmt.Sprintf("%-12s", styleMuted.Render("--"))
			} else {
				row += fmt.Sprintf("%-12s", colorServiceState(state))
			}
		}
		fmt.Fprintln(w, row)
	}

	return nil
}
|
||||
|
||||
// ServiceJSON writes the service matrix as JSON.
|
||||
func ServiceJSON(snap *monitor.ClusterSnapshot, w io.Writer) error {
|
||||
type svcEntry struct {
|
||||
Host string `json:"host"`
|
||||
Services map[string]string `json:"services"`
|
||||
}
|
||||
|
||||
var entries []svcEntry
|
||||
for _, cs := range snap.Nodes {
|
||||
if cs.Error != nil || cs.Report == nil || cs.Report.Services == nil {
|
||||
continue
|
||||
}
|
||||
e := svcEntry{
|
||||
Host: cs.Node.Host,
|
||||
Services: make(map[string]string),
|
||||
}
|
||||
for _, svc := range cs.Report.Services.Services {
|
||||
e.Services[svc.Name] = svc.ActiveState
|
||||
}
|
||||
entries = append(entries, e)
|
||||
}
|
||||
|
||||
return writeJSON(w, entries)
|
||||
}
|
||||
|
||||
// shortIP compacts a host string for column headers: a dotted-quad IPv4
// address is reduced to its first three octets; any other value longer
// than 12 bytes is truncated to 12 characters.
func shortIP(ip string) string {
	if octets := strings.Split(ip, "."); len(octets) == 4 {
		return strings.Join(octets[:3], ".")
	}
	if len(ip) > 12 {
		return ip[:12]
	}
	return ip
}
|
||||
|
||||
// colorServiceState renders a service state with appropriate color.
|
||||
func colorServiceState(state string) string {
|
||||
switch state {
|
||||
case "active":
|
||||
return styleGreen.Render("ACTIVE")
|
||||
case "failed":
|
||||
return styleRed.Render("FAILED")
|
||||
case "inactive":
|
||||
return styleMuted.Render("inactive")
|
||||
default:
|
||||
return styleYellow.Render(state)
|
||||
}
|
||||
}
|
||||
53
pkg/cli/monitor/display/table.go
Normal file
53
pkg/cli/monitor/display/table.go
Normal file
@ -0,0 +1,53 @@
|
||||
package display
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"io"
|
||||
"strings"
|
||||
|
||||
"github.com/DeBrosOfficial/network/pkg/cli/monitor"
|
||||
"github.com/charmbracelet/lipgloss"
|
||||
)
|
||||
|
||||
// Shared lipgloss styles for the display package's plain-text tables.
var (
	styleGreen  = lipgloss.NewStyle().Foreground(lipgloss.Color("#00ff00")) // healthy / ok
	styleRed    = lipgloss.NewStyle().Foreground(lipgloss.Color("#ff0000")) // failed / critical
	styleYellow = lipgloss.NewStyle().Foreground(lipgloss.Color("#ffff00")) // warnings / unusual states
	styleMuted  = lipgloss.NewStyle().Foreground(lipgloss.Color("#888888")) // secondary / absent data
	styleBold   = lipgloss.NewStyle().Bold(true)                            // section titles
	styleHeader = lipgloss.NewStyle().Bold(true).Foreground(lipgloss.Color("#ffffff")) // table headers
)
|
||||
|
||||
// statusIcon returns a green "OK" or red "!!" indicator.
|
||||
func statusIcon(ok bool) string {
|
||||
if ok {
|
||||
return styleGreen.Render("OK")
|
||||
}
|
||||
return styleRed.Render("!!")
|
||||
}
|
||||
|
||||
// severityColor returns the lipgloss style for a given alert severity.
|
||||
func severityColor(s monitor.AlertSeverity) lipgloss.Style {
|
||||
switch s {
|
||||
case monitor.AlertCritical:
|
||||
return styleRed
|
||||
case monitor.AlertWarning:
|
||||
return styleYellow
|
||||
case monitor.AlertInfo:
|
||||
return styleMuted
|
||||
default:
|
||||
return styleMuted
|
||||
}
|
||||
}
|
||||
|
||||
// separator returns a horizontal rule (U+2500) of the given width.
func separator(width int) string {
	var b strings.Builder
	for i := 0; i < width; i++ {
		b.WriteRune('\u2500')
	}
	return b.String()
}
|
||||
|
||||
// writeJSON encodes v as indented JSON to w.
|
||||
func writeJSON(w io.Writer, v interface{}) error {
|
||||
enc := json.NewEncoder(w)
|
||||
enc.SetIndent("", " ")
|
||||
return enc.Encode(v)
|
||||
}
|
||||
75
pkg/cli/monitor/snapshot.go
Normal file
75
pkg/cli/monitor/snapshot.go
Normal file
@ -0,0 +1,75 @@
|
||||
package monitor
|
||||
|
||||
import (
|
||||
"time"
|
||||
|
||||
"github.com/DeBrosOfficial/network/pkg/cli/production/report"
|
||||
"github.com/DeBrosOfficial/network/pkg/inspector"
|
||||
)
|
||||
|
||||
// CollectionStatus tracks the SSH collection result for a single node.
// On success Report is non-nil and Error is nil; on failure Error is
// non-nil and Report is nil.
type CollectionStatus struct {
	Node     inspector.Node     // target node identity (host, role, SSH details)
	Report   *report.NodeReport // parsed node report; nil when collection failed
	Error    error              // collection/parse error; nil on success
	Duration time.Duration      // wall-clock time spent collecting this node
	Retries  int                // number of retries attempted before success/failure
}
|
||||
|
||||
// ClusterSnapshot is the aggregated state of the entire cluster at a point
// in time: one CollectionStatus per node plus any alerts derived from them.
type ClusterSnapshot struct {
	Environment string             // environment name this snapshot was collected from
	CollectedAt time.Time          // when the collection started
	Duration    time.Duration      // total wall-clock time for the whole collection
	Nodes       []CollectionStatus // per-node results, successful or failed
	Alerts      []Alert            // alerts computed from the node reports
}
|
||||
|
||||
// Healthy returns only nodes that reported successfully.
|
||||
func (cs *ClusterSnapshot) Healthy() []*report.NodeReport {
|
||||
var out []*report.NodeReport
|
||||
for _, n := range cs.Nodes {
|
||||
if n.Report != nil {
|
||||
out = append(out, n.Report)
|
||||
}
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
// Failed returns nodes where SSH or parsing failed.
|
||||
func (cs *ClusterSnapshot) Failed() []CollectionStatus {
|
||||
var out []CollectionStatus
|
||||
for _, n := range cs.Nodes {
|
||||
if n.Error != nil {
|
||||
out = append(out, n)
|
||||
}
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
// ByHost returns a map of host -> NodeReport for quick lookup.
|
||||
func (cs *ClusterSnapshot) ByHost() map[string]*report.NodeReport {
|
||||
m := make(map[string]*report.NodeReport, len(cs.Nodes))
|
||||
for _, n := range cs.Nodes {
|
||||
if n.Report != nil {
|
||||
m[n.Node.Host] = n.Report
|
||||
}
|
||||
}
|
||||
return m
|
||||
}
|
||||
|
||||
// HealthyCount returns the number of nodes that reported successfully.
|
||||
func (cs *ClusterSnapshot) HealthyCount() int {
|
||||
count := 0
|
||||
for _, n := range cs.Nodes {
|
||||
if n.Report != nil {
|
||||
count++
|
||||
}
|
||||
}
|
||||
return count
|
||||
}
|
||||
|
||||
// TotalCount returns the total number of nodes attempted, including nodes
// whose collection failed.
func (cs *ClusterSnapshot) TotalCount() int {
	return len(cs.Nodes)
}
|
||||
88
pkg/cli/monitor/tui/alerts.go
Normal file
88
pkg/cli/monitor/tui/alerts.go
Normal file
@ -0,0 +1,88 @@
|
||||
package tui
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"sort"
|
||||
"strings"
|
||||
|
||||
"github.com/DeBrosOfficial/network/pkg/cli/monitor"
|
||||
)
|
||||
|
||||
// renderAlertsTab renders all alerts for the Alerts tab: a summary count
// line, then alerts sorted critical-first and grouped under a severity
// banner. Returns a placeholder while the first collection is in flight,
// or an all-clear line when there are no alerts.
func renderAlertsTab(snap *monitor.ClusterSnapshot, width int) string {
	if snap == nil {
		// First paint happens before the initial collection completes.
		return styleMuted.Render("Collecting cluster data...")
	}

	if len(snap.Alerts) == 0 {
		return styleHealthy.Render(" No alerts. All systems nominal.")
	}

	var b strings.Builder

	// Summary line: "Alerts  N critical  N warning  N info".
	critCount, warnCount, infoCount := countAlertsBySeverity(snap.Alerts)
	b.WriteString(styleBold.Render("Alerts"))
	b.WriteString(fmt.Sprintf(" %s %s %s\n",
		styleCritical.Render(fmt.Sprintf("%d critical", critCount)),
		styleWarning.Render(fmt.Sprintf("%d warning", warnCount)),
		styleMuted.Render(fmt.Sprintf("%d info", infoCount)),
	))
	b.WriteString(separator(width))
	b.WriteString("\n\n")

	// Sort a copy (snap.Alerts is left untouched): critical first, then
	// warning, then info.
	sorted := make([]monitor.Alert, len(snap.Alerts))
	copy(sorted, snap.Alerts)
	sort.Slice(sorted, func(i, j int) bool {
		return severityRank(sorted[i].Severity) < severityRank(sorted[j].Severity)
	})

	// Emit a banner each time the severity changes (groups are contiguous
	// because the slice is sorted by severity).
	currentSev := monitor.AlertSeverity("")
	for _, a := range sorted {
		if a.Severity != currentSev {
			currentSev = a.Severity
			label := strings.ToUpper(string(a.Severity))
			b.WriteString(severityStyle(string(a.Severity)).Render(fmt.Sprintf(" ── %s ", label)))
			b.WriteString("\n")
		}

		// One row per alert: tag, [subsystem], node, message.
		sevTag := formatSeverityTag(a.Severity)
		b.WriteString(fmt.Sprintf(" %s %-12s %-18s %s\n",
			sevTag,
			styleMuted.Render("["+a.Subsystem+"]"),
			a.Node,
			a.Message,
		))
	}

	return b.String()
}
|
||||
|
||||
// severityRank returns a sort rank (lower = more severe).
|
||||
func severityRank(s monitor.AlertSeverity) int {
|
||||
switch s {
|
||||
case monitor.AlertCritical:
|
||||
return 0
|
||||
case monitor.AlertWarning:
|
||||
return 1
|
||||
case monitor.AlertInfo:
|
||||
return 2
|
||||
default:
|
||||
return 3
|
||||
}
|
||||
}
|
||||
|
||||
// formatSeverityTag returns a styled severity label.
|
||||
func formatSeverityTag(s monitor.AlertSeverity) string {
|
||||
switch s {
|
||||
case monitor.AlertCritical:
|
||||
return styleCritical.Render("CRIT")
|
||||
case monitor.AlertWarning:
|
||||
return styleWarning.Render("WARN")
|
||||
case monitor.AlertInfo:
|
||||
return styleMuted.Render("INFO")
|
||||
default:
|
||||
return styleMuted.Render("????")
|
||||
}
|
||||
}
|
||||
109
pkg/cli/monitor/tui/dns.go
Normal file
109
pkg/cli/monitor/tui/dns.go
Normal file
@ -0,0 +1,109 @@
|
||||
package tui
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strings"
|
||||
|
||||
"github.com/DeBrosOfficial/network/pkg/cli/monitor"
|
||||
)
|
||||
|
||||
// renderDNSTab renders DNS status for nameserver nodes: service state
// (CoreDNS/Caddy), port bindings, resolution checks, TLS expiry and recent
// log errors, one section per node that reported DNS data.
func renderDNSTab(snap *monitor.ClusterSnapshot, width int) string {
	if snap == nil {
		// First paint happens before the initial collection completes.
		return styleMuted.Render("Collecting cluster data...")
	}

	if snap.HealthyCount() == 0 {
		return styleMuted.Render("No healthy nodes to display.")
	}

	var b strings.Builder

	b.WriteString(styleBold.Render("DNS / Nameserver Status"))
	b.WriteString("\n")
	b.WriteString(separator(width))
	b.WriteString("\n\n")

	// Only nodes whose report includes a DNS section are nameservers;
	// everything else is skipped.
	hasDNS := false
	for _, cs := range snap.Nodes {
		if cs.Report == nil || cs.Report.DNS == nil {
			continue
		}
		hasDNS = true
		r := cs.Report
		dns := r.DNS
		host := nodeHost(r)
		role := cs.Node.Role

		// Node heading: host, plus role when known.
		b.WriteString(styleBold.Render(fmt.Sprintf(" %s", host)))
		if role != "" {
			b.WriteString(fmt.Sprintf(" (%s)", role))
		}
		b.WriteString("\n")

		// Service status: CoreDNS with optional memory/restart detail.
		b.WriteString(fmt.Sprintf(" CoreDNS: %s", statusStr(dns.CoreDNSActive)))
		if dns.CoreDNSMemMB > 0 {
			b.WriteString(fmt.Sprintf(" mem=%dMB", dns.CoreDNSMemMB))
		}
		if dns.CoreDNSRestarts > 0 {
			// Restarts are highlighted because they suggest instability.
			b.WriteString(fmt.Sprintf(" restarts=%s", styleWarning.Render(fmt.Sprintf("%d", dns.CoreDNSRestarts))))
		}
		b.WriteString("\n")

		b.WriteString(fmt.Sprintf(" Caddy: %s\n", statusStr(dns.CaddyActive)))

		// Port bindings (DNS, HTTP, HTTPS).
		b.WriteString(fmt.Sprintf(" Ports: 53=%s 80=%s 443=%s\n",
			statusStr(dns.Port53Bound),
			statusStr(dns.Port80Bound),
			statusStr(dns.Port443Bound),
		))

		// DNS resolution checks (SOA/NS/A/wildcard) and Corefile presence.
		b.WriteString(fmt.Sprintf(" SOA: %s\n", statusStr(dns.SOAResolves)))
		b.WriteString(fmt.Sprintf(" NS: %s", statusStr(dns.NSResolves)))
		if dns.NSRecordCount > 0 {
			b.WriteString(fmt.Sprintf(" (%d records)", dns.NSRecordCount))
		}
		b.WriteString("\n")
		b.WriteString(fmt.Sprintf(" Base A: %s\n", statusStr(dns.BaseAResolves)))
		b.WriteString(fmt.Sprintf(" Wildcard: %s\n", statusStr(dns.WildcardResolves)))
		b.WriteString(fmt.Sprintf(" Corefile: %s\n", statusStr(dns.CorefileExists)))

		// TLS certificate expiry for the base and wildcard certs.
		baseTLS := renderTLSDays(dns.BaseTLSDaysLeft, "base")
		wildTLS := renderTLSDays(dns.WildTLSDaysLeft, "wildcard")
		b.WriteString(fmt.Sprintf(" TLS: %s %s\n", baseTLS, wildTLS))

		// Recent log errors (window presumably the last 5 minutes, per the
		// "(5m)" suffix — confirm against the collector).
		if dns.LogErrors > 0 {
			b.WriteString(fmt.Sprintf(" Log errors: %s (5m)\n",
				styleWarning.Render(fmt.Sprintf("%d", dns.LogErrors))))
		}

		b.WriteString("\n")
	}

	if !hasDNS {
		return styleMuted.Render("No nameserver nodes found (no DNS data reported).")
	}

	return b.String()
}
|
||||
|
||||
// renderTLSDays formats TLS certificate expiry with color coding.
|
||||
func renderTLSDays(days int, label string) string {
|
||||
if days < 0 {
|
||||
return styleMuted.Render(fmt.Sprintf("%s: n/a", label))
|
||||
}
|
||||
s := fmt.Sprintf("%s: %dd", label, days)
|
||||
switch {
|
||||
case days < 7:
|
||||
return styleCritical.Render(s)
|
||||
case days < 14:
|
||||
return styleWarning.Render(s)
|
||||
default:
|
||||
return styleHealthy.Render(s)
|
||||
}
|
||||
}
|
||||
21
pkg/cli/monitor/tui/keys.go
Normal file
21
pkg/cli/monitor/tui/keys.go
Normal file
@ -0,0 +1,21 @@
|
||||
package tui
|
||||
|
||||
import "github.com/charmbracelet/bubbles/key"
|
||||
|
||||
// keyMap declares the TUI's key bindings with help text for each action.
// NOTE(review): model.Update compares msg.String() directly rather than
// using key.Matches against these bindings — confirm whether keys is
// actually consulted anywhere.
type keyMap struct {
	Quit       key.Binding // exit the program
	NextTab    key.Binding // move to the next tab
	PrevTab    key.Binding // move to the previous tab
	Refresh    key.Binding // force an immediate collection
	ScrollUp   key.Binding // scroll the viewport up
	ScrollDown key.Binding // scroll the viewport down
}

// keys is the default binding set used by the monitor TUI.
var keys = keyMap{
	Quit:       key.NewBinding(key.WithKeys("q", "ctrl+c"), key.WithHelp("q", "quit")),
	NextTab:    key.NewBinding(key.WithKeys("tab", "l"), key.WithHelp("tab", "next tab")),
	PrevTab:    key.NewBinding(key.WithKeys("shift+tab", "h"), key.WithHelp("shift+tab", "prev tab")),
	Refresh:    key.NewBinding(key.WithKeys("r"), key.WithHelp("r", "refresh")),
	ScrollUp:   key.NewBinding(key.WithKeys("up", "k")),
	ScrollDown: key.NewBinding(key.WithKeys("down", "j")),
}
|
||||
226
pkg/cli/monitor/tui/model.go
Normal file
226
pkg/cli/monitor/tui/model.go
Normal file
@ -0,0 +1,226 @@
|
||||
package tui
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"time"
|
||||
|
||||
"github.com/charmbracelet/bubbles/viewport"
|
||||
tea "github.com/charmbracelet/bubbletea"
|
||||
|
||||
"github.com/DeBrosOfficial/network/pkg/cli/monitor"
|
||||
)
|
||||
|
||||
// Tab indices for the monitor TUI. tabCount is the sentinel used for
// modular tab cycling in Update.
const (
	tabOverview = iota
	tabNodes
	tabServices
	tabMesh
	tabDNS
	tabNamespaces
	tabAlerts
	tabCount // number of tabs; must stay last
)

// tabNames holds the display label for each tab, indexed by the constants
// above (order must match).
var tabNames = []string{"Overview", "Nodes", "Services", "WG Mesh", "DNS", "Namespaces", "Alerts"}
|
||||
|
||||
// snapshotMsg carries the result of a background collection back into the
// Bubbletea update loop. Exactly one of snap/err is meaningful.
type snapshotMsg struct {
	snap *monitor.ClusterSnapshot // collected snapshot; nil when err != nil
	err  error                    // collection failure; nil on success
}

// tickMsg fires on each refresh interval to trigger a new collection.
type tickMsg time.Time
|
||||
|
||||
// model is the root Bubbletea model for the Orama monitor TUI. It owns the
// collector configuration, the current snapshot, and the scrolling viewport
// that displays the active tab.
type model struct {
	cfg        monitor.CollectorConfig  // how/where to collect node reports
	interval   time.Duration            // refresh period between collections
	activeTab  int                      // index into tabNames / tab constants
	viewport   viewport.Model           // scrollable content area for the active tab
	width      int                      // terminal width (0 until first WindowSizeMsg)
	height     int                      // terminal height (0 until first WindowSizeMsg)
	snapshot   *monitor.ClusterSnapshot // last successful collection; nil before the first
	loading    bool                     // true while a collection is in flight
	lastError  error                    // last collection error; cleared on success
	lastUpdate time.Time                // when the last successful snapshot arrived
	quitting   bool                     // set on quit so View renders nothing
}
|
||||
|
||||
// newModel creates a fresh model with default viewport dimensions.
|
||||
func newModel(cfg monitor.CollectorConfig, interval time.Duration) model {
|
||||
vp := viewport.New(80, 24)
|
||||
return model{
|
||||
cfg: cfg,
|
||||
interval: interval,
|
||||
viewport: vp,
|
||||
loading: true,
|
||||
}
|
||||
}
|
||||
|
||||
// Init kicks off the first collection and arms the periodic refresh ticker.
func (m model) Init() tea.Cmd {
	return tea.Batch(doCollect(m.cfg), tickCmd(m.interval))
}
|
||||
|
||||
// Update is the Bubbletea message handler: key presses switch tabs or
// trigger a manual refresh, WindowSizeMsg resizes the viewport, snapshotMsg
// delivers a finished collection, and tickMsg drives the periodic refresh.
func (m model) Update(msg tea.Msg) (tea.Model, tea.Cmd) {
	var cmds []tea.Cmd

	switch msg := msg.(type) {
	case tea.KeyMsg:
		// NOTE(review): key strings are compared directly here; the keyMap
		// bindings defined in keys.go do not appear to be consulted.
		switch {
		case msg.String() == "q" || msg.String() == "ctrl+c":
			m.quitting = true
			return m, tea.Quit

		case msg.String() == "tab" || msg.String() == "l":
			// Cycle forward through tabs, wrapping at tabCount.
			m.activeTab = (m.activeTab + 1) % tabCount
			m.updateContent()
			m.viewport.GotoTop()
			return m, nil

		case msg.String() == "shift+tab" || msg.String() == "h":
			// Cycle backward; +tabCount keeps the modulus non-negative.
			m.activeTab = (m.activeTab - 1 + tabCount) % tabCount
			m.updateContent()
			m.viewport.GotoTop()
			return m, nil

		case msg.String() == "r":
			// Manual refresh; ignored while a collection is in flight.
			if !m.loading {
				m.loading = true
				return m, doCollect(m.cfg)
			}
			return m, nil

		default:
			// Delegate scrolling (and any other keys) to the viewport.
			var cmd tea.Cmd
			m.viewport, cmd = m.viewport.Update(msg)
			return m, cmd
		}

	case tea.WindowSizeMsg:
		m.width = msg.Width
		m.height = msg.Height
		// Reserve 4 lines: header, tab bar, blank separator, footer.
		vpHeight := msg.Height - 4
		if vpHeight < 1 {
			vpHeight = 1
		}
		m.viewport.Width = msg.Width
		m.viewport.Height = vpHeight
		m.updateContent()
		return m, nil

	case snapshotMsg:
		m.loading = false
		if msg.err != nil {
			// Keep the previous snapshot; the error is surfaced in View.
			m.lastError = msg.err
		} else {
			m.snapshot = msg.snap
			m.lastError = nil
			m.lastUpdate = time.Now()
		}
		m.updateContent()
		return m, nil

	case tickMsg:
		// Periodic refresh: start a new collection unless one is already
		// running, and always re-arm the ticker.
		if !m.loading {
			m.loading = true
			cmds = append(cmds, doCollect(m.cfg))
		}
		cmds = append(cmds, tickCmd(m.interval))
		return m, tea.Batch(cmds...)
	}

	return m, nil
}
|
||||
|
||||
// View assembles the full frame: status header, tab bar, the viewport with
// the active tab's content, and a key-hint footer. Returns an empty string
// once the user has quit so the alt screen is left clean.
func (m model) View() string {
	if m.quitting {
		return ""
	}

	// Header: prefer the last snapshot's identity and age; otherwise show
	// loading or the last collection error.
	var header string
	if m.snapshot != nil {
		ago := time.Since(m.lastUpdate).Truncate(time.Second)
		header = headerStyle.Render(fmt.Sprintf(
			"Orama Monitor — %s — Last: %s (%s ago)",
			m.snapshot.Environment,
			m.lastUpdate.Format("15:04:05"),
			ago,
		))
	} else if m.loading {
		header = headerStyle.Render("Orama Monitor — collecting...")
	} else if m.lastError != nil {
		header = headerStyle.Render(fmt.Sprintf("Orama Monitor — error: %v", m.lastError))
	} else {
		header = headerStyle.Render("Orama Monitor")
	}

	// A refresh in progress on top of existing data gets a suffix rather
	// than replacing the header.
	if m.loading && m.snapshot != nil {
		header += styleMuted.Render(" (refreshing...)")
	}

	// Tab bar
	tabs := renderTabBar(m.activeTab, m.width)

	// Footer with key hints.
	footer := footerStyle.Render("tab: switch | j/k: scroll | r: refresh | q: quit")

	return header + "\n" + tabs + "\n" + m.viewport.View() + "\n" + footer
}
|
||||
|
||||
// updateContent renders the active tab and sets it on the viewport.
|
||||
func (m *model) updateContent() {
|
||||
w := m.width
|
||||
if w == 0 {
|
||||
w = 80
|
||||
}
|
||||
|
||||
var content string
|
||||
switch m.activeTab {
|
||||
case tabOverview:
|
||||
content = renderOverview(m.snapshot, w)
|
||||
case tabNodes:
|
||||
content = renderNodes(m.snapshot, w)
|
||||
case tabServices:
|
||||
content = renderServicesTab(m.snapshot, w)
|
||||
case tabMesh:
|
||||
content = renderWGMesh(m.snapshot, w)
|
||||
case tabDNS:
|
||||
content = renderDNSTab(m.snapshot, w)
|
||||
case tabNamespaces:
|
||||
content = renderNamespacesTab(m.snapshot, w)
|
||||
case tabAlerts:
|
||||
content = renderAlertsTab(m.snapshot, w)
|
||||
}
|
||||
|
||||
m.viewport.SetContent(content)
|
||||
}
|
||||
|
||||
// doCollect returns a tea.Cmd that runs monitor.CollectOnce in a goroutine
// with a 60-second overall timeout and delivers the result as a snapshotMsg.
func doCollect(cfg monitor.CollectorConfig) tea.Cmd {
	return func() tea.Msg {
		ctx, cancel := context.WithTimeout(context.Background(), 60*time.Second)
		defer cancel()
		snap, err := monitor.CollectOnce(ctx, cfg)
		return snapshotMsg{snap: snap, err: err}
	}
}
|
||||
|
||||
// tickCmd returns a tea.Cmd that fires a tickMsg after the given interval.
// Update re-arms it on every tick to produce a steady refresh cadence.
func tickCmd(d time.Duration) tea.Cmd {
	return tea.Tick(d, func(t time.Time) tea.Msg {
		return tickMsg(t)
	})
}
|
||||
|
||||
// Run starts the TUI program with the given collector config, using the
// alternate screen buffer and a fixed 30-second refresh interval. Blocks
// until the user quits or the program errors.
func Run(cfg monitor.CollectorConfig) error {
	m := newModel(cfg, 30*time.Second)
	p := tea.NewProgram(m, tea.WithAltScreen())
	_, err := p.Run()
	return err
}
|
||||
158
pkg/cli/monitor/tui/namespaces.go
Normal file
158
pkg/cli/monitor/tui/namespaces.go
Normal file
@ -0,0 +1,158 @@
|
||||
package tui
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"sort"
|
||||
"strings"
|
||||
|
||||
"github.com/DeBrosOfficial/network/pkg/cli/monitor"
|
||||
)
|
||||
|
||||
// renderNamespacesTab renders per-namespace health across all nodes: first
// a compact matrix (namespace rows x node columns), then a detailed section
// listing gateway/rqlite/olric state per node for each namespace.
func renderNamespacesTab(snap *monitor.ClusterSnapshot, width int) string {
	if snap == nil {
		// First paint happens before the initial collection completes.
		return styleMuted.Render("Collecting cluster data...")
	}

	reports := snap.Healthy()
	if len(reports) == 0 {
		return styleMuted.Render("No healthy nodes to display.")
	}

	var b strings.Builder

	b.WriteString(styleBold.Render("Namespace Health"))
	b.WriteString("\n")
	b.WriteString(separator(width))
	b.WriteString("\n\n")

	// Collect the union of namespace names across all healthy nodes.
	nsSet := make(map[string]bool)
	for _, r := range reports {
		for _, ns := range r.Namespaces {
			nsSet[ns.Name] = true
		}
	}

	nsNames := make([]string, 0, len(nsSet))
	for name := range nsSet {
		nsNames = append(nsNames, name)
	}
	sort.Strings(nsNames)

	if len(nsNames) == 0 {
		return styleMuted.Render("No namespaces found on any node.")
	}

	// Matrix header: NAMESPACE + one column per node (host truncated to 15).
	header := fmt.Sprintf(" %-20s", headerStyle.Render("NAMESPACE"))
	for _, r := range reports {
		host := nodeHost(r)
		if len(host) > 15 {
			host = host[:15]
		}
		header += fmt.Sprintf(" %-17s", headerStyle.Render(host))
	}
	b.WriteString(header)
	b.WriteString("\n")

	// Build lookup: (host, namespace name) -> health indicators.
	type nsKey struct {
		host string
		name string
	}
	nsMap := make(map[nsKey]nsStatus)
	for _, r := range reports {
		host := nodeHost(r)
		for _, ns := range r.Namespaces {
			nsMap[nsKey{host, ns.Name}] = nsStatus{
				gateway:     ns.GatewayUp,
				rqlite:      ns.RQLiteUp,
				rqliteState: ns.RQLiteState,
				rqliteReady: ns.RQLiteReady,
				olric:       ns.OlricUp,
			}
		}
	}

	// Matrix rows: "-" when the node does not host the namespace.
	for _, nsName := range nsNames {
		row := fmt.Sprintf(" %-20s", nsName)
		for _, r := range reports {
			host := nodeHost(r)
			ns, ok := nsMap[nsKey{host, nsName}]
			if !ok {
				row += fmt.Sprintf(" %-17s", styleMuted.Render("-"))
				continue
			}
			row += fmt.Sprintf(" %-17s", renderNsCell(ns))
		}
		b.WriteString(row)
		b.WriteString("\n")
	}

	// Detailed per-namespace view: one line per hosting node with
	// gateway/rqlite(+raft state)/olric status and the base port.
	b.WriteString("\n")
	b.WriteString(styleBold.Render("Namespace Details"))
	b.WriteString("\n")
	b.WriteString(separator(width))
	b.WriteString("\n")

	for _, nsName := range nsNames {
		b.WriteString(fmt.Sprintf("\n %s\n", styleBold.Render(nsName)))
		for _, r := range reports {
			host := nodeHost(r)
			for _, ns := range r.Namespaces {
				if ns.Name != nsName {
					continue
				}
				b.WriteString(fmt.Sprintf(" %-18s gw=%s rqlite=%s",
					host,
					statusStr(ns.GatewayUp),
					statusStr(ns.RQLiteUp),
				))
				if ns.RQLiteState != "" {
					b.WriteString(fmt.Sprintf("(%s)", ns.RQLiteState))
				}
				b.WriteString(fmt.Sprintf(" olric=%s", statusStr(ns.OlricUp)))
				if ns.PortBase > 0 {
					b.WriteString(fmt.Sprintf(" port=%d", ns.PortBase))
				}
				b.WriteString("\n")
			}
		}
	}

	return b.String()
}
|
||||
|
||||
// nsStatus holds a namespace's health indicators for one node.
type nsStatus struct {
	gateway     bool   // namespace gateway process reachable
	rqlite      bool   // rqlite instance reachable
	rqliteState string // raft state string (e.g. leader/follower); may be empty
	rqliteReady bool   // rqlite reported ready (currently unused by renderNsCell)
	olric       bool   // olric cache reachable
}
|
||||
|
||||
// renderNsCell renders a compact cell for the namespace matrix.
|
||||
func renderNsCell(ns nsStatus) string {
|
||||
if ns.gateway && ns.rqlite && ns.olric {
|
||||
return styleHealthy.Render("OK")
|
||||
}
|
||||
if !ns.gateway && !ns.rqlite {
|
||||
return styleCritical.Render("DOWN")
|
||||
}
|
||||
// Partial
|
||||
parts := []string{}
|
||||
if !ns.gateway {
|
||||
parts = append(parts, "gw")
|
||||
}
|
||||
if !ns.rqlite {
|
||||
parts = append(parts, "rq")
|
||||
}
|
||||
if !ns.olric {
|
||||
parts = append(parts, "ol")
|
||||
}
|
||||
return styleWarning.Render("!" + strings.Join(parts, ","))
|
||||
}
|
||||
147
pkg/cli/monitor/tui/nodes.go
Normal file
147
pkg/cli/monitor/tui/nodes.go
Normal file
@ -0,0 +1,147 @@
|
||||
package tui
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/DeBrosOfficial/network/pkg/cli/monitor"
|
||||
)
|
||||
|
||||
// renderNodes renders the Nodes tab with detailed per-node information:
// a heading with reachability, then System, Services, RQLite, WireGuard
// and Network sections for each node that reported them. Unreachable
// nodes show the SSH error, duration and retry count instead.
func renderNodes(snap *monitor.ClusterSnapshot, width int) string {
	if snap == nil {
		// First paint happens before the initial collection completes.
		return styleMuted.Render("Collecting cluster data...")
	}

	var b strings.Builder

	for i, cs := range snap.Nodes {
		if i > 0 {
			b.WriteString("\n")
		}

		host := cs.Node.Host
		role := cs.Node.Role
		if role == "" {
			role = "node"
		}

		// Unreachable node: show the failure details and move on.
		if cs.Error != nil {
			b.WriteString(styleBold.Render(fmt.Sprintf("Node: %s", host)))
			b.WriteString(fmt.Sprintf(" (%s)", role))
			b.WriteString("\n")
			b.WriteString(separator(width))
			b.WriteString("\n")
			b.WriteString(fmt.Sprintf(" Status: %s\n", styleCritical.Render("UNREACHABLE")))
			b.WriteString(fmt.Sprintf(" Error: %s\n", styleCritical.Render(cs.Error.Error())))
			b.WriteString(fmt.Sprintf(" Took: %s\n", styleMuted.Render(cs.Duration.Truncate(time.Millisecond).String())))
			if cs.Retries > 0 {
				b.WriteString(fmt.Sprintf(" Retries: %d\n", cs.Retries))
			}
			continue
		}

		r := cs.Report
		if r == nil {
			// Neither a report nor an error — nothing to render.
			continue
		}

		// Heading: host, role, ONLINE marker and optional version.
		b.WriteString(styleBold.Render(fmt.Sprintf("Node: %s", host)))
		b.WriteString(fmt.Sprintf(" (%s) ", role))
		b.WriteString(styleHealthy.Render("ONLINE"))
		if r.Version != "" {
			b.WriteString(fmt.Sprintf(" v%s", r.Version))
		}
		b.WriteString("\n")
		b.WriteString(separator(width))
		b.WriteString("\n")

		// System Resources: CPU/load, memory, disk, swap, uptime, OOM kills.
		if r.System != nil {
			sys := r.System
			b.WriteString(styleBold.Render(" System"))
			b.WriteString("\n")
			b.WriteString(fmt.Sprintf(" CPU: %d cores, load %.1f / %.1f / %.1f\n",
				sys.CPUCount, sys.LoadAvg1, sys.LoadAvg5, sys.LoadAvg15))
			b.WriteString(fmt.Sprintf(" Memory: %s (%d / %d MB, %d MB avail)\n",
				colorPct(sys.MemUsePct), sys.MemUsedMB, sys.MemTotalMB, sys.MemAvailMB))
			b.WriteString(fmt.Sprintf(" Disk: %s (%s / %s, %s avail)\n",
				colorPct(sys.DiskUsePct), sys.DiskUsedGB, sys.DiskTotalGB, sys.DiskAvailGB))
			if sys.SwapTotalMB > 0 {
				b.WriteString(fmt.Sprintf(" Swap: %d / %d MB\n", sys.SwapUsedMB, sys.SwapTotalMB))
			}
			b.WriteString(fmt.Sprintf(" Uptime: %s\n", sys.UptimeSince))
			if sys.OOMKills > 0 {
				b.WriteString(fmt.Sprintf(" OOM: %s\n", styleCritical.Render(fmt.Sprintf("%d kills", sys.OOMKills))))
			}
		}

		// Services: state (colored), memory, restart count, restart-loop flag.
		if r.Services != nil && len(r.Services.Services) > 0 {
			b.WriteString(styleBold.Render(" Services"))
			b.WriteString("\n")
			for _, svc := range r.Services.Services {
				stateStr := styleHealthy.Render(svc.ActiveState)
				if svc.ActiveState == "failed" {
					stateStr = styleCritical.Render("FAILED")
				} else if svc.ActiveState != "active" {
					stateStr = styleWarning.Render(svc.ActiveState)
				}
				extra := ""
				if svc.MemoryCurrentMB > 0 {
					extra += fmt.Sprintf(" mem=%dMB", svc.MemoryCurrentMB)
				}
				if svc.NRestarts > 0 {
					extra += fmt.Sprintf(" restarts=%d", svc.NRestarts)
				}
				if svc.RestartLoopRisk {
					extra += styleCritical.Render(" RESTART-LOOP")
				}
				b.WriteString(fmt.Sprintf(" %-28s %s%s\n", svc.Name, stateStr, extra))
			}
			if len(r.Services.FailedUnits) > 0 {
				b.WriteString(fmt.Sprintf(" Failed units: %s\n",
					styleCritical.Render(strings.Join(r.Services.FailedUnits, ", "))))
			}
		}

		// RQLite: responsiveness and raft detail when responsive.
		if r.RQLite != nil {
			rq := r.RQLite
			b.WriteString(styleBold.Render(" RQLite"))
			b.WriteString("\n")
			b.WriteString(fmt.Sprintf(" Responsive: %s Ready: %s Strong Read: %s\n",
				statusStr(rq.Responsive), statusStr(rq.Ready), statusStr(rq.StrongRead)))
			if rq.Responsive {
				b.WriteString(fmt.Sprintf(" Raft: %s Leader: %s Term: %d Applied: %d\n",
					styleBold.Render(rq.RaftState), rq.LeaderAddr, rq.Term, rq.Applied))
				if rq.DBSize != "" {
					b.WriteString(fmt.Sprintf(" DB size: %s Peers: %d Goroutines: %d Heap: %dMB\n",
						rq.DBSize, rq.NumPeers, rq.Goroutines, rq.HeapMB))
				}
			}
		}

		// WireGuard: interface state, mesh IP, peer count.
		if r.WireGuard != nil {
			wg := r.WireGuard
			b.WriteString(styleBold.Render(" WireGuard"))
			b.WriteString("\n")
			b.WriteString(fmt.Sprintf(" Interface: %s IP: %s Peers: %d\n",
				statusStr(wg.InterfaceUp), wg.WgIP, wg.PeerCount))
		}

		// Network: internet reachability, firewall, TCP health.
		if r.Network != nil {
			net := r.Network
			b.WriteString(styleBold.Render(" Network"))
			b.WriteString("\n")
			b.WriteString(fmt.Sprintf(" Internet: %s UFW: %s TCP est: %d retrans: %.1f%%\n",
				statusStr(net.InternetReachable), statusStr(net.UFWActive),
				net.TCPEstablished, net.TCPRetransRate))
		}
	}

	return b.String()
}
|
||||
183
pkg/cli/monitor/tui/overview.go
Normal file
183
pkg/cli/monitor/tui/overview.go
Normal file
@ -0,0 +1,183 @@
|
||||
package tui
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strings"
|
||||
|
||||
"github.com/DeBrosOfficial/network/pkg/cli/monitor"
|
||||
)
|
||||
|
||||
// renderOverview renders the Overview tab: cluster summary, node table, alert summary.
|
||||
func renderOverview(snap *monitor.ClusterSnapshot, width int) string {
|
||||
if snap == nil {
|
||||
return styleMuted.Render("Collecting cluster data...")
|
||||
}
|
||||
|
||||
var b strings.Builder
|
||||
|
||||
// -- Cluster Summary --
|
||||
b.WriteString(styleBold.Render("Cluster Summary"))
|
||||
b.WriteString("\n")
|
||||
b.WriteString(separator(width))
|
||||
b.WriteString("\n")
|
||||
|
||||
healthy := snap.HealthyCount()
|
||||
total := snap.TotalCount()
|
||||
failed := total - healthy
|
||||
|
||||
healthColor := styleHealthy
|
||||
if failed > 0 {
|
||||
healthColor = styleWarning
|
||||
}
|
||||
if healthy == 0 && total > 0 {
|
||||
healthColor = styleCritical
|
||||
}
|
||||
|
||||
b.WriteString(fmt.Sprintf(" Environment: %s\n", styleBold.Render(snap.Environment)))
|
||||
b.WriteString(fmt.Sprintf(" Nodes: %s / %d\n", healthColor.Render(fmt.Sprintf("%d healthy", healthy)), total))
|
||||
if failed > 0 {
|
||||
b.WriteString(fmt.Sprintf(" Failed: %s\n", styleCritical.Render(fmt.Sprintf("%d", failed))))
|
||||
}
|
||||
b.WriteString(fmt.Sprintf(" Collect time: %s\n", styleMuted.Render(snap.Duration.Truncate(1e6).String())))
|
||||
b.WriteString("\n")
|
||||
|
||||
// -- Node Table --
|
||||
b.WriteString(styleBold.Render("Nodes"))
|
||||
b.WriteString("\n")
|
||||
b.WriteString(separator(width))
|
||||
b.WriteString("\n")
|
||||
|
||||
// Header row
|
||||
b.WriteString(fmt.Sprintf(" %-18s %-8s %-10s %-8s %-8s %-8s %-10s\n",
|
||||
headerStyle.Render("HOST"),
|
||||
headerStyle.Render("STATUS"),
|
||||
headerStyle.Render("ROLE"),
|
||||
headerStyle.Render("CPU"),
|
||||
headerStyle.Render("MEM%"),
|
||||
headerStyle.Render("DISK%"),
|
||||
headerStyle.Render("RQLITE"),
|
||||
))
|
||||
|
||||
for _, cs := range snap.Nodes {
|
||||
if cs.Error != nil {
|
||||
b.WriteString(fmt.Sprintf(" %-18s %s %s\n",
|
||||
cs.Node.Host,
|
||||
styleCritical.Render("FAIL"),
|
||||
styleMuted.Render(truncateStr(cs.Error.Error(), 40)),
|
||||
))
|
||||
continue
|
||||
}
|
||||
r := cs.Report
|
||||
if r == nil {
|
||||
continue
|
||||
}
|
||||
|
||||
host := r.PublicIP
|
||||
if host == "" {
|
||||
host = r.Hostname
|
||||
}
|
||||
|
||||
var status string
|
||||
if cs.Error == nil && r != nil {
|
||||
status = styleHealthy.Render("OK")
|
||||
} else {
|
||||
status = styleCritical.Render("FAIL")
|
||||
}
|
||||
|
||||
role := cs.Node.Role
|
||||
if role == "" {
|
||||
role = "node"
|
||||
}
|
||||
|
||||
cpuStr := "-"
|
||||
memStr := "-"
|
||||
diskStr := "-"
|
||||
if r.System != nil {
|
||||
cpuStr = fmt.Sprintf("%.1f", r.System.LoadAvg1)
|
||||
memStr = colorPct(r.System.MemUsePct)
|
||||
diskStr = colorPct(r.System.DiskUsePct)
|
||||
}
|
||||
|
||||
rqliteStr := "-"
|
||||
if r.RQLite != nil {
|
||||
if r.RQLite.Responsive {
|
||||
rqliteStr = styleHealthy.Render(r.RQLite.RaftState)
|
||||
} else {
|
||||
rqliteStr = styleCritical.Render("DOWN")
|
||||
}
|
||||
}
|
||||
|
||||
b.WriteString(fmt.Sprintf(" %-18s %-8s %-10s %-8s %-8s %-8s %-10s\n",
|
||||
host, status, role, cpuStr, memStr, diskStr, rqliteStr))
|
||||
}
|
||||
b.WriteString("\n")
|
||||
|
||||
// -- Alert Summary --
|
||||
critCount, warnCount, infoCount := countAlertsBySeverity(snap.Alerts)
|
||||
b.WriteString(styleBold.Render("Alerts"))
|
||||
b.WriteString(fmt.Sprintf(" %s %s %s\n",
|
||||
styleCritical.Render(fmt.Sprintf("%d critical", critCount)),
|
||||
styleWarning.Render(fmt.Sprintf("%d warning", warnCount)),
|
||||
styleMuted.Render(fmt.Sprintf("%d info", infoCount)),
|
||||
))
|
||||
|
||||
if critCount > 0 {
|
||||
b.WriteString("\n")
|
||||
for _, a := range snap.Alerts {
|
||||
if a.Severity == monitor.AlertCritical {
|
||||
b.WriteString(fmt.Sprintf(" %s [%s] %s: %s\n",
|
||||
styleCritical.Render("CRIT"),
|
||||
a.Subsystem,
|
||||
a.Node,
|
||||
a.Message,
|
||||
))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return b.String()
|
||||
}
|
||||
|
||||
// colorPct returns a percentage string colored by threshold.
|
||||
func colorPct(pct int) string {
|
||||
s := fmt.Sprintf("%d%%", pct)
|
||||
switch {
|
||||
case pct >= 90:
|
||||
return styleCritical.Render(s)
|
||||
case pct >= 75:
|
||||
return styleWarning.Render(s)
|
||||
default:
|
||||
return styleHealthy.Render(s)
|
||||
}
|
||||
}
|
||||
|
||||
// countAlertsBySeverity counts alerts by severity level.
|
||||
func countAlertsBySeverity(alerts []monitor.Alert) (crit, warn, info int) {
|
||||
for _, a := range alerts {
|
||||
switch a.Severity {
|
||||
case monitor.AlertCritical:
|
||||
crit++
|
||||
case monitor.AlertWarning:
|
||||
warn++
|
||||
case monitor.AlertInfo:
|
||||
info++
|
||||
}
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// truncateStr truncates s to at most maxLen characters (runes), appending
// "..." when anything was cut off.
//
// The original sliced by byte offset, which could split a multi-byte UTF-8
// rune and emit invalid output; this version counts and slices runes. A
// negative maxLen is treated as 0 instead of panicking on an out-of-range
// slice.
func truncateStr(s string, maxLen int) string {
	if maxLen < 0 {
		maxLen = 0
	}
	runes := []rune(s)
	if len(runes) <= maxLen {
		return s
	}
	return string(runes[:maxLen]) + "..."
}
|
||||
|
||||
// separator returns a dashed line of the given width.
|
||||
func separator(width int) string {
|
||||
if width <= 0 {
|
||||
width = 80
|
||||
}
|
||||
return styleMuted.Render(strings.Repeat("\u2500", width))
|
||||
}
|
||||
133
pkg/cli/monitor/tui/services.go
Normal file
133
pkg/cli/monitor/tui/services.go
Normal file
@ -0,0 +1,133 @@
|
||||
package tui
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"sort"
|
||||
"strings"
|
||||
|
||||
"github.com/DeBrosOfficial/network/pkg/cli/monitor"
|
||||
)
|
||||
|
||||
// renderServicesTab renders a cross-node service matrix.
//
// One row per distinct service name seen anywhere in the cluster, one column
// per healthy node; each cell shows that node's systemd ActiveState for the
// service (or "-" when the node does not run it). A "Failed Systemd Units"
// section is appended when any node reports failed units.
func renderServicesTab(snap *monitor.ClusterSnapshot, width int) string {
	if snap == nil {
		return styleMuted.Render("Collecting cluster data...")
	}

	reports := snap.Healthy()
	if len(reports) == 0 {
		return styleMuted.Render("No healthy nodes to display.")
	}

	var b strings.Builder

	// Collect all unique service names across nodes
	svcSet := make(map[string]bool)
	for _, r := range reports {
		if r.Services == nil {
			continue
		}
		for _, svc := range r.Services.Services {
			svcSet[svc.Name] = true
		}
	}

	// Sort names so the row order is deterministic across refreshes.
	svcNames := make([]string, 0, len(svcSet))
	for name := range svcSet {
		svcNames = append(svcNames, name)
	}
	sort.Strings(svcNames)

	if len(svcNames) == 0 {
		return styleMuted.Render("No services found on any node.")
	}

	b.WriteString(styleBold.Render("Service Matrix"))
	b.WriteString("\n")
	b.WriteString(separator(width))
	b.WriteString("\n\n")

	// Header: service name + each node host
	// NOTE(review): %-17s pads the rendered (ANSI-containing) string, so
	// columns can misalign when styles emit color codes — confirm in a
	// real terminal.
	header := fmt.Sprintf(" %-28s", headerStyle.Render("SERVICE"))
	for _, r := range reports {
		host := nodeHost(r)
		if len(host) > 15 {
			host = host[:15]
		}
		header += fmt.Sprintf(" %-17s", headerStyle.Render(host))
	}
	b.WriteString(header)
	b.WriteString("\n")

	// Build a lookup: host -> service name -> ServiceInfo
	type svcKey struct {
		host string
		name string
	}
	svcMap := make(map[svcKey]string) // status string
	for _, r := range reports {
		host := nodeHost(r)
		if r.Services == nil {
			continue
		}
		for _, svc := range r.Services.Services {
			// Map systemd ActiveState onto a colored cell label.
			var st string
			switch {
			case svc.ActiveState == "active":
				st = styleHealthy.Render("active")
			case svc.ActiveState == "failed":
				st = styleCritical.Render("FAILED")
			case svc.ActiveState == "":
				st = styleMuted.Render("n/a")
			default:
				st = styleWarning.Render(svc.ActiveState)
			}
			// A restart-loop risk overrides whatever state was shown.
			if svc.RestartLoopRisk {
				st = styleCritical.Render("LOOP!")
			}
			svcMap[svcKey{host, svc.Name}] = st
		}
	}

	// Rows
	for _, svcName := range svcNames {
		row := fmt.Sprintf(" %-28s", svcName)
		for _, r := range reports {
			host := nodeHost(r)
			st, ok := svcMap[svcKey{host, svcName}]
			if !ok {
				// Service not present on this node.
				st = styleMuted.Render("-")
			}
			row += fmt.Sprintf(" %-17s", st)
		}
		b.WriteString(row)
		b.WriteString("\n")
	}

	// Failed units per node
	hasFailedUnits := false
	for _, r := range reports {
		if r.Services != nil && len(r.Services.FailedUnits) > 0 {
			hasFailedUnits = true
			break
		}
	}
	if hasFailedUnits {
		b.WriteString("\n")
		b.WriteString(styleBold.Render("Failed Systemd Units"))
		b.WriteString("\n")
		b.WriteString(separator(width))
		b.WriteString("\n")
		for _, r := range reports {
			if r.Services == nil || len(r.Services.FailedUnits) == 0 {
				continue
			}
			b.WriteString(fmt.Sprintf(" %s: %s\n",
				styleBold.Render(nodeHost(r)),
				styleCritical.Render(strings.Join(r.Services.FailedUnits, ", ")),
			))
		}
	}

	return b.String()
}
|
||||
58
pkg/cli/monitor/tui/styles.go
Normal file
58
pkg/cli/monitor/tui/styles.go
Normal file
@ -0,0 +1,58 @@
|
||||
package tui
|
||||
|
||||
import (
|
||||
"github.com/charmbracelet/lipgloss"
|
||||
|
||||
"github.com/DeBrosOfficial/network/pkg/cli/production/report"
|
||||
)
|
||||
|
||||
var (
	// Base color palette for the monitor TUI.
	colorGreen  = lipgloss.Color("#00ff00")
	colorRed    = lipgloss.Color("#ff0000")
	colorYellow = lipgloss.Color("#ffff00")
	colorMuted  = lipgloss.Color("#888888")
	colorWhite  = lipgloss.Color("#ffffff")
	// NOTE(review): colorBg is not referenced anywhere in the visible code
	// of this package chunk — confirm it is used elsewhere or remove it.
	colorBg = lipgloss.Color("#1a1a2e")

	// Semantic styles shared by every tab renderer.
	styleHealthy  = lipgloss.NewStyle().Foreground(colorGreen)
	styleWarning  = lipgloss.NewStyle().Foreground(colorYellow)
	styleCritical = lipgloss.NewStyle().Foreground(colorRed)
	styleMuted    = lipgloss.NewStyle().Foreground(colorMuted)
	styleBold     = lipgloss.NewStyle().Bold(true)

	// Tab bar styles: the active tab is bold white on a dark background.
	activeTab   = lipgloss.NewStyle().Bold(true).Foreground(colorWhite).Background(lipgloss.Color("#333333")).Padding(0, 1)
	inactiveTab = lipgloss.NewStyle().Foreground(colorMuted).Padding(0, 1)

	// Table header and footer text styles.
	headerStyle = lipgloss.NewStyle().Bold(true).Foreground(colorWhite)
	footerStyle = lipgloss.NewStyle().Foreground(colorMuted)
)
|
||||
|
||||
// statusStr returns a green "OK" when ok is true, red "DOWN" when false.
|
||||
func statusStr(ok bool) string {
|
||||
if ok {
|
||||
return styleHealthy.Render("OK")
|
||||
}
|
||||
return styleCritical.Render("DOWN")
|
||||
}
|
||||
|
||||
// severityStyle returns the appropriate lipgloss style for an alert severity.
|
||||
func severityStyle(s string) lipgloss.Style {
|
||||
switch s {
|
||||
case "critical":
|
||||
return styleCritical
|
||||
case "warning":
|
||||
return styleWarning
|
||||
case "info":
|
||||
return styleMuted
|
||||
default:
|
||||
return styleMuted
|
||||
}
|
||||
}
|
||||
|
||||
// nodeHost returns the best display host for a NodeReport.
|
||||
func nodeHost(r *report.NodeReport) string {
|
||||
if r.PublicIP != "" {
|
||||
return r.PublicIP
|
||||
}
|
||||
return r.Hostname
|
||||
}
|
||||
47
pkg/cli/monitor/tui/tabs.go
Normal file
47
pkg/cli/monitor/tui/tabs.go
Normal file
@ -0,0 +1,47 @@
|
||||
package tui
|
||||
|
||||
import "strings"
|
||||
|
||||
// renderTabBar renders the tab bar with the active tab highlighted.
|
||||
func renderTabBar(active int, width int) string {
|
||||
var parts []string
|
||||
for i, name := range tabNames {
|
||||
if i == active {
|
||||
parts = append(parts, activeTab.Render(name))
|
||||
} else {
|
||||
parts = append(parts, inactiveTab.Render(name))
|
||||
}
|
||||
}
|
||||
|
||||
bar := strings.Join(parts, styleMuted.Render(" | "))
|
||||
|
||||
// Pad to full width if needed
|
||||
if width > 0 {
|
||||
rendered := stripAnsi(bar)
|
||||
if len(rendered) < width {
|
||||
bar += strings.Repeat(" ", width-len(rendered))
|
||||
}
|
||||
}
|
||||
|
||||
return bar
|
||||
}
|
||||
|
||||
// stripAnsi removes ANSI escape sequences from s so the remaining length
// reflects the visible character count.
//
// CSI sequences (ESC '[' ... final byte) are skipped through their final
// byte, which per ECMA-48 is any byte in 0x40-0x7E — not only an ASCII
// letter, so sequences ending in '@', '~', etc. are now handled correctly.
// Other (non-CSI) escape sequences are skipped conservatively up to the
// first alphabetic character, matching the previous behavior.
func stripAnsi(s string) string {
	var out []byte
	for i := 0; i < len(s); i++ {
		if s[i] != '\x1b' {
			out = append(out, s[i])
			continue
		}
		i++ // skip ESC itself
		if i < len(s) && s[i] == '[' {
			// CSI: skip parameter/intermediate bytes up to the final byte
			// (0x40-0x7E); the outer loop's i++ then moves past it.
			i++
			for i < len(s) && (s[i] < 0x40 || s[i] > 0x7e) {
				i++
			}
		} else {
			// Non-CSI escape: skip until the first letter, which the outer
			// loop's i++ consumes.
			for i < len(s) && !((s[i] >= 'a' && s[i] <= 'z') || (s[i] >= 'A' && s[i] <= 'Z')) {
				i++
			}
		}
	}
	return string(out)
}
|
||||
129
pkg/cli/monitor/tui/wgmesh.go
Normal file
129
pkg/cli/monitor/tui/wgmesh.go
Normal file
@ -0,0 +1,129 @@
|
||||
package tui
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strings"
|
||||
|
||||
"github.com/DeBrosOfficial/network/pkg/cli/monitor"
|
||||
"github.com/DeBrosOfficial/network/pkg/cli/production/report"
|
||||
)
|
||||
|
||||
// renderWGMesh renders the WireGuard mesh status tab with peer details.
//
// First a per-node summary table (interface state, WG IP, peer count, listen
// port), then a per-node list of peers. In a full mesh every node should see
// every other node, so the expected peer count is (WG nodes - 1); a node
// with fewer peers is highlighted in red.
func renderWGMesh(snap *monitor.ClusterSnapshot, width int) string {
	if snap == nil {
		return styleMuted.Render("Collecting cluster data...")
	}

	reports := snap.Healthy()
	if len(reports) == 0 {
		return styleMuted.Render("No healthy nodes to display.")
	}

	var b strings.Builder

	// Mesh overview
	b.WriteString(styleBold.Render("WireGuard Mesh Overview"))
	b.WriteString("\n")
	b.WriteString(separator(width))
	b.WriteString("\n\n")

	// Summary header
	b.WriteString(fmt.Sprintf(" %-18s %-10s %-18s %-6s %-8s\n",
		headerStyle.Render("HOST"),
		headerStyle.Render("IFACE"),
		headerStyle.Render("WG IP"),
		headerStyle.Render("PEERS"),
		headerStyle.Render("PORT"),
	))

	// Count nodes that actually have a WireGuard report; drives the
	// expected-peer calculation below.
	wgNodes := 0
	for _, r := range reports {
		if r.WireGuard == nil {
			continue
		}
		wgNodes++
		wg := r.WireGuard
		ifaceStr := statusStr(wg.InterfaceUp)
		b.WriteString(fmt.Sprintf(" %-18s %-10s %-18s %-6d %-8d\n",
			nodeHost(r), ifaceStr, wg.WgIP, wg.PeerCount, wg.ListenPort))
	}

	if wgNodes == 0 {
		return styleMuted.Render("No nodes have WireGuard configured.")
	}

	// Full mesh: each node peers with every other WG node.
	expectedPeers := wgNodes - 1

	// Per-node peer details
	b.WriteString("\n")
	b.WriteString(styleBold.Render("Peer Details"))
	b.WriteString("\n")
	b.WriteString(separator(width))
	b.WriteString("\n")

	for _, r := range reports {
		if r.WireGuard == nil || len(r.WireGuard.Peers) == 0 {
			continue
		}

		b.WriteString("\n")
		host := nodeHost(r)
		// Red when the node sees fewer peers than a full mesh requires.
		peerCountStr := fmt.Sprintf("%d/%d peers", len(r.WireGuard.Peers), expectedPeers)
		if len(r.WireGuard.Peers) < expectedPeers {
			peerCountStr = styleCritical.Render(peerCountStr)
		} else {
			peerCountStr = styleHealthy.Render(peerCountStr)
		}
		b.WriteString(fmt.Sprintf(" %s %s\n", styleBold.Render(host), peerCountStr))

		for _, p := range r.WireGuard.Peers {
			b.WriteString(renderPeerLine(p))
		}
	}

	return b.String()
}
|
||||
|
||||
// renderPeerLine formats a single WG peer.
|
||||
func renderPeerLine(p report.WGPeerInfo) string {
|
||||
keyShort := p.PublicKey
|
||||
if len(keyShort) > 12 {
|
||||
keyShort = keyShort[:12] + "..."
|
||||
}
|
||||
|
||||
// Handshake status
|
||||
var hsStr string
|
||||
if p.LatestHandshake == 0 {
|
||||
hsStr = styleCritical.Render("never")
|
||||
} else if p.HandshakeAgeSec > 180 {
|
||||
hsStr = styleWarning.Render(fmt.Sprintf("%ds ago", p.HandshakeAgeSec))
|
||||
} else {
|
||||
hsStr = styleHealthy.Render(fmt.Sprintf("%ds ago", p.HandshakeAgeSec))
|
||||
}
|
||||
|
||||
// Transfer
|
||||
rx := formatBytes(p.TransferRx)
|
||||
tx := formatBytes(p.TransferTx)
|
||||
|
||||
return fmt.Sprintf(" key=%s endpoint=%-22s hs=%s rx=%s tx=%s ips=%s\n",
|
||||
styleMuted.Render(keyShort),
|
||||
p.Endpoint,
|
||||
hsStr,
|
||||
rx, tx,
|
||||
p.AllowedIPs,
|
||||
)
|
||||
}
|
||||
|
||||
// formatBytes renders a byte count as a human-readable string using binary
// units (1KB = 1024B), with one decimal place for KB and above.
func formatBytes(b int64) string {
	const (
		kb = 1 << 10
		mb = 1 << 20
		gb = 1 << 30
	)
	if b >= gb {
		return fmt.Sprintf("%.1fGB", float64(b)/gb)
	}
	if b >= mb {
		return fmt.Sprintf("%.1fMB", float64(b)/mb)
	}
	if b >= kb {
		return fmt.Sprintf("%.1fKB", float64(b)/kb)
	}
	return fmt.Sprintf("%dB", b)
}
|
||||
97
pkg/cli/production/report/anyone.go
Normal file
97
pkg/cli/production/report/anyone.go
Normal file
@ -0,0 +1,97 @@
|
||||
package report
|
||||
|
||||
import (
|
||||
"context"
|
||||
"os"
|
||||
"regexp"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
)
|
||||
|
||||
// collectAnyone gathers Anyone Protocol relay/client health information.
//
// Each probe runs under its own 4-second timeout; probe failures leave the
// corresponding field at its zero value rather than aborting. Note that the
// block-scoped `defer cancel()` calls all run at function return, not at the
// end of each block, so every context stays live until collectAnyone exits.
func collectAnyone() *AnyoneReport {
	r := &AnyoneReport{}

	// 1. RelayActive: systemctl is-active orama-anyone-relay
	{
		ctx, cancel := context.WithTimeout(context.Background(), 4*time.Second)
		defer cancel()
		if out, err := runCmd(ctx, "systemctl", "is-active", "orama-anyone-relay"); err == nil {
			r.RelayActive = strings.TrimSpace(out) == "active"
		}
	}

	// 2. ClientActive: systemctl is-active orama-anyone-client
	{
		ctx, cancel := context.WithTimeout(context.Background(), 4*time.Second)
		defer cancel()
		if out, err := runCmd(ctx, "systemctl", "is-active", "orama-anyone-client"); err == nil {
			r.ClientActive = strings.TrimSpace(out) == "active"
		}
	}

	// 3. Mode: derive from active state (relay takes precedence; stays ""
	// when neither service is active).
	if r.RelayActive {
		r.Mode = "relay"
	} else if r.ClientActive {
		r.Mode = "client"
	}

	// 4. ORPortListening, SocksListening, ControlListening: check ports in ss -tlnp
	// (9001/9050/9051 — presumably the default OR/SOCKS/control ports; confirm
	// against the deployed anonrc.)
	{
		ctx, cancel := context.WithTimeout(context.Background(), 4*time.Second)
		defer cancel()
		if out, err := runCmd(ctx, "ss", "-tlnp"); err == nil {
			r.ORPortListening = portIsListening(out, 9001)
			r.SocksListening = portIsListening(out, 9050)
			r.ControlListening = portIsListening(out, 9051)
		}
	}

	// 5. Bootstrapped / BootstrapPct: parse last "Bootstrapped" line from notices.log
	{
		ctx, cancel := context.WithTimeout(context.Background(), 4*time.Second)
		defer cancel()
		if out, err := runCmd(ctx, "bash", "-c",
			`grep "Bootstrapped" /var/log/anon/notices.log 2>/dev/null | tail -1`); err == nil {
			out = strings.TrimSpace(out)
			if out != "" {
				// Parse percentage from lines like:
				// "... Bootstrapped 100% (done): Done"
				// "... Bootstrapped 85%: Loading relay descriptors"
				re := regexp.MustCompile(`Bootstrapped\s+(\d+)%`)
				if m := re.FindStringSubmatch(out); len(m) >= 2 {
					if pct, err := strconv.Atoi(m[1]); err == nil {
						r.BootstrapPct = pct
						r.Bootstrapped = pct == 100
					}
				}
			}
		}
	}

	// 6. Fingerprint: read /var/lib/anon/fingerprint
	if data, err := os.ReadFile("/var/lib/anon/fingerprint"); err == nil {
		line := strings.TrimSpace(string(data))
		// The file may contain "nickname fingerprint" — extract just the fingerprint.
		fields := strings.Fields(line)
		if len(fields) >= 2 {
			r.Fingerprint = fields[1]
		} else if len(fields) == 1 {
			r.Fingerprint = fields[0]
		}
	}

	// 7. Nickname: extract from anonrc config
	{
		ctx, cancel := context.WithTimeout(context.Background(), 4*time.Second)
		defer cancel()
		if out, err := runCmd(ctx, "bash", "-c",
			`grep "^Nickname" /etc/anon/anonrc 2>/dev/null | awk '{print $2}'`); err == nil {
			r.Nickname = strings.TrimSpace(out)
		}
	}

	return r
}
|
||||
254
pkg/cli/production/report/dns.go
Normal file
254
pkg/cli/production/report/dns.go
Normal file
@ -0,0 +1,254 @@
|
||||
package report
|
||||
|
||||
import (
|
||||
"context"
|
||||
"math"
|
||||
"os"
|
||||
"regexp"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
)
|
||||
|
||||
// collectDNS gathers CoreDNS, Caddy, and DNS resolution health information.
// Only called when /etc/coredns exists.
//
// Each probe runs under its own 4-second timeout and failures leave the
// corresponding field at its zero value. The block-scoped `defer cancel()`
// calls all run at function return, not at block end, so every context
// stays live until collectDNS exits. Resolution probes (items 9-14) are
// skipped entirely when no base domain can be parsed from the Corefile.
func collectDNS() *DNSReport {
	r := &DNSReport{}

	// Set TLS days to -1 by default (failure state).
	r.BaseTLSDaysLeft = -1
	r.WildTLSDaysLeft = -1

	// 1. CoreDNSActive: systemctl is-active coredns
	{
		ctx, cancel := context.WithTimeout(context.Background(), 4*time.Second)
		defer cancel()
		if out, err := runCmd(ctx, "systemctl", "is-active", "coredns"); err == nil {
			r.CoreDNSActive = strings.TrimSpace(out) == "active"
		}
	}

	// 2. CaddyActive: systemctl is-active caddy
	{
		ctx, cancel := context.WithTimeout(context.Background(), 4*time.Second)
		defer cancel()
		if out, err := runCmd(ctx, "systemctl", "is-active", "caddy"); err == nil {
			r.CaddyActive = strings.TrimSpace(out) == "active"
		}
	}

	// 3. Port53Bound: check :53 in ss -ulnp (UDP listeners)
	{
		ctx, cancel := context.WithTimeout(context.Background(), 4*time.Second)
		defer cancel()
		if out, err := runCmd(ctx, "ss", "-ulnp"); err == nil {
			r.Port53Bound = strings.Contains(out, ":53 ") || strings.Contains(out, ":53\t")
		}
	}

	// 4. Port80Bound and Port443Bound: check in ss -tlnp (TCP listeners)
	{
		ctx, cancel := context.WithTimeout(context.Background(), 4*time.Second)
		defer cancel()
		if out, err := runCmd(ctx, "ss", "-tlnp"); err == nil {
			r.Port80Bound = strings.Contains(out, ":80 ") || strings.Contains(out, ":80\t")
			r.Port443Bound = strings.Contains(out, ":443 ") || strings.Contains(out, ":443\t")
		}
	}

	// 5. CoreDNSMemMB: ps -C coredns -o rss= (RSS reported in KB)
	{
		ctx, cancel := context.WithTimeout(context.Background(), 4*time.Second)
		defer cancel()
		if out, err := runCmd(ctx, "ps", "-C", "coredns", "-o", "rss=", "--no-headers"); err == nil {
			line := strings.TrimSpace(out)
			if line != "" {
				// Multiple processes would yield multiple fields; only the
				// first is used.
				first := strings.Fields(line)[0]
				if kb, err := strconv.Atoi(first); err == nil {
					r.CoreDNSMemMB = kb / 1024
				}
			}
		}
	}

	// 6. CoreDNSRestarts: systemctl show coredns --property=NRestarts
	{
		ctx, cancel := context.WithTimeout(context.Background(), 4*time.Second)
		defer cancel()
		if out, err := runCmd(ctx, "systemctl", "show", "coredns", "--property=NRestarts"); err == nil {
			props := parseProperties(out)
			r.CoreDNSRestarts = parseInt(props["NRestarts"])
		}
	}

	// 7. LogErrors: grep errors from coredns journal (last 5 min)
	{
		ctx, cancel := context.WithTimeout(context.Background(), 4*time.Second)
		defer cancel()
		if out, err := runCmd(ctx, "bash", "-c",
			`journalctl -u coredns --no-pager -n 100 --since "5 min ago" 2>/dev/null | grep -ciE "(error|ERR)" || echo 0`); err == nil {
			if n, err := strconv.Atoi(strings.TrimSpace(out)); err == nil {
				r.LogErrors = n
			}
		}
	}

	// 8. CorefileExists: check /etc/coredns/Corefile
	if _, err := os.Stat("/etc/coredns/Corefile"); err == nil {
		r.CorefileExists = true
	}

	// Parse domain from Corefile for DNS resolution tests.
	domain := parseDomain()
	if domain == "" {
		return r
	}

	// 9. SOAResolves: dig SOA
	{
		ctx, cancel := context.WithTimeout(context.Background(), 4*time.Second)
		defer cancel()
		if out, err := runCmd(ctx, "dig", "@127.0.0.1", "SOA", domain, "+short", "+time=2"); err == nil {
			r.SOAResolves = strings.TrimSpace(out) != ""
		}
	}

	// 10. NSResolves and NSRecordCount: dig NS
	{
		ctx, cancel := context.WithTimeout(context.Background(), 4*time.Second)
		defer cancel()
		if out, err := runCmd(ctx, "dig", "@127.0.0.1", "NS", domain, "+short", "+time=2"); err == nil {
			out = strings.TrimSpace(out)
			if out != "" {
				r.NSResolves = true
				// One NS record per non-empty output line.
				lines := strings.Split(out, "\n")
				count := 0
				for _, l := range lines {
					if strings.TrimSpace(l) != "" {
						count++
					}
				}
				r.NSRecordCount = count
			}
		}
	}

	// 11. WildcardResolves: dig A test.<domain>
	{
		ctx, cancel := context.WithTimeout(context.Background(), 4*time.Second)
		defer cancel()
		if out, err := runCmd(ctx, "dig", "@127.0.0.1", "A", "test."+domain, "+short", "+time=2"); err == nil {
			r.WildcardResolves = strings.TrimSpace(out) != ""
		}
	}

	// 12. BaseAResolves: dig A <domain>
	{
		ctx, cancel := context.WithTimeout(context.Background(), 4*time.Second)
		defer cancel()
		if out, err := runCmd(ctx, "dig", "@127.0.0.1", "A", domain, "+short", "+time=2"); err == nil {
			r.BaseAResolves = strings.TrimSpace(out) != ""
		}
	}

	// 13. BaseTLSDaysLeft: check TLS cert expiry for base domain
	r.BaseTLSDaysLeft = checkTLSDaysLeft(domain, domain)

	// 14. WildTLSDaysLeft: check TLS cert expiry for wildcard
	r.WildTLSDaysLeft = checkTLSDaysLeft("*."+domain, domain)

	return r
}
|
||||
|
||||
// domainBlockRe matches Corefile zone block declarations such as
// "example.com {", "*.example.com {", or "example.com:53 {" and captures the
// base domain (without any wildcard prefix or port suffix). Compiled once at
// package scope instead of on every parseDomain call.
var domainBlockRe = regexp.MustCompile(`(?m)^\s*\*?\.?([a-zA-Z0-9][-a-zA-Z0-9]*\.[a-zA-Z0-9][-a-zA-Z0-9.]*[a-zA-Z])(?::\d+)?\s*\{`)

// parseDomain reads /etc/coredns/Corefile and extracts the base domain.
// It looks for zone block declarations like "example.com {" or "*.example.com {"
// and returns the base domain (without wildcard prefix). An empty string is
// returned when the Corefile is missing or no domain block can be found.
func parseDomain() string {
	data, err := os.ReadFile("/etc/coredns/Corefile")
	if err != nil {
		return ""
	}

	content := string(data)

	// Primary: regexp over zone block declarations. The pattern requires at
	// least one interior dot, so it matches a real domain and never "." (the
	// root zone).
	if m := domainBlockRe.FindStringSubmatch(content); len(m) >= 2 {
		return m[1]
	}

	// Fallback: scan line by line for anything that looks like a domain
	// block declaration.
	for _, line := range strings.Split(content, "\n") {
		line = strings.TrimSpace(line)
		if line == "" || strings.HasPrefix(line, "#") {
			continue
		}

		// Strip trailing "{" and surrounding whitespace.
		line = strings.TrimSpace(strings.TrimSuffix(line, "{"))

		// Remove a ":port" suffix if present.
		if idx := strings.LastIndex(line, ":"); idx > 0 {
			if _, err := strconv.Atoi(line[idx+1:]); err == nil {
				line = line[:idx]
			}
		}

		// Strip wildcard prefix.
		line = strings.TrimPrefix(line, "*.")

		// Accept if it looks like a domain: at least one dot, no spaces, and
		// not the bare root zone ".".
		if strings.Contains(line, ".") && !strings.Contains(line, " ") && line != "." {
			return strings.TrimSpace(line)
		}
	}

	return ""
}
|
||||
|
||||
// checkTLSDaysLeft uses openssl to check the TLS certificate expiry date
// for a given servername connecting to localhost:443.
// Returns days until expiry, or -1 on any failure.
//
// The domain parameter is currently unused by the body; only servername is
// interpolated into the openssl command. NOTE(review): servername comes from
// the parsed Corefile and is spliced into a bash command string — confirm it
// can never contain shell metacharacters.
func checkTLSDaysLeft(servername, domain string) int {
	ctx, cancel := context.WithTimeout(context.Background(), 4*time.Second)
	defer cancel()

	// s_client fetches the cert via SNI; x509 -enddate prints its expiry.
	cmd := `echo | openssl s_client -servername ` + servername + ` -connect localhost:443 2>/dev/null | openssl x509 -noout -enddate 2>/dev/null`
	out, err := runCmd(ctx, "bash", "-c", cmd)
	if err != nil {
		return -1
	}

	// Output looks like: "notAfter=Mar 15 12:00:00 2025 GMT"
	out = strings.TrimSpace(out)
	if !strings.HasPrefix(out, "notAfter=") {
		return -1
	}

	dateStr := strings.TrimPrefix(out, "notAfter=")
	dateStr = strings.TrimSpace(dateStr)

	// Parse the date. OpenSSL uses the format: "Jan 2 15:04:05 2006 GMT"
	// NOTE(review): the first two layouts below are identical as written;
	// one of them was presumably meant to be the day-padded variant
	// "Jan  2 15:04:05 2006 GMT" (double space), which openssl emits for
	// single-digit days — confirm and fix the whitespace.
	layouts := []string{
		"Jan 2 15:04:05 2006 GMT",
		"Jan 2 15:04:05 2006 GMT",
		"Jan 02 15:04:05 2006 GMT",
	}

	for _, layout := range layouts {
		t, err := time.Parse(layout, dateStr)
		if err == nil {
			// Floor so a cert expiring later today reports 0 days left.
			days := int(math.Floor(time.Until(t).Hours() / 24))
			return days
		}
	}

	return -1
}
|
||||
63
pkg/cli/production/report/gateway.go
Normal file
63
pkg/cli/production/report/gateway.go
Normal file
@ -0,0 +1,63 @@
|
||||
package report
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"io"
|
||||
"net/http"
|
||||
"time"
|
||||
)
|
||||
|
||||
// collectGateway checks the main gateway health endpoint and parses subsystem status.
|
||||
func collectGateway() *GatewayReport {
|
||||
r := &GatewayReport{}
|
||||
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 3*time.Second)
|
||||
defer cancel()
|
||||
|
||||
req, err := http.NewRequestWithContext(ctx, http.MethodGet, "http://localhost:6001/v1/health", nil)
|
||||
if err != nil {
|
||||
return r
|
||||
}
|
||||
|
||||
resp, err := http.DefaultClient.Do(req)
|
||||
if err != nil {
|
||||
r.Responsive = false
|
||||
return r
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
r.Responsive = true
|
||||
r.HTTPStatus = resp.StatusCode
|
||||
|
||||
body, err := io.ReadAll(resp.Body)
|
||||
if err != nil {
|
||||
return r
|
||||
}
|
||||
|
||||
// Try to parse the health response JSON.
|
||||
// Expected: {"status":"ok","version":"...","subsystems":{"rqlite":{"status":"ok","latency":"2ms"},...}}
|
||||
var health struct {
|
||||
Status string `json:"status"`
|
||||
Version string `json:"version"`
|
||||
Subsystems map[string]json.RawMessage `json:"subsystems"`
|
||||
}
|
||||
|
||||
if err := json.Unmarshal(body, &health); err != nil {
|
||||
return r
|
||||
}
|
||||
|
||||
r.Version = health.Version
|
||||
|
||||
if len(health.Subsystems) > 0 {
|
||||
r.Subsystems = make(map[string]SubsystemHealth, len(health.Subsystems))
|
||||
for name, raw := range health.Subsystems {
|
||||
var sub SubsystemHealth
|
||||
if err := json.Unmarshal(raw, &sub); err == nil {
|
||||
r.Subsystems[name] = sub
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return r
|
||||
}
|
||||
148
pkg/cli/production/report/ipfs.go
Normal file
148
pkg/cli/production/report/ipfs.go
Normal file
@ -0,0 +1,148 @@
|
||||
package report
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"encoding/json"
|
||||
"io"
|
||||
"net/http"
|
||||
"os"
|
||||
"strings"
|
||||
"time"
|
||||
)
|
||||
|
||||
// collectIPFS gathers IPFS daemon and cluster health information.
|
||||
func collectIPFS() *IPFSReport {
|
||||
r := &IPFSReport{}
|
||||
|
||||
// 1. DaemonActive: systemctl is-active orama-ipfs
|
||||
{
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 4*time.Second)
|
||||
defer cancel()
|
||||
if out, err := runCmd(ctx, "systemctl", "is-active", "orama-ipfs"); err == nil {
|
||||
r.DaemonActive = strings.TrimSpace(out) == "active"
|
||||
}
|
||||
}
|
||||
|
||||
// 2. ClusterActive: systemctl is-active orama-ipfs-cluster
|
||||
{
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 4*time.Second)
|
||||
defer cancel()
|
||||
if out, err := runCmd(ctx, "systemctl", "is-active", "orama-ipfs-cluster"); err == nil {
|
||||
r.ClusterActive = strings.TrimSpace(out) == "active"
|
||||
}
|
||||
}
|
||||
|
||||
// 3. SwarmPeerCount: POST /api/v0/swarm/peers
|
||||
{
|
||||
body, err := ipfsPost("http://localhost:4501/api/v0/swarm/peers")
|
||||
if err == nil {
|
||||
var resp struct {
|
||||
Peers []interface{} `json:"Peers"`
|
||||
}
|
||||
if err := json.Unmarshal(body, &resp); err == nil {
|
||||
r.SwarmPeerCount = len(resp.Peers)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// 4. ClusterPeerCount: GET /peers
|
||||
{
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 3*time.Second)
|
||||
defer cancel()
|
||||
if body, err := httpGet(ctx, "http://localhost:9094/peers"); err == nil {
|
||||
var peers []interface{}
|
||||
if err := json.Unmarshal(body, &peers); err == nil {
|
||||
r.ClusterPeerCount = len(peers)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// 5. RepoSizeBytes/RepoMaxBytes: POST /api/v0/repo/stat
|
||||
{
|
||||
body, err := ipfsPost("http://localhost:4501/api/v0/repo/stat")
|
||||
if err == nil {
|
||||
var resp struct {
|
||||
RepoSize int64 `json:"RepoSize"`
|
||||
StorageMax int64 `json:"StorageMax"`
|
||||
}
|
||||
if err := json.Unmarshal(body, &resp); err == nil {
|
||||
r.RepoSizeBytes = resp.RepoSize
|
||||
r.RepoMaxBytes = resp.StorageMax
|
||||
if resp.StorageMax > 0 && resp.RepoSize > 0 {
|
||||
r.RepoUsePct = int(float64(resp.RepoSize) / float64(resp.StorageMax) * 100)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// 6. KuboVersion: POST /api/v0/version
|
||||
{
|
||||
body, err := ipfsPost("http://localhost:4501/api/v0/version")
|
||||
if err == nil {
|
||||
var resp struct {
|
||||
Version string `json:"Version"`
|
||||
}
|
||||
if err := json.Unmarshal(body, &resp); err == nil {
|
||||
r.KuboVersion = resp.Version
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// 7. ClusterVersion: GET /id
|
||||
{
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 3*time.Second)
|
||||
defer cancel()
|
||||
if body, err := httpGet(ctx, "http://localhost:9094/id"); err == nil {
|
||||
var resp struct {
|
||||
Version string `json:"version"`
|
||||
}
|
||||
if err := json.Unmarshal(body, &resp); err == nil {
|
||||
r.ClusterVersion = resp.Version
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// 8. HasSwarmKey: check file existence
|
||||
if _, err := os.Stat("/opt/orama/.orama/data/ipfs/repo/swarm.key"); err == nil {
|
||||
r.HasSwarmKey = true
|
||||
}
|
||||
|
||||
// 9. BootstrapEmpty: POST /api/v0/bootstrap/list
|
||||
{
|
||||
body, err := ipfsPost("http://localhost:4501/api/v0/bootstrap/list")
|
||||
if err == nil {
|
||||
var resp struct {
|
||||
Peers []interface{} `json:"Peers"`
|
||||
}
|
||||
if err := json.Unmarshal(body, &resp); err == nil {
|
||||
r.BootstrapEmpty = resp.Peers == nil || len(resp.Peers) == 0
|
||||
} else {
|
||||
// If we got a response but Peers is missing, treat as empty.
|
||||
r.BootstrapEmpty = true
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return r
|
||||
}
|
||||
|
||||
// ipfsPost sends a POST request with an empty body to an IPFS API endpoint.
|
||||
// IPFS uses POST for all API calls. Uses a 3-second timeout.
|
||||
func ipfsPost(url string) ([]byte, error) {
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 3*time.Second)
|
||||
defer cancel()
|
||||
|
||||
req, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(nil))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
resp, err := http.DefaultClient.Do(req)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
return io.ReadAll(resp.Body)
|
||||
}
|
||||
205
pkg/cli/production/report/namespaces.go
Normal file
205
pkg/cli/production/report/namespaces.go
Normal file
@ -0,0 +1,205 @@
|
||||
package report
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"regexp"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
)
|
||||
|
||||
// collectNamespaces discovers deployed namespaces and checks health of their
|
||||
// per-namespace services (RQLite, Olric, Gateway).
|
||||
func collectNamespaces() []NamespaceReport {
|
||||
namespaces := discoverNamespaces()
|
||||
if len(namespaces) == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
var reports []NamespaceReport
|
||||
for _, ns := range namespaces {
|
||||
reports = append(reports, collectNamespaceReport(ns))
|
||||
}
|
||||
return reports
|
||||
}
|
||||
|
||||
// nsInfo describes a single discovered namespace deployment.
type nsInfo struct {
	// name is the namespace identifier, e.g. "myapp" extracted from the
	// orama-deploy-myapp-rqlite.service unit name or the data directory.
	name string
	// portBase is the namespace's RQLite HTTP port; sibling services derive
	// from it (Olric memberlist at portBase+2, gateway at portBase+4 — see
	// collectNamespaceReport).
	portBase int
}
|
||||
|
||||
// discoverNamespaces finds deployed namespaces by looking for systemd service units
|
||||
// and/or the filesystem namespace directory.
|
||||
func discoverNamespaces() []nsInfo {
|
||||
var result []nsInfo
|
||||
seen := make(map[string]bool)
|
||||
|
||||
// Strategy 1: Glob for orama-deploy-*-rqlite.service files.
|
||||
matches, _ := filepath.Glob("/etc/systemd/system/orama-deploy-*-rqlite.service")
|
||||
for _, path := range matches {
|
||||
base := filepath.Base(path)
|
||||
// Extract namespace name: orama-deploy-<name>-rqlite.service
|
||||
name := strings.TrimPrefix(base, "orama-deploy-")
|
||||
name = strings.TrimSuffix(name, "-rqlite.service")
|
||||
if name == "" || seen[name] {
|
||||
continue
|
||||
}
|
||||
seen[name] = true
|
||||
|
||||
portBase := parsePortBaseFromUnit(path)
|
||||
if portBase > 0 {
|
||||
result = append(result, nsInfo{name: name, portBase: portBase})
|
||||
}
|
||||
}
|
||||
|
||||
// Strategy 2: Check filesystem for any namespaces not found via systemd.
|
||||
nsDir := "/opt/orama/.orama/data/namespaces"
|
||||
entries, err := os.ReadDir(nsDir)
|
||||
if err == nil {
|
||||
for _, entry := range entries {
|
||||
if !entry.IsDir() || seen[entry.Name()] {
|
||||
continue
|
||||
}
|
||||
name := entry.Name()
|
||||
seen[name] = true
|
||||
|
||||
// Try to find the port base from a corresponding service unit.
|
||||
unitPath := fmt.Sprintf("/etc/systemd/system/orama-deploy-%s-rqlite.service", name)
|
||||
portBase := parsePortBaseFromUnit(unitPath)
|
||||
if portBase > 0 {
|
||||
result = append(result, nsInfo{name: name, portBase: portBase})
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return result
|
||||
}
|
||||
|
||||
// parsePortBaseFromUnit reads a systemd unit file and extracts the port base
|
||||
// from ExecStart arguments or environment variables.
|
||||
//
|
||||
// It looks for patterns like:
|
||||
// - "-http-addr localhost:PORT" or "-http-addr 0.0.0.0:PORT" in ExecStart
|
||||
// - "PORT_BASE=NNNN" in environment files
|
||||
// - Any port number that appears to be the RQLite HTTP port (the base port)
|
||||
func parsePortBaseFromUnit(unitPath string) int {
|
||||
data, err := os.ReadFile(unitPath)
|
||||
if err != nil {
|
||||
return 0
|
||||
}
|
||||
content := string(data)
|
||||
|
||||
// Look for -http-addr with a port number in ExecStart line.
|
||||
httpAddrRe := regexp.MustCompile(`-http-addr\s+\S+:(\d+)`)
|
||||
if m := httpAddrRe.FindStringSubmatch(content); len(m) >= 2 {
|
||||
if port, err := strconv.Atoi(m[1]); err == nil {
|
||||
return port
|
||||
}
|
||||
}
|
||||
|
||||
// Look for a port in -addr or -http flags.
|
||||
addrRe := regexp.MustCompile(`(?:-addr|-http)\s+\S*:(\d+)`)
|
||||
if m := addrRe.FindStringSubmatch(content); len(m) >= 2 {
|
||||
if port, err := strconv.Atoi(m[1]); err == nil {
|
||||
return port
|
||||
}
|
||||
}
|
||||
|
||||
// Look for PORT_BASE environment variable in EnvironmentFile or Environment= directives.
|
||||
portBaseRe := regexp.MustCompile(`PORT_BASE=(\d+)`)
|
||||
if m := portBaseRe.FindStringSubmatch(content); len(m) >= 2 {
|
||||
if port, err := strconv.Atoi(m[1]); err == nil {
|
||||
return port
|
||||
}
|
||||
}
|
||||
|
||||
// Check referenced EnvironmentFile for PORT_BASE.
|
||||
envFileRe := regexp.MustCompile(`EnvironmentFile=(.+)`)
|
||||
if m := envFileRe.FindStringSubmatch(content); len(m) >= 2 {
|
||||
envPath := strings.TrimSpace(m[1])
|
||||
envPath = strings.TrimPrefix(envPath, "-") // optional prefix means "ignore if missing"
|
||||
if envData, err := os.ReadFile(envPath); err == nil {
|
||||
if m2 := portBaseRe.FindStringSubmatch(string(envData)); len(m2) >= 2 {
|
||||
if port, err := strconv.Atoi(m2[1]); err == nil {
|
||||
return port
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return 0
|
||||
}
|
||||
|
||||
// collectNamespaceReport checks the health of services for a single namespace.
|
||||
func collectNamespaceReport(ns nsInfo) NamespaceReport {
|
||||
r := NamespaceReport{
|
||||
Name: ns.name,
|
||||
PortBase: ns.portBase,
|
||||
}
|
||||
|
||||
// 1. RQLiteUp + RQLiteState: GET http://localhost:<port_base>/status
|
||||
{
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 3*time.Second)
|
||||
defer cancel()
|
||||
|
||||
url := fmt.Sprintf("http://localhost:%d/status", ns.portBase)
|
||||
if body, err := httpGet(ctx, url); err == nil {
|
||||
r.RQLiteUp = true
|
||||
|
||||
var status map[string]interface{}
|
||||
if err := json.Unmarshal(body, &status); err == nil {
|
||||
r.RQLiteState = getNestedString(status, "store", "raft", "state")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// 2. RQLiteReady: GET http://localhost:<port_base>/readyz
|
||||
{
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 3*time.Second)
|
||||
defer cancel()
|
||||
|
||||
url := fmt.Sprintf("http://localhost:%d/readyz", ns.portBase)
|
||||
req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil)
|
||||
if err == nil {
|
||||
if resp, err := http.DefaultClient.Do(req); err == nil {
|
||||
io.Copy(io.Discard, resp.Body)
|
||||
resp.Body.Close()
|
||||
r.RQLiteReady = resp.StatusCode == http.StatusOK
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// 3. OlricUp: check if port_base+2 is listening
|
||||
{
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 4*time.Second)
|
||||
defer cancel()
|
||||
if out, err := runCmd(ctx, "ss", "-tlnp"); err == nil {
|
||||
r.OlricUp = portIsListening(out, ns.portBase+2)
|
||||
}
|
||||
}
|
||||
|
||||
// 4. GatewayUp + GatewayStatus: GET http://localhost:<port_base+4>/v1/health
|
||||
{
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 3*time.Second)
|
||||
defer cancel()
|
||||
|
||||
url := fmt.Sprintf("http://localhost:%d/v1/health", ns.portBase+4)
|
||||
req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil)
|
||||
if err == nil {
|
||||
if resp, err := http.DefaultClient.Do(req); err == nil {
|
||||
io.Copy(io.Discard, resp.Body)
|
||||
resp.Body.Close()
|
||||
r.GatewayUp = true
|
||||
r.GatewayStatus = resp.StatusCode
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return r
|
||||
}
|
||||
253
pkg/cli/production/report/network.go
Normal file
253
pkg/cli/production/report/network.go
Normal file
@ -0,0 +1,253 @@
|
||||
package report
|
||||
|
||||
import (
|
||||
"context"
|
||||
"os"
|
||||
"regexp"
|
||||
"sort"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
)
|
||||
|
||||
// collectNetwork gathers network connectivity, TCP stats, listening ports,
|
||||
// and firewall status.
|
||||
func collectNetwork() *NetworkReport {
|
||||
r := &NetworkReport{}
|
||||
|
||||
// 1. InternetReachable: ping 8.8.8.8
|
||||
{
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 4*time.Second)
|
||||
defer cancel()
|
||||
if _, err := runCmd(ctx, "ping", "-c", "1", "-W", "2", "8.8.8.8"); err == nil {
|
||||
r.InternetReachable = true
|
||||
}
|
||||
}
|
||||
|
||||
// 2. DefaultRoute: ip route show default
|
||||
{
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 4*time.Second)
|
||||
defer cancel()
|
||||
if out, err := runCmd(ctx, "ip", "route", "show", "default"); err == nil {
|
||||
r.DefaultRoute = strings.TrimSpace(out) != ""
|
||||
}
|
||||
}
|
||||
|
||||
// 3. WGRouteExists: ip route show dev wg0
|
||||
{
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 4*time.Second)
|
||||
defer cancel()
|
||||
if out, err := runCmd(ctx, "ip", "route", "show", "dev", "wg0"); err == nil {
|
||||
r.WGRouteExists = strings.TrimSpace(out) != ""
|
||||
}
|
||||
}
|
||||
|
||||
// 4. TCPEstablished / TCPTimeWait: parse `ss -s`
|
||||
{
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 4*time.Second)
|
||||
defer cancel()
|
||||
if out, err := runCmd(ctx, "ss", "-s"); err == nil {
|
||||
for _, line := range strings.Split(out, "\n") {
|
||||
lower := strings.ToLower(line)
|
||||
if strings.HasPrefix(lower, "tcp:") || strings.Contains(lower, "estab") {
|
||||
// Parse "estab N" and "timewait N" patterns from the line.
|
||||
r.TCPEstablished = extractSSCount(line, "estab")
|
||||
r.TCPTimeWait = extractSSCount(line, "timewait")
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// 5. TCPRetransRate: read /proc/net/snmp
|
||||
{
|
||||
if data, err := os.ReadFile("/proc/net/snmp"); err == nil {
|
||||
r.TCPRetransRate = parseTCPRetransRate(string(data))
|
||||
}
|
||||
}
|
||||
|
||||
// 6. ListeningPorts: ss -tlnp (TCP) + ss -ulnp (UDP)
|
||||
{
|
||||
seen := make(map[string]bool)
|
||||
|
||||
ctx1, cancel1 := context.WithTimeout(context.Background(), 4*time.Second)
|
||||
defer cancel1()
|
||||
if out, err := runCmd(ctx1, "ss", "-tlnp"); err == nil {
|
||||
for _, pi := range parseSSListening(out, "tcp") {
|
||||
key := strconv.Itoa(pi.Port) + "/" + pi.Proto
|
||||
if !seen[key] {
|
||||
seen[key] = true
|
||||
r.ListeningPorts = append(r.ListeningPorts, pi)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
ctx2, cancel2 := context.WithTimeout(context.Background(), 4*time.Second)
|
||||
defer cancel2()
|
||||
if out, err := runCmd(ctx2, "ss", "-ulnp"); err == nil {
|
||||
for _, pi := range parseSSListening(out, "udp") {
|
||||
key := strconv.Itoa(pi.Port) + "/" + pi.Proto
|
||||
if !seen[key] {
|
||||
seen[key] = true
|
||||
r.ListeningPorts = append(r.ListeningPorts, pi)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Sort by port number for consistent output.
|
||||
sort.Slice(r.ListeningPorts, func(i, j int) bool {
|
||||
if r.ListeningPorts[i].Port != r.ListeningPorts[j].Port {
|
||||
return r.ListeningPorts[i].Port < r.ListeningPorts[j].Port
|
||||
}
|
||||
return r.ListeningPorts[i].Proto < r.ListeningPorts[j].Proto
|
||||
})
|
||||
}
|
||||
|
||||
// 7. UFWActive: ufw status
|
||||
{
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 4*time.Second)
|
||||
defer cancel()
|
||||
if out, err := runCmd(ctx, "ufw", "status"); err == nil {
|
||||
r.UFWActive = strings.Contains(out, "Status: active")
|
||||
}
|
||||
}
|
||||
|
||||
// 8. UFWRules: ufw status numbered
|
||||
if r.UFWActive {
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 4*time.Second)
|
||||
defer cancel()
|
||||
if out, err := runCmd(ctx, "ufw", "status", "numbered"); err == nil {
|
||||
r.UFWRules = parseUFWRules(out)
|
||||
}
|
||||
}
|
||||
|
||||
return r
|
||||
}
|
||||
|
||||
// extractSSCount finds a pattern like "estab 42" or "timewait 7" in an
// `ss -s` summary line and returns the counter, or 0 when absent.
func extractSSCount(line, keyword string) int {
	match := regexp.MustCompile(keyword + `\s+(\d+)`).FindStringSubmatch(line)
	if len(match) < 2 {
		return 0
	}
	count, err := strconv.Atoi(match[1])
	if err != nil {
		return 0
	}
	return count
}
|
||||
|
||||
// parseTCPRetransRate parses /proc/net/snmp content to compute
// RetransSegs / OutSegs * 100.
//
// The file carries paired lines: a "Tcp:" header naming the counters,
// immediately followed by a "Tcp:" line holding their values.
func parseTCPRetransRate(data string) float64 {
	lines := strings.Split(data, "\n")
	for i := 0; i+1 < len(lines); i++ {
		headerLine, valueLine := lines[i], lines[i+1]
		if !strings.HasPrefix(headerLine, "Tcp:") || !strings.HasPrefix(valueLine, "Tcp:") {
			continue
		}
		names := strings.Fields(headerLine)
		values := strings.Fields(valueLine)
		if len(names) != len(values) {
			continue
		}

		var outSegs, retransSegs float64
		for j, name := range names {
			v, err := strconv.ParseFloat(values[j], 64)
			if err != nil {
				continue
			}
			switch name {
			case "OutSegs":
				outSegs = v
			case "RetransSegs":
				retransSegs = v
			}
		}
		if outSegs > 0 {
			return retransSegs / outSegs * 100
		}
		return 0
	}
	return 0
}
|
||||
|
||||
// parseSSListening parses the output of `ss -tlnp` or `ss -ulnp` to extract
|
||||
// port numbers and process names.
|
||||
func parseSSListening(output, proto string) []PortInfo {
|
||||
var ports []PortInfo
|
||||
processRe := regexp.MustCompile(`users:\(\("([^"]+)"`)
|
||||
|
||||
for _, line := range strings.Split(output, "\n") {
|
||||
line = strings.TrimSpace(line)
|
||||
// Skip header and empty lines.
|
||||
if line == "" || strings.HasPrefix(line, "State") || strings.HasPrefix(line, "Netid") {
|
||||
continue
|
||||
}
|
||||
|
||||
fields := strings.Fields(line)
|
||||
if len(fields) < 4 {
|
||||
continue
|
||||
}
|
||||
|
||||
// The local address:port is typically the 4th field (index 3) for ss -tlnp
|
||||
// or the 5th field (index 4) for some formats. We look for a field with ":PORT".
|
||||
localAddr := ""
|
||||
for _, f := range fields {
|
||||
if strings.Contains(f, ":") && !strings.HasPrefix(f, "users:") {
|
||||
// Could be *:port, 0.0.0.0:port, [::]:port, 127.0.0.1:port, etc.
|
||||
if idx := strings.LastIndex(f, ":"); idx >= 0 {
|
||||
portStr := f[idx+1:]
|
||||
if _, err := strconv.Atoi(portStr); err == nil {
|
||||
localAddr = f
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if localAddr == "" {
|
||||
continue
|
||||
}
|
||||
|
||||
idx := strings.LastIndex(localAddr, ":")
|
||||
if idx < 0 {
|
||||
continue
|
||||
}
|
||||
portStr := localAddr[idx+1:]
|
||||
port, err := strconv.Atoi(portStr)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
|
||||
process := ""
|
||||
if m := processRe.FindStringSubmatch(line); len(m) >= 2 {
|
||||
process = m[1]
|
||||
}
|
||||
|
||||
ports = append(ports, PortInfo{
|
||||
Port: port,
|
||||
Proto: proto,
|
||||
Process: process,
|
||||
})
|
||||
}
|
||||
return ports
|
||||
}
|
||||
|
||||
// parseUFWRules extracts rule lines from `ufw status numbered` output,
// skipping headers, separators, and blank lines. Rule lines carry a
// "[ N]" numbering prefix.
func parseUFWRules(output string) []string {
	var rules []string
	for _, raw := range strings.Split(output, "\n") {
		trimmed := strings.TrimSpace(raw)
		if trimmed == "" {
			continue
		}
		if strings.HasPrefix(trimmed, "[") && strings.Contains(trimmed, "]") {
			rules = append(rules, trimmed)
		}
	}
	return rules
}
|
||||
150
pkg/cli/production/report/olric.go
Normal file
150
pkg/cli/production/report/olric.go
Normal file
@ -0,0 +1,150 @@
|
||||
package report
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
)
|
||||
|
||||
// collectOlric gathers Olric distributed cache health information.
|
||||
func collectOlric() *OlricReport {
|
||||
r := &OlricReport{}
|
||||
|
||||
// 1. ServiceActive: systemctl is-active orama-olric
|
||||
{
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 4*time.Second)
|
||||
defer cancel()
|
||||
if out, err := runCmd(ctx, "systemctl", "is-active", "orama-olric"); err == nil {
|
||||
r.ServiceActive = strings.TrimSpace(out) == "active"
|
||||
}
|
||||
}
|
||||
|
||||
// 2. MemberlistUp: check if port 3322 is listening
|
||||
{
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 4*time.Second)
|
||||
defer cancel()
|
||||
if out, err := runCmd(ctx, "ss", "-tlnp"); err == nil {
|
||||
r.MemberlistUp = portIsListening(out, 3322)
|
||||
}
|
||||
}
|
||||
|
||||
// 3. RestartCount: systemctl show NRestarts
|
||||
{
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 4*time.Second)
|
||||
defer cancel()
|
||||
if out, err := runCmd(ctx, "systemctl", "show", "orama-olric", "--property=NRestarts"); err == nil {
|
||||
props := parseProperties(out)
|
||||
r.RestartCount = parseInt(props["NRestarts"])
|
||||
}
|
||||
}
|
||||
|
||||
// 4. ProcessMemMB: ps -C olric-server -o rss=
|
||||
{
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 4*time.Second)
|
||||
defer cancel()
|
||||
if out, err := runCmd(ctx, "ps", "-C", "olric-server", "-o", "rss=", "--no-headers"); err == nil {
|
||||
line := strings.TrimSpace(out)
|
||||
if line != "" {
|
||||
// May have multiple lines if multiple processes; take the first.
|
||||
first := strings.Fields(line)[0]
|
||||
if kb, err := strconv.Atoi(first); err == nil {
|
||||
r.ProcessMemMB = kb / 1024
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// 5. LogErrors: grep errors from journal
|
||||
{
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 4*time.Second)
|
||||
defer cancel()
|
||||
if out, err := runCmd(ctx, "bash", "-c",
|
||||
`journalctl -u orama-olric --no-pager -n 200 --since "1 hour ago" 2>/dev/null | grep -ciE "(error|ERR)" || echo 0`); err == nil {
|
||||
if n, err := strconv.Atoi(strings.TrimSpace(out)); err == nil {
|
||||
r.LogErrors = n
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// 6. LogSuspects: grep suspect/marking failed/dead
|
||||
{
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 4*time.Second)
|
||||
defer cancel()
|
||||
if out, err := runCmd(ctx, "bash", "-c",
|
||||
`journalctl -u orama-olric --no-pager -n 200 --since "1 hour ago" 2>/dev/null | grep -ciE "(suspect|marking.*(failed|dead))" || echo 0`); err == nil {
|
||||
if n, err := strconv.Atoi(strings.TrimSpace(out)); err == nil {
|
||||
r.LogSuspects = n
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// 7. LogFlapping: grep memberlist join/leave
|
||||
{
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 4*time.Second)
|
||||
defer cancel()
|
||||
if out, err := runCmd(ctx, "bash", "-c",
|
||||
`journalctl -u orama-olric --no-pager -n 200 --since "1 hour ago" 2>/dev/null | grep -ciE "(memberlist.*(join|leave))" || echo 0`); err == nil {
|
||||
if n, err := strconv.Atoi(strings.TrimSpace(out)); err == nil {
|
||||
r.LogFlapping = n
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// 8. Member info: try HTTP GET to http://localhost:3320/
|
||||
{
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 3*time.Second)
|
||||
defer cancel()
|
||||
|
||||
if body, err := httpGet(ctx, "http://localhost:3320/"); err == nil {
|
||||
var info struct {
|
||||
Coordinator string `json:"coordinator"`
|
||||
Members []struct {
|
||||
Name string `json:"name"`
|
||||
} `json:"members"`
|
||||
// Some Olric versions expose a flat member list or a different structure.
|
||||
}
|
||||
if err := json.Unmarshal(body, &info); err == nil {
|
||||
r.Coordinator = info.Coordinator
|
||||
r.MemberCount = len(info.Members)
|
||||
for _, m := range info.Members {
|
||||
r.Members = append(r.Members, m.Name)
|
||||
}
|
||||
}
|
||||
|
||||
// Fallback: try to extract member count from a different JSON layout.
|
||||
if r.MemberCount == 0 {
|
||||
var raw map[string]interface{}
|
||||
if err := json.Unmarshal(body, &raw); err == nil {
|
||||
if members, ok := raw["members"]; ok {
|
||||
if arr, ok := members.([]interface{}); ok {
|
||||
r.MemberCount = len(arr)
|
||||
for _, m := range arr {
|
||||
if s, ok := m.(string); ok {
|
||||
r.Members = append(r.Members, s)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if coord, ok := raw["coordinator"].(string); ok && r.Coordinator == "" {
|
||||
r.Coordinator = coord
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return r
|
||||
}
|
||||
|
||||
// portIsListening checks if a given port number appears in ss -tlnp output.
|
||||
func portIsListening(ssOutput string, port int) bool {
|
||||
portStr := ":" + strconv.Itoa(port)
|
||||
for _, line := range strings.Split(ssOutput, "\n") {
|
||||
if strings.Contains(line, portStr) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
89
pkg/cli/production/report/processes.go
Normal file
89
pkg/cli/production/report/processes.go
Normal file
@ -0,0 +1,89 @@
|
||||
package report
|
||||
|
||||
import (
|
||||
"context"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
)
|
||||
|
||||
// oramaProcessNames lists command substrings that identify orama-related
// processes. Matching is case-insensitive substring matching (see
// isOramaProcess), so "rqlite" also matches commands like "rqlited".
var oramaProcessNames = []string{
	"orama", "rqlite", "olric", "ipfs", "caddy", "coredns",
}
|
||||
|
||||
// collectProcesses gathers zombie/orphan process info and panic counts from logs.
|
||||
func collectProcesses() *ProcessReport {
|
||||
r := &ProcessReport{}
|
||||
|
||||
// Run ps once and reuse the output for both zombies and orphans.
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 4*time.Second)
|
||||
defer cancel()
|
||||
|
||||
out, err := runCmd(ctx, "ps", "-eo", "pid,ppid,state,comm", "--no-headers")
|
||||
if err == nil {
|
||||
for _, line := range strings.Split(out, "\n") {
|
||||
line = strings.TrimSpace(line)
|
||||
if line == "" {
|
||||
continue
|
||||
}
|
||||
|
||||
fields := strings.Fields(line)
|
||||
if len(fields) < 4 {
|
||||
continue
|
||||
}
|
||||
|
||||
pid, _ := strconv.Atoi(fields[0])
|
||||
ppid, _ := strconv.Atoi(fields[1])
|
||||
state := fields[2]
|
||||
command := strings.Join(fields[3:], " ")
|
||||
|
||||
proc := ProcessInfo{
|
||||
PID: pid,
|
||||
PPID: ppid,
|
||||
State: state,
|
||||
Command: command,
|
||||
}
|
||||
|
||||
// Zombies: state == "Z"
|
||||
if state == "Z" {
|
||||
r.Zombies = append(r.Zombies, proc)
|
||||
}
|
||||
|
||||
// Orphans: PPID == 1 and command contains an orama-related name.
|
||||
if ppid == 1 && isOramaProcess(command) {
|
||||
r.Orphans = append(r.Orphans, proc)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
r.ZombieCount = len(r.Zombies)
|
||||
r.OrphanCount = len(r.Orphans)
|
||||
|
||||
// PanicCount: check journal for panic/fatal in last hour.
|
||||
{
|
||||
ctx2, cancel2 := context.WithTimeout(context.Background(), 4*time.Second)
|
||||
defer cancel2()
|
||||
|
||||
out, err := runCmd(ctx2, "bash", "-c",
|
||||
`journalctl -u orama-node --no-pager -n 500 --since "1 hour ago" 2>/dev/null | grep -ciE "(panic|fatal)" || echo 0`)
|
||||
if err == nil {
|
||||
if n, err := strconv.Atoi(strings.TrimSpace(out)); err == nil {
|
||||
r.PanicCount = n
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return r
|
||||
}
|
||||
|
||||
// isOramaProcess checks if a command string contains any orama-related process name.
|
||||
func isOramaProcess(command string) bool {
|
||||
lower := strings.ToLower(command)
|
||||
for _, name := range oramaProcessNames {
|
||||
if strings.Contains(lower, name) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
165
pkg/cli/production/report/report.go
Normal file
165
pkg/cli/production/report/report.go
Normal file
@ -0,0 +1,165 @@
|
||||
package report
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"os"
|
||||
"os/exec"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
)
|
||||
|
||||
// Handle is the main entry point for `orama node report`.
|
||||
// It collects system, service, and component information in parallel,
|
||||
// then outputs the full NodeReport as JSON to stdout.
|
||||
func Handle(jsonFlag bool, version string) error {
|
||||
start := time.Now()
|
||||
|
||||
rpt := &NodeReport{
|
||||
Timestamp: start.UTC(),
|
||||
Version: version,
|
||||
}
|
||||
|
||||
if h, err := os.Hostname(); err == nil {
|
||||
rpt.Hostname = h
|
||||
}
|
||||
|
||||
var mu sync.Mutex
|
||||
addError := func(msg string) {
|
||||
mu.Lock()
|
||||
rpt.Errors = append(rpt.Errors, msg)
|
||||
mu.Unlock()
|
||||
}
|
||||
|
||||
// safeGo launches a collector goroutine with panic recovery.
|
||||
safeGo := func(wg *sync.WaitGroup, name string, fn func()) {
|
||||
wg.Add(1)
|
||||
go func() {
|
||||
defer wg.Done()
|
||||
defer func() {
|
||||
if r := recover(); r != nil {
|
||||
addError(fmt.Sprintf("%s collector panicked: %v", name, r))
|
||||
}
|
||||
}()
|
||||
fn()
|
||||
}()
|
||||
}
|
||||
|
||||
var wg sync.WaitGroup
|
||||
|
||||
safeGo(&wg, "system", func() {
|
||||
rpt.System = collectSystem()
|
||||
})
|
||||
|
||||
safeGo(&wg, "services", func() {
|
||||
rpt.Services = collectServices()
|
||||
})
|
||||
|
||||
safeGo(&wg, "rqlite", func() {
|
||||
rpt.RQLite = collectRQLite()
|
||||
})
|
||||
|
||||
safeGo(&wg, "olric", func() {
|
||||
rpt.Olric = collectOlric()
|
||||
})
|
||||
|
||||
safeGo(&wg, "ipfs", func() {
|
||||
rpt.IPFS = collectIPFS()
|
||||
})
|
||||
|
||||
safeGo(&wg, "gateway", func() {
|
||||
rpt.Gateway = collectGateway()
|
||||
})
|
||||
|
||||
safeGo(&wg, "wireguard", func() {
|
||||
rpt.WireGuard = collectWireGuard()
|
||||
})
|
||||
|
||||
safeGo(&wg, "dns", func() {
|
||||
// Only collect DNS info if this node runs CoreDNS.
|
||||
if _, err := os.Stat("/etc/coredns"); err == nil {
|
||||
rpt.DNS = collectDNS()
|
||||
}
|
||||
})
|
||||
|
||||
safeGo(&wg, "anyone", func() {
|
||||
rpt.Anyone = collectAnyone()
|
||||
})
|
||||
|
||||
safeGo(&wg, "network", func() {
|
||||
rpt.Network = collectNetwork()
|
||||
})
|
||||
|
||||
safeGo(&wg, "processes", func() {
|
||||
rpt.Processes = collectProcesses()
|
||||
})
|
||||
|
||||
safeGo(&wg, "namespaces", func() {
|
||||
rpt.Namespaces = collectNamespaces()
|
||||
})
|
||||
|
||||
wg.Wait()
|
||||
|
||||
// Populate top-level WireGuard IP from the WireGuard collector result.
|
||||
if rpt.WireGuard != nil && rpt.WireGuard.WgIP != "" {
|
||||
rpt.WGIP = rpt.WireGuard.WgIP
|
||||
}
|
||||
|
||||
rpt.CollectMS = time.Since(start).Milliseconds()
|
||||
|
||||
enc := json.NewEncoder(os.Stdout)
|
||||
if !jsonFlag {
|
||||
enc.SetIndent("", " ")
|
||||
}
|
||||
return enc.Encode(rpt)
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Helpers
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
// runCmd executes an external command with a 4-second timeout and returns its
|
||||
// combined stdout as a trimmed string.
|
||||
func runCmd(ctx context.Context, name string, args ...string) (string, error) {
|
||||
ctx, cancel := context.WithTimeout(ctx, 4*time.Second)
|
||||
defer cancel()
|
||||
|
||||
cmd := exec.CommandContext(ctx, name, args...)
|
||||
out, err := cmd.Output()
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("%s: %w", name, err)
|
||||
}
|
||||
return strings.TrimSpace(string(out)), nil
|
||||
}
|
||||
|
||||
// httpGet performs an HTTP GET request with a 3-second timeout and returns the
|
||||
// response body bytes.
|
||||
func httpGet(ctx context.Context, url string) ([]byte, error) {
|
||||
ctx, cancel := context.WithTimeout(ctx, 3*time.Second)
|
||||
defer cancel()
|
||||
|
||||
req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
resp, err := http.DefaultClient.Do(req)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
body, err := io.ReadAll(resp.Body)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if resp.StatusCode >= 400 {
|
||||
return body, fmt.Errorf("HTTP %d from %s", resp.StatusCode, url)
|
||||
}
|
||||
return body, nil
|
||||
}
|
||||
260
pkg/cli/production/report/rqlite.go
Normal file
260
pkg/cli/production/report/rqlite.go
Normal file
@ -0,0 +1,260 @@
|
||||
package report
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"strconv"
|
||||
"time"
|
||||
)
|
||||
|
||||
// rqliteBase is the local RQLite HTTP API endpoint probed by collectRQLite.
const rqliteBase = "http://localhost:5001"

// collectRQLite queries the local RQLite HTTP API to build a health report.
//
// Every probe is best-effort: when a step fails, the corresponding fields
// are simply left at their zero values instead of aborting the whole report.
// The per-step contexts each carry a 3-second timeout; their defer'd cancels
// all fire at function return, which is acceptable at this function size.
func collectRQLite() *RQLiteReport {
	r := &RQLiteReport{}

	// 1. GET /status — core Raft and node metadata.
	ctx, cancel := context.WithTimeout(context.Background(), 3*time.Second)
	defer cancel()

	statusBody, err := httpGet(ctx, rqliteBase+"/status")
	if err != nil {
		// Unreachable API: nothing else can work, so bail out early.
		r.Responsive = false
		return r
	}

	var status map[string]interface{}
	if err := json.Unmarshal(statusBody, &status); err != nil {
		r.Responsive = false
		return r
	}
	r.Responsive = true

	// Extract fields from the nested status JSON.
	r.RaftState = getNestedString(status, "store", "raft", "state")
	r.LeaderAddr = getNestedString(status, "store", "leader", "addr")
	r.LeaderID = getNestedString(status, "store", "leader", "node_id")
	r.NodeID = getNestedString(status, "store", "node_id")
	r.Term = uint64(getNestedFloat(status, "store", "raft", "current_term"))
	r.Applied = uint64(getNestedFloat(status, "store", "raft", "applied_index"))
	r.Commit = uint64(getNestedFloat(status, "store", "raft", "commit_index"))
	r.FsmPending = uint64(getNestedFloat(status, "store", "raft", "fsm_pending"))
	r.LastContact = getNestedString(status, "store", "raft", "last_contact")
	r.Voter = getNestedBool(status, "store", "raft", "voter")
	r.DBSize = getNestedString(status, "store", "sqlite3", "db_size_friendly")
	r.Uptime = getNestedString(status, "http", "uptime")
	r.Version = getNestedString(status, "build", "version")
	r.Goroutines = int(getNestedFloat(status, "runtime", "num_goroutine"))

	// HeapMB: bytes → MB.
	heapBytes := getNestedFloat(status, "runtime", "memory", "heap_alloc")
	if heapBytes > 0 {
		r.HeapMB = int(heapBytes / (1024 * 1024))
	}

	// NumPeers may be a number or a string in the JSON; handle both.
	r.NumPeers = getNestedInt(status, "store", "raft", "num_peers")

	// 2. GET /nodes?nonvoters — cluster node list.
	{
		ctx2, cancel2 := context.WithTimeout(context.Background(), 3*time.Second)
		defer cancel2()

		if body, err := httpGet(ctx2, rqliteBase+"/nodes?nonvoters"); err == nil {
			// Anonymous struct mirrors RQLite's per-node JSON shape.
			var rawNodes map[string]struct {
				Addr      string  `json:"addr"`
				Reachable bool    `json:"reachable"`
				Leader    bool    `json:"leader"`
				Voter     bool    `json:"voter"`
				Time      float64 `json:"time"`
				Error     string  `json:"error"`
			}
			if err := json.Unmarshal(body, &rawNodes); err == nil {
				r.Nodes = make(map[string]RQLiteNodeInfo, len(rawNodes))
				for id, n := range rawNodes {
					r.Nodes[id] = RQLiteNodeInfo{
						Reachable: n.Reachable,
						Leader:    n.Leader,
						Voter:     n.Voter,
						TimeMS:    n.Time * 1000, // seconds → milliseconds
						Error:     n.Error,
					}
				}
			}
		}
	}

	// 3. GET /readyz — readiness probe; only the status code matters.
	{
		ctx3, cancel3 := context.WithTimeout(context.Background(), 3*time.Second)
		defer cancel3()

		req, err := http.NewRequestWithContext(ctx3, http.MethodGet, rqliteBase+"/readyz", nil)
		if err == nil {
			if resp, err := http.DefaultClient.Do(req); err == nil {
				resp.Body.Close()
				r.Ready = resp.StatusCode == http.StatusOK
			}
		}
	}

	// 4. POST /db/query?level=strong — strong read test. A strong-consistency
	// SELECT succeeds only when the node can reach the Raft leader.
	{
		ctx4, cancel4 := context.WithTimeout(context.Background(), 3*time.Second)
		defer cancel4()

		payload := []byte(`["SELECT 1"]`)
		req, err := http.NewRequestWithContext(ctx4, http.MethodPost, rqliteBase+"/db/query?level=strong", bytes.NewReader(payload))
		if err == nil {
			req.Header.Set("Content-Type", "application/json")
			if resp, err := http.DefaultClient.Do(req); err == nil {
				// Drain the body so the HTTP transport can reuse the connection.
				io.Copy(io.Discard, resp.Body)
				resp.Body.Close()
				r.StrongRead = resp.StatusCode == http.StatusOK
			}
		}
	}

	// 5. GET /debug/vars — error counters (expvar-style flat JSON map).
	{
		ctx5, cancel5 := context.WithTimeout(context.Background(), 3*time.Second)
		defer cancel5()

		if body, err := httpGet(ctx5, rqliteBase+"/debug/vars"); err == nil {
			var vars map[string]interface{}
			if err := json.Unmarshal(body, &vars); err == nil {
				r.DebugVars = &RQLiteDebugVarsReport{
					QueryErrors:      jsonUint64(vars, "api_query_errors"),
					ExecuteErrors:    jsonUint64(vars, "api_execute_errors"),
					RemoteExecErrors: jsonUint64(vars, "api_remote_exec_errors"),
					LeaderNotFound:   jsonUint64(vars, "store_leader_not_found"),
					SnapshotErrors:   jsonUint64(vars, "snapshot_errors"),
					ClientRetries:    jsonUint64(vars, "client_retries"),
					ClientTimeouts:   jsonUint64(vars, "client_timeouts"),
				}
			}
		}
	}

	return r
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Nested-map extraction helpers
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
// getNestedString traverses nested map[string]interface{} values and returns
|
||||
// the final value as a string. Returns "" if any key is missing or the leaf
|
||||
// is not a string.
|
||||
func getNestedString(m map[string]interface{}, keys ...string) string {
|
||||
v := getNestedValue(m, keys...)
|
||||
if v == nil {
|
||||
return ""
|
||||
}
|
||||
if s, ok := v.(string); ok {
|
||||
return s
|
||||
}
|
||||
return fmt.Sprintf("%v", v)
|
||||
}
|
||||
|
||||
// getNestedFloat traverses nested maps and returns the leaf as a float64.
|
||||
// JSON numbers are decoded as float64 by encoding/json into interface{}.
|
||||
func getNestedFloat(m map[string]interface{}, keys ...string) float64 {
|
||||
v := getNestedValue(m, keys...)
|
||||
if v == nil {
|
||||
return 0
|
||||
}
|
||||
switch n := v.(type) {
|
||||
case float64:
|
||||
return n
|
||||
case json.Number:
|
||||
if f, err := n.Float64(); err == nil {
|
||||
return f
|
||||
}
|
||||
case string:
|
||||
if f, err := strconv.ParseFloat(n, 64); err == nil {
|
||||
return f
|
||||
}
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
// getNestedBool traverses nested maps and returns the leaf as a bool.
|
||||
func getNestedBool(m map[string]interface{}, keys ...string) bool {
|
||||
v := getNestedValue(m, keys...)
|
||||
if v == nil {
|
||||
return false
|
||||
}
|
||||
if b, ok := v.(bool); ok {
|
||||
return b
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// getNestedInt traverses nested maps and returns the leaf as an int.
|
||||
// Handles both numeric and string representations (RQLite sometimes
|
||||
// returns num_peers as a string).
|
||||
func getNestedInt(m map[string]interface{}, keys ...string) int {
|
||||
v := getNestedValue(m, keys...)
|
||||
if v == nil {
|
||||
return 0
|
||||
}
|
||||
switch n := v.(type) {
|
||||
case float64:
|
||||
return int(n)
|
||||
case json.Number:
|
||||
if i, err := n.Int64(); err == nil {
|
||||
return int(i)
|
||||
}
|
||||
case string:
|
||||
if i, err := strconv.Atoi(n); err == nil {
|
||||
return i
|
||||
}
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
// getNestedValue follows the key path through nested map[string]interface{}
// layers and returns the value at the end of the path. It returns nil when
// the path is empty, a key is absent, or an intermediate value is not a map.
func getNestedValue(m map[string]interface{}, keys ...string) interface{} {
	if len(keys) == 0 {
		return nil
	}
	var node interface{} = m
	for _, k := range keys {
		asMap, isMap := node.(map[string]interface{})
		if !isMap {
			return nil
		}
		child, found := asMap[k]
		if !found {
			return nil
		}
		node = child
	}
	return node
}
|
||||
|
||||
// jsonUint64 reads a single top-level key from a flat map and coerces it to
// uint64, accepting float64, json.Number, and decimal-string encodings.
// Absent keys and unconvertible values yield 0.
func jsonUint64(m map[string]interface{}, key string) uint64 {
	switch v := m[key].(type) {
	case float64:
		return uint64(v)
	case json.Number:
		if i, err := v.Int64(); err == nil {
			return uint64(i)
		}
	case string:
		if u, err := strconv.ParseUint(v, 10, 64); err == nil {
			return u
		}
	}
	return 0
}
|
||||
201
pkg/cli/production/report/services.go
Normal file
201
pkg/cli/production/report/services.go
Normal file
@ -0,0 +1,201 @@
|
||||
package report
|
||||
|
||||
import (
|
||||
"context"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
)
|
||||
|
||||
// coreServices lists the systemd units that make up a production Orama
// node; collectServices probes each of them on every report, in this order.
var coreServices = []string{
	"orama-node",
	"orama-gateway",
	"orama-olric",
	"orama-ipfs",
	"orama-ipfs-cluster",
	"orama-anyone-relay",
	"orama-anyone-client",
	"coredns",
	"caddy",
	"wg-quick@wg0",
}
|
||||
|
||||
func collectServices() *ServicesReport {
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
|
||||
defer cancel()
|
||||
|
||||
report := &ServicesReport{}
|
||||
|
||||
// Collect core services.
|
||||
for _, name := range coreServices {
|
||||
info := collectServiceInfo(ctx, name)
|
||||
report.Services = append(report.Services, info)
|
||||
}
|
||||
|
||||
// Discover namespace services (orama-deploy-*.service).
|
||||
nsServices := discoverNamespaceServices()
|
||||
for _, name := range nsServices {
|
||||
info := collectServiceInfo(ctx, name)
|
||||
report.Services = append(report.Services, info)
|
||||
}
|
||||
|
||||
// Collect failed units.
|
||||
report.FailedUnits = collectFailedUnits(ctx)
|
||||
|
||||
return report
|
||||
}
|
||||
|
||||
func collectServiceInfo(ctx context.Context, name string) ServiceInfo {
|
||||
info := ServiceInfo{Name: name}
|
||||
|
||||
// Get all properties in a single systemctl show call.
|
||||
out, err := runCmd(ctx, "systemctl", "show", name,
|
||||
"--property=ActiveState,SubState,NRestarts,ActiveEnterTimestamp,MemoryCurrent,CPUUsageNSec,MainPID")
|
||||
if err != nil {
|
||||
info.ActiveState = "unknown"
|
||||
info.SubState = "unknown"
|
||||
return info
|
||||
}
|
||||
|
||||
props := parseProperties(out)
|
||||
|
||||
info.ActiveState = props["ActiveState"]
|
||||
info.SubState = props["SubState"]
|
||||
info.NRestarts = parseInt(props["NRestarts"])
|
||||
info.MainPID = parseInt(props["MainPID"])
|
||||
info.MemoryCurrentMB = parseMemoryMB(props["MemoryCurrent"])
|
||||
info.CPUUsageNSec = parseInt64(props["CPUUsageNSec"])
|
||||
|
||||
// Calculate uptime from ActiveEnterTimestamp.
|
||||
if ts := props["ActiveEnterTimestamp"]; ts != "" && ts != "n/a" {
|
||||
info.ActiveSinceSec = parseActiveSince(ts)
|
||||
}
|
||||
|
||||
// Check if service is enabled.
|
||||
enabledOut, err := runCmd(ctx, "systemctl", "is-enabled", name)
|
||||
if err == nil && strings.TrimSpace(enabledOut) == "enabled" {
|
||||
info.Enabled = true
|
||||
}
|
||||
|
||||
// Restart loop detection: restarted more than 3 times and running for less than 5 minutes.
|
||||
info.RestartLoopRisk = info.NRestarts > 3 && info.ActiveSinceSec > 0 && info.ActiveSinceSec < 300
|
||||
|
||||
return info
|
||||
}
|
||||
|
||||
// parseProperties turns `systemctl show` "Key=Value" output into a map.
// Blank lines and lines with no '=' are skipped; only the first '='
// separates key from value, so values may themselves contain '='.
func parseProperties(output string) map[string]string {
	props := make(map[string]string)
	for _, raw := range strings.Split(output, "\n") {
		line := strings.TrimSpace(raw)
		if line == "" {
			continue
		}
		key, value, found := strings.Cut(line, "=")
		if !found {
			continue
		}
		props[key] = value
	}
	return props
}
|
||||
|
||||
// parseMemoryMB converts systemd's MemoryCurrent property — a byte count,
// or the sentinels "[not set]" / "infinity" — into whole megabytes.
// Unparseable input yields 0.
func parseMemoryMB(s string) int {
	trimmed := strings.TrimSpace(s)
	switch trimmed {
	case "", "[not set]", "infinity":
		return 0
	}
	b, err := strconv.ParseUint(trimmed, 10, 64)
	if err != nil {
		return 0
	}
	// Bytes → MB; >>20 is the unsigned equivalent of /(1024*1024).
	return int(b >> 20)
}
|
||||
|
||||
// parseActiveSince parses a systemd ActiveEnterTimestamp such as
// "Fri 2024-01-05 10:30:00 UTC" and returns the whole seconds elapsed since
// that instant, clamped at zero. Unparseable input returns 0.
//
// NOTE(review): layouts with a named zone ("MST") only resolve a real UTC
// offset when the abbreviation is known to the local system; unknown
// abbreviations parse with a zero offset. Confirm nodes log in UTC.
func parseActiveSince(ts string) int64 {
	ts = strings.TrimSpace(ts)
	for _, layout := range []string{
		"Mon 2006-01-02 15:04:05 MST",
		"Mon 2006-01-02 15:04:05 -0700",
	} {
		t, err := time.Parse(layout, ts)
		if err != nil {
			continue
		}
		if sec := int64(time.Since(t).Seconds()); sec > 0 {
			return sec
		}
		// Future timestamps (clock skew) clamp to zero.
		return 0
	}
	return 0
}
|
||||
|
||||
// parseInt converts a systemctl property value to int. Blank input,
// "[not set]", and malformed numbers all collapse to 0.
func parseInt(s string) int {
	if t := strings.TrimSpace(s); t != "" && t != "[not set]" {
		n, _ := strconv.Atoi(t)
		return n
	}
	return 0
}
|
||||
|
||||
// parseInt64 converts a systemctl property value to int64. Blank input,
// "[not set]", and malformed numbers all collapse to 0.
func parseInt64(s string) int64 {
	if t := strings.TrimSpace(s); t != "" && t != "[not set]" {
		v, _ := strconv.ParseInt(t, 10, 64)
		return v
	}
	return 0
}
|
||||
|
||||
// collectFailedUnits runs `systemctl --failed` and extracts unit names from the first column.
|
||||
func collectFailedUnits(ctx context.Context) []string {
|
||||
out, err := runCmd(ctx, "systemctl", "--failed", "--no-legend", "--no-pager")
|
||||
if err != nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
var units []string
|
||||
for _, line := range strings.Split(out, "\n") {
|
||||
line = strings.TrimSpace(line)
|
||||
if line == "" {
|
||||
continue
|
||||
}
|
||||
fields := strings.Fields(line)
|
||||
if len(fields) > 0 {
|
||||
// First column may have a bullet prefix; strip common markers.
|
||||
unit := strings.TrimLeft(fields[0], "●* ")
|
||||
if unit != "" {
|
||||
units = append(units, unit)
|
||||
}
|
||||
}
|
||||
}
|
||||
return units
|
||||
}
|
||||
|
||||
// discoverNamespaceServices globs /etc/systemd/system for orama-deploy-*
// unit files and returns their unit names with the .service suffix removed.
// Returns nil when the glob fails or nothing matches.
func discoverNamespaceServices() []string {
	matches, err := filepath.Glob("/etc/systemd/system/orama-deploy-*.service")
	if err != nil || len(matches) == 0 {
		return nil
	}

	services := make([]string, 0, len(matches))
	for _, p := range matches {
		services = append(services, strings.TrimSuffix(filepath.Base(p), ".service"))
	}
	return services
}
|
||||
200
pkg/cli/production/report/system.go
Normal file
200
pkg/cli/production/report/system.go
Normal file
@ -0,0 +1,200 @@
|
||||
package report
|
||||
|
||||
import (
|
||||
"context"
|
||||
"os"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
)
|
||||
|
||||
// collectSystem gathers system-level metrics using local commands and /proc files.
//
// Each numbered probe below is best-effort: on any error the corresponding
// fields simply keep their zero values. The per-probe `defer cancel()` calls
// all fire at function return, which is acceptable at this function size.
func collectSystem() *SystemReport {
	r := &SystemReport{}

	// 1. Uptime seconds: read /proc/uptime, parse first field
	if data, err := os.ReadFile("/proc/uptime"); err == nil {
		fields := strings.Fields(string(data))
		if len(fields) >= 1 {
			if f, err := strconv.ParseFloat(fields[0], 64); err == nil {
				r.UptimeSeconds = int64(f)
			}
		}
	}

	// 2. Uptime since: run `uptime -s`
	{
		ctx, cancel := context.WithTimeout(context.Background(), 4*time.Second)
		defer cancel()
		if out, err := runCmd(ctx, "uptime", "-s"); err == nil {
			r.UptimeSince = strings.TrimSpace(out)
		}
	}

	// 3. CPU count: run `nproc`
	{
		ctx, cancel := context.WithTimeout(context.Background(), 4*time.Second)
		defer cancel()
		if out, err := runCmd(ctx, "nproc"); err == nil {
			if n, err := strconv.Atoi(strings.TrimSpace(out)); err == nil {
				r.CPUCount = n
			}
		}
	}

	// 4. Load averages: read /proc/loadavg, parse first 3 fields
	if data, err := os.ReadFile("/proc/loadavg"); err == nil {
		fields := strings.Fields(string(data))
		if len(fields) >= 3 {
			if f, err := strconv.ParseFloat(fields[0], 64); err == nil {
				r.LoadAvg1 = f
			}
			if f, err := strconv.ParseFloat(fields[1], 64); err == nil {
				r.LoadAvg5 = f
			}
			if f, err := strconv.ParseFloat(fields[2], 64); err == nil {
				r.LoadAvg15 = f
			}
		}
	}

	// 5 & 6. Memory and swap: run `free -m`, parse Mem: and Swap: lines
	{
		ctx, cancel := context.WithTimeout(context.Background(), 4*time.Second)
		defer cancel()
		if out, err := runCmd(ctx, "free", "-m"); err == nil {
			for _, line := range strings.Split(out, "\n") {
				fields := strings.Fields(line)
				if len(fields) >= 4 && fields[0] == "Mem:" {
					// Mem: total used free shared buff/cache available
					if n, err := strconv.Atoi(fields[1]); err == nil {
						r.MemTotalMB = n
					}
					if n, err := strconv.Atoi(fields[2]); err == nil {
						r.MemUsedMB = n
					}
					if n, err := strconv.Atoi(fields[3]); err == nil {
						r.MemFreeMB = n
					}
					if len(fields) >= 7 {
						if n, err := strconv.Atoi(fields[6]); err == nil {
							r.MemAvailMB = n
						}
					}
					// Usage percentage is based on the "available" column.
					// NOTE(review): if that column is missing, MemAvailMB
					// stays 0 and this reports 100% — confirm acceptable.
					if r.MemTotalMB > 0 {
						r.MemUsePct = (r.MemTotalMB - r.MemAvailMB) * 100 / r.MemTotalMB
					}
				}
				if len(fields) >= 3 && fields[0] == "Swap:" {
					if n, err := strconv.Atoi(fields[1]); err == nil {
						r.SwapTotalMB = n
					}
					if n, err := strconv.Atoi(fields[2]); err == nil {
						r.SwapUsedMB = n
					}
				}
			}
		}
	}

	// 7. Disk usage: run `df -h /` and `df -h /opt/orama`, use whichever has higher usage
	{
		type diskInfo struct {
			total  string
			used   string
			avail  string
			usePct int
		}

		// parseDf extracts the size columns from the second line of
		// `df -h` output (line 1 is the header, line 2 the data row).
		parseDf := func(out string) *diskInfo {
			lines := strings.Split(out, "\n")
			if len(lines) < 2 {
				return nil
			}
			fields := strings.Fields(lines[1])
			if len(fields) < 5 {
				return nil
			}
			pctStr := strings.TrimSuffix(fields[4], "%")
			pct, err := strconv.Atoi(pctStr)
			if err != nil {
				return nil
			}
			return &diskInfo{
				total:  fields[1],
				used:   fields[2],
				avail:  fields[3],
				usePct: pct,
			}
		}

		ctx1, cancel1 := context.WithTimeout(context.Background(), 4*time.Second)
		defer cancel1()
		rootDisk := (*diskInfo)(nil)
		if out, err := runCmd(ctx1, "df", "-h", "/"); err == nil {
			rootDisk = parseDf(out)
		}

		ctx2, cancel2 := context.WithTimeout(context.Background(), 4*time.Second)
		defer cancel2()
		optDisk := (*diskInfo)(nil)
		if out, err := runCmd(ctx2, "df", "-h", "/opt/orama"); err == nil {
			optDisk = parseDf(out)
		}

		// Report whichever mount is fuller; both probes may resolve to the
		// same filesystem, in which case the numbers coincide.
		best := rootDisk
		if optDisk != nil && (best == nil || optDisk.usePct > best.usePct) {
			best = optDisk
		}
		if best != nil {
			r.DiskTotalGB = best.total
			r.DiskUsedGB = best.used
			r.DiskAvailGB = best.avail
			r.DiskUsePct = best.usePct
		}
	}

	// 8. Inode usage: run `df -i /`, parse Use% from second line
	{
		ctx, cancel := context.WithTimeout(context.Background(), 4*time.Second)
		defer cancel()
		if out, err := runCmd(ctx, "df", "-i", "/"); err == nil {
			lines := strings.Split(out, "\n")
			if len(lines) >= 2 {
				fields := strings.Fields(lines[1])
				if len(fields) >= 5 {
					pctStr := strings.TrimSuffix(fields[4], "%")
					if n, err := strconv.Atoi(pctStr); err == nil {
						r.InodePct = n
					}
				}
			}
		}
	}

	// 9. OOM kills: run `dmesg 2>/dev/null | grep -ci 'out of memory'` via bash -c
	{
		ctx, cancel := context.WithTimeout(context.Background(), 4*time.Second)
		defer cancel()
		if out, err := runCmd(ctx, "bash", "-c", "dmesg 2>/dev/null | grep -ci 'out of memory'"); err == nil {
			if n, err := strconv.Atoi(strings.TrimSpace(out)); err == nil {
				r.OOMKills = n
			}
		}
		// On error, OOMKills stays 0 (zero value)
		// NOTE(review): grep exits non-zero when there are no matches, so a
		// clean system takes the error path here — the result is still 0.
	}

	// 10. Kernel version: run `uname -r`
	{
		ctx, cancel := context.WithTimeout(context.Background(), 4*time.Second)
		defer cancel()
		if out, err := runCmd(ctx, "uname", "-r"); err == nil {
			r.KernelVersion = strings.TrimSpace(out)
		}
	}

	// 11. Current unix timestamp
	r.TimeUnix = time.Now().Unix()

	return r
}
|
||||
275
pkg/cli/production/report/types.go
Normal file
275
pkg/cli/production/report/types.go
Normal file
@ -0,0 +1,275 @@
|
||||
package report
|
||||
|
||||
import "time"
|
||||
|
||||
// NodeReport is the top-level JSON output of `orama node report --json`.
// Pointer sub-reports tagged omitempty are dropped from the JSON entirely
// when their collector did not run.
type NodeReport struct {
	Timestamp time.Time `json:"timestamp"`
	Hostname  string    `json:"hostname"`
	PublicIP  string    `json:"public_ip,omitempty"`
	WGIP      string    `json:"wireguard_ip,omitempty"`
	Version   string    `json:"version"`
	CollectMS int64     `json:"collect_ms"`
	Errors    []string  `json:"errors,omitempty"`

	System     *SystemReport     `json:"system"`
	Services   *ServicesReport   `json:"services"`
	RQLite     *RQLiteReport     `json:"rqlite,omitempty"`
	Olric      *OlricReport      `json:"olric,omitempty"`
	IPFS       *IPFSReport       `json:"ipfs,omitempty"`
	Gateway    *GatewayReport    `json:"gateway,omitempty"`
	WireGuard  *WireGuardReport  `json:"wireguard,omitempty"`
	DNS        *DNSReport        `json:"dns,omitempty"`
	Anyone     *AnyoneReport     `json:"anyone,omitempty"`
	Network    *NetworkReport    `json:"network"`
	Processes  *ProcessReport    `json:"processes"`
	Namespaces []NamespaceReport `json:"namespaces,omitempty"`
}

// --- System ---

// SystemReport carries host-level metrics (uptime, load, memory, disk,
// inodes, OOM kills, kernel) gathered by collectSystem.
type SystemReport struct {
	UptimeSeconds int64   `json:"uptime_seconds"`
	UptimeSince   string  `json:"uptime_since"`
	CPUCount      int     `json:"cpu_count"`
	LoadAvg1      float64 `json:"load_avg_1"`
	LoadAvg5      float64 `json:"load_avg_5"`
	LoadAvg15     float64 `json:"load_avg_15"`
	MemTotalMB    int     `json:"mem_total_mb"`
	MemUsedMB     int     `json:"mem_used_mb"`
	MemFreeMB     int     `json:"mem_free_mb"`
	MemAvailMB    int     `json:"mem_available_mb"`
	MemUsePct     int     `json:"mem_use_pct"`
	SwapTotalMB   int     `json:"swap_total_mb"`
	SwapUsedMB    int     `json:"swap_used_mb"`
	// Disk figures are human-readable strings taken verbatim from `df -h`.
	DiskTotalGB   string `json:"disk_total_gb"`
	DiskUsedGB    string `json:"disk_used_gb"`
	DiskAvailGB   string `json:"disk_avail_gb"`
	DiskUsePct    int    `json:"disk_use_pct"`
	InodePct      int    `json:"inode_use_pct"`
	OOMKills      int    `json:"oom_kills"`
	KernelVersion string `json:"kernel_version"`
	TimeUnix      int64  `json:"time_unix"`
}

// --- Systemd Services ---

// ServicesReport aggregates per-unit status plus systemd's failed-unit list.
type ServicesReport struct {
	Services    []ServiceInfo `json:"services"`
	FailedUnits []string      `json:"failed_units,omitempty"`
}

// ServiceInfo describes one systemd unit as reported by `systemctl show`.
type ServiceInfo struct {
	Name            string `json:"name"`
	ActiveState     string `json:"active_state"`
	SubState        string `json:"sub_state"`
	Enabled         bool   `json:"enabled"`
	NRestarts       int    `json:"n_restarts"`
	ActiveSinceSec  int64  `json:"active_since_sec"`
	MemoryCurrentMB int    `json:"memory_current_mb"`
	CPUUsageNSec    int64  `json:"cpu_usage_nsec"`
	MainPID         int    `json:"main_pid"`
	// RestartLoopRisk is set when the unit restarted >3 times and has been
	// up for under 5 minutes (see collectServiceInfo).
	RestartLoopRisk bool `json:"restart_loop_risk"`
}

// --- RQLite ---

// RQLiteReport summarizes the local RQLite node's health as probed over its
// HTTP API (/status, /nodes, /readyz, /db/query, /debug/vars).
type RQLiteReport struct {
	Responsive  bool                      `json:"responsive"`
	Ready       bool                      `json:"ready"`
	StrongRead  bool                      `json:"strong_read"`
	RaftState   string                    `json:"raft_state,omitempty"`
	LeaderAddr  string                    `json:"leader_addr,omitempty"`
	LeaderID    string                    `json:"leader_id,omitempty"`
	NodeID      string                    `json:"node_id,omitempty"`
	Term        uint64                    `json:"term,omitempty"`
	Applied     uint64                    `json:"applied_index,omitempty"`
	Commit      uint64                    `json:"commit_index,omitempty"`
	FsmPending  uint64                    `json:"fsm_pending,omitempty"`
	LastContact string                    `json:"last_contact,omitempty"`
	NumPeers    int                       `json:"num_peers,omitempty"`
	Voter       bool                      `json:"voter,omitempty"`
	DBSize      string                    `json:"db_size,omitempty"`
	Uptime      string                    `json:"uptime,omitempty"`
	Version     string                    `json:"version,omitempty"`
	Goroutines  int                       `json:"goroutines,omitempty"`
	HeapMB      int                       `json:"heap_mb,omitempty"`
	Nodes       map[string]RQLiteNodeInfo `json:"nodes,omitempty"`
	DebugVars   *RQLiteDebugVarsReport    `json:"debug_vars,omitempty"`
}

// RQLiteNodeInfo describes one cluster member from GET /nodes?nonvoters.
type RQLiteNodeInfo struct {
	Reachable bool    `json:"reachable"`
	Leader    bool    `json:"leader"`
	Voter     bool    `json:"voter"`
	TimeMS    float64 `json:"time_ms"`
	Error     string  `json:"error,omitempty"`
}

// RQLiteDebugVarsReport holds error counters scraped from /debug/vars.
type RQLiteDebugVarsReport struct {
	QueryErrors      uint64 `json:"query_errors"`
	ExecuteErrors    uint64 `json:"execute_errors"`
	RemoteExecErrors uint64 `json:"remote_exec_errors"`
	LeaderNotFound   uint64 `json:"leader_not_found"`
	SnapshotErrors   uint64 `json:"snapshot_errors"`
	ClientRetries    uint64 `json:"client_retries"`
	ClientTimeouts   uint64 `json:"client_timeouts"`
}

// --- Olric ---

// OlricReport summarizes the Olric cache service and its memberlist state.
type OlricReport struct {
	ServiceActive bool     `json:"service_active"`
	MemberlistUp  bool     `json:"memberlist_up"`
	MemberCount   int      `json:"member_count,omitempty"`
	Members       []string `json:"members,omitempty"`
	Coordinator   string   `json:"coordinator,omitempty"`
	ProcessMemMB  int      `json:"process_mem_mb"`
	RestartCount  int      `json:"restart_count"`
	LogErrors     int      `json:"log_errors_1h"`
	LogSuspects   int      `json:"log_suspects_1h"`
	LogFlapping   int      `json:"log_flapping_1h"`
}

// --- IPFS ---

// IPFSReport summarizes the Kubo daemon and IPFS Cluster status.
type IPFSReport struct {
	DaemonActive     bool   `json:"daemon_active"`
	ClusterActive    bool   `json:"cluster_active"`
	SwarmPeerCount   int    `json:"swarm_peer_count"`
	ClusterPeerCount int    `json:"cluster_peer_count"`
	ClusterErrors    int    `json:"cluster_errors"`
	RepoSizeBytes    int64  `json:"repo_size_bytes"`
	RepoMaxBytes     int64  `json:"repo_max_bytes"`
	RepoUsePct       int    `json:"repo_use_pct"`
	KuboVersion      string `json:"kubo_version,omitempty"`
	ClusterVersion   string `json:"cluster_version,omitempty"`
	HasSwarmKey      bool   `json:"has_swarm_key"`
	BootstrapEmpty   bool   `json:"bootstrap_empty"`
}

// --- Gateway ---

// GatewayReport summarizes the gateway's health endpoint response.
type GatewayReport struct {
	Responsive bool                       `json:"responsive"`
	HTTPStatus int                        `json:"http_status,omitempty"`
	Version    string                     `json:"version,omitempty"`
	Subsystems map[string]SubsystemHealth `json:"subsystems,omitempty"`
}

// SubsystemHealth is one entry of the gateway's per-subsystem health map.
type SubsystemHealth struct {
	Status  string `json:"status"`
	Latency string `json:"latency,omitempty"`
	Error   string `json:"error,omitempty"`
}

// --- WireGuard ---

// WireGuardReport summarizes the wg0 interface, its service, and its peers.
type WireGuardReport struct {
	InterfaceUp   bool         `json:"interface_up"`
	ServiceActive bool         `json:"service_active"`
	WgIP          string       `json:"wg_ip,omitempty"`
	ListenPort    int          `json:"listen_port,omitempty"`
	PeerCount     int          `json:"peer_count"`
	MTU           int          `json:"mtu,omitempty"`
	ConfigExists  bool         `json:"config_exists"`
	ConfigPerms   string       `json:"config_perms,omitempty"`
	Peers         []WGPeerInfo `json:"peers,omitempty"`
}

// WGPeerInfo describes one WireGuard peer's handshake and transfer stats.
type WGPeerInfo struct {
	PublicKey       string `json:"public_key"`
	Endpoint        string `json:"endpoint,omitempty"`
	AllowedIPs      string `json:"allowed_ips"`
	LatestHandshake int64  `json:"latest_handshake"`
	HandshakeAgeSec int64  `json:"handshake_age_sec"`
	TransferRx      int64  `json:"transfer_rx_bytes"`
	TransferTx      int64  `json:"transfer_tx_bytes"`
	Keepalive       int    `json:"keepalive,omitempty"`
}

// --- DNS ---

// DNSReport summarizes CoreDNS/Caddy service status, bound ports, DNS
// resolution checks, and TLS certificate expiry.
type DNSReport struct {
	CoreDNSActive    bool `json:"coredns_active"`
	CaddyActive      bool `json:"caddy_active"`
	Port53Bound      bool `json:"port_53_bound"`
	Port80Bound      bool `json:"port_80_bound"`
	Port443Bound     bool `json:"port_443_bound"`
	CoreDNSMemMB     int  `json:"coredns_mem_mb"`
	CoreDNSRestarts  int  `json:"coredns_restarts"`
	LogErrors        int  `json:"log_errors_5m"`
	CorefileExists   bool `json:"corefile_exists"`
	SOAResolves      bool `json:"soa_resolves"`
	NSResolves       bool `json:"ns_resolves"`
	NSRecordCount    int  `json:"ns_record_count"`
	WildcardResolves bool `json:"wildcard_resolves"`
	BaseAResolves    bool `json:"base_a_resolves"`
	BaseTLSDaysLeft  int  `json:"base_tls_days_left"`
	WildTLSDaysLeft  int  `json:"wild_tls_days_left"`
}

// --- Anyone ---

// AnyoneReport summarizes the Anyone relay/client processes and their
// bootstrap state.
type AnyoneReport struct {
	RelayActive      bool   `json:"relay_active"`
	ClientActive     bool   `json:"client_active"`
	Mode             string `json:"mode,omitempty"`
	ORPortListening  bool   `json:"orport_listening"`
	SocksListening   bool   `json:"socks_listening"`
	ControlListening bool   `json:"control_listening"`
	Bootstrapped     bool   `json:"bootstrapped"`
	BootstrapPct     int    `json:"bootstrap_pct"`
	Fingerprint      string `json:"fingerprint,omitempty"`
	Nickname         string `json:"nickname,omitempty"`
}

// --- Network ---

// NetworkReport summarizes connectivity, routing, TCP state counts,
// listening ports, and firewall status.
type NetworkReport struct {
	InternetReachable bool       `json:"internet_reachable"`
	DefaultRoute      bool       `json:"default_route"`
	WGRouteExists     bool       `json:"wg_route_exists"`
	TCPEstablished    int        `json:"tcp_established"`
	TCPTimeWait       int        `json:"tcp_time_wait"`
	TCPRetransRate    float64    `json:"tcp_retrans_pct"`
	ListeningPorts    []PortInfo `json:"listening_ports"`
	UFWActive         bool       `json:"ufw_active"`
	UFWRules          []string   `json:"ufw_rules,omitempty"`
}

// PortInfo is one listening socket: port, protocol, and owning process.
type PortInfo struct {
	Port    int    `json:"port"`
	Proto   string `json:"proto"`
	Process string `json:"process,omitempty"`
}

// --- Processes ---

// ProcessReport summarizes problem processes: zombies, orphans, and
// recent panic log counts.
type ProcessReport struct {
	ZombieCount int           `json:"zombie_count"`
	Zombies     []ProcessInfo `json:"zombies,omitempty"`
	OrphanCount int           `json:"orphan_count"`
	Orphans     []ProcessInfo `json:"orphans,omitempty"`
	PanicCount  int           `json:"panic_count_1h"`
}

// ProcessInfo identifies one process by pid/ppid with its state and command.
type ProcessInfo struct {
	PID     int    `json:"pid"`
	PPID    int    `json:"ppid"`
	State   string `json:"state"`
	Command string `json:"command"`
}

// --- Namespaces ---

// NamespaceReport summarizes one deployed namespace's per-tenant services.
type NamespaceReport struct {
	Name          string `json:"name"`
	PortBase      int    `json:"port_base"`
	RQLiteUp      bool   `json:"rqlite_up"`
	RQLiteState   string `json:"rqlite_state,omitempty"`
	RQLiteReady   bool   `json:"rqlite_ready"`
	OlricUp       bool   `json:"olric_up"`
	GatewayUp     bool   `json:"gateway_up"`
	GatewayStatus int    `json:"gateway_status,omitempty"`
}
|
||||
163
pkg/cli/production/report/wireguard.go
Normal file
163
pkg/cli/production/report/wireguard.go
Normal file
@ -0,0 +1,163 @@
|
||||
package report
|
||||
|
||||
import (
|
||||
"context"
|
||||
"os"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
)
|
||||
|
||||
// collectWireGuard gathers WireGuard interface status, peer information,
|
||||
// and configuration details using local commands and sysfs.
|
||||
func collectWireGuard() *WireGuardReport {
|
||||
r := &WireGuardReport{}
|
||||
|
||||
// 1. ServiceActive: check if wg-quick@wg0 systemd service is active
|
||||
{
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 4*time.Second)
|
||||
defer cancel()
|
||||
if out, err := runCmd(ctx, "systemctl", "is-active", "wg-quick@wg0"); err == nil {
|
||||
r.ServiceActive = strings.TrimSpace(out) == "active"
|
||||
}
|
||||
}
|
||||
|
||||
// 2. InterfaceUp: check if /sys/class/net/wg0 exists
|
||||
if _, err := os.Stat("/sys/class/net/wg0"); err == nil {
|
||||
r.InterfaceUp = true
|
||||
}
|
||||
|
||||
// If interface is not up, return partial data early.
|
||||
if !r.InterfaceUp {
|
||||
// Still check config existence even if interface is down.
|
||||
if _, err := os.Stat("/etc/wireguard/wg0.conf"); err == nil {
|
||||
r.ConfigExists = true
|
||||
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 4*time.Second)
|
||||
defer cancel()
|
||||
if out, err := runCmd(ctx, "stat", "-c", "%a", "/etc/wireguard/wg0.conf"); err == nil {
|
||||
r.ConfigPerms = strings.TrimSpace(out)
|
||||
}
|
||||
}
|
||||
return r
|
||||
}
|
||||
|
||||
// 3. WgIP: extract IP from `ip -4 addr show wg0`
|
||||
{
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 4*time.Second)
|
||||
defer cancel()
|
||||
if out, err := runCmd(ctx, "ip", "-4", "addr", "show", "wg0"); err == nil {
|
||||
for _, line := range strings.Split(out, "\n") {
|
||||
line = strings.TrimSpace(line)
|
||||
if strings.HasPrefix(line, "inet ") {
|
||||
// Line format: "inet X.X.X.X/Y scope ..."
|
||||
fields := strings.Fields(line)
|
||||
if len(fields) >= 2 {
|
||||
// Extract just the IP, strip the /prefix
|
||||
ip := fields[1]
|
||||
if idx := strings.Index(ip, "/"); idx != -1 {
|
||||
ip = ip[:idx]
|
||||
}
|
||||
r.WgIP = ip
|
||||
}
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// 4. MTU: read /sys/class/net/wg0/mtu
|
||||
if data, err := os.ReadFile("/sys/class/net/wg0/mtu"); err == nil {
|
||||
if n, err := strconv.Atoi(strings.TrimSpace(string(data))); err == nil {
|
||||
r.MTU = n
|
||||
}
|
||||
}
|
||||
|
||||
// 5. ListenPort: parse from `wg show wg0 listen-port`
|
||||
{
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 4*time.Second)
|
||||
defer cancel()
|
||||
if out, err := runCmd(ctx, "wg", "show", "wg0", "listen-port"); err == nil {
|
||||
if n, err := strconv.Atoi(strings.TrimSpace(out)); err == nil {
|
||||
r.ListenPort = n
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// 6. ConfigExists: check if /etc/wireguard/wg0.conf exists
|
||||
if _, err := os.Stat("/etc/wireguard/wg0.conf"); err == nil {
|
||||
r.ConfigExists = true
|
||||
}
|
||||
|
||||
// 7. ConfigPerms: run `stat -c '%a' /etc/wireguard/wg0.conf`
|
||||
if r.ConfigExists {
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 4*time.Second)
|
||||
defer cancel()
|
||||
if out, err := runCmd(ctx, "stat", "-c", "%a", "/etc/wireguard/wg0.conf"); err == nil {
|
||||
r.ConfigPerms = strings.TrimSpace(out)
|
||||
}
|
||||
}
|
||||
|
||||
// 8. Peers: run `wg show wg0 dump` and parse peer lines
|
||||
// Line 1: interface (private_key, public_key, listen_port, fwmark)
|
||||
// Line 2+: peers (public_key, preshared_key, endpoint, allowed_ips,
|
||||
// latest_handshake, transfer_rx, transfer_tx, persistent_keepalive)
|
||||
{
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 4*time.Second)
|
||||
defer cancel()
|
||||
if out, err := runCmd(ctx, "wg", "show", "wg0", "dump"); err == nil {
|
||||
lines := strings.Split(out, "\n")
|
||||
now := time.Now().Unix()
|
||||
for i, line := range lines {
|
||||
if i == 0 {
|
||||
// Skip interface line
|
||||
continue
|
||||
}
|
||||
line = strings.TrimSpace(line)
|
||||
if line == "" {
|
||||
continue
|
||||
}
|
||||
fields := strings.Split(line, "\t")
|
||||
if len(fields) < 8 {
|
||||
continue
|
||||
}
|
||||
|
||||
peer := WGPeerInfo{
|
||||
PublicKey: fields[0],
|
||||
Endpoint: fields[2],
|
||||
AllowedIPs: fields[3],
|
||||
}
|
||||
|
||||
// LatestHandshake: unix timestamp (0 = never)
|
||||
if ts, err := strconv.ParseInt(fields[4], 10, 64); err == nil {
|
||||
peer.LatestHandshake = ts
|
||||
if ts > 0 {
|
||||
peer.HandshakeAgeSec = now - ts
|
||||
}
|
||||
}
|
||||
|
||||
// TransferRx
|
||||
if n, err := strconv.ParseInt(fields[5], 10, 64); err == nil {
|
||||
peer.TransferRx = n
|
||||
}
|
||||
|
||||
// TransferTx
|
||||
if n, err := strconv.ParseInt(fields[6], 10, 64); err == nil {
|
||||
peer.TransferTx = n
|
||||
}
|
||||
|
||||
// PersistentKeepalive
|
||||
if fields[7] != "off" {
|
||||
if n, err := strconv.Atoi(fields[7]); err == nil {
|
||||
peer.Keepalive = n
|
||||
}
|
||||
}
|
||||
|
||||
r.Peers = append(r.Peers, peer)
|
||||
}
|
||||
r.PeerCount = len(r.Peers)
|
||||
}
|
||||
}
|
||||
|
||||
return r
|
||||
}
|
||||
Loading…
x
Reference in New Issue
Block a user