Improved how we present results in the inspector and added Anyone tests

This commit is contained in:
anonpenguin23 2026-02-11 11:07:56 +02:00
parent ebdd08f71c
commit 4356f5544a
9 changed files with 1105 additions and 6 deletions

2
.gitignore vendored
View File

@ -106,3 +106,5 @@ terms-agreement
cli cli
./inspector ./inspector
results/

BIN
inspector

Binary file not shown.

View File

@ -51,10 +51,12 @@ func HandleInspectCommand(args []string) {
configPath := fs.String("config", "scripts/remote-nodes.conf", "Path to remote-nodes.conf") configPath := fs.String("config", "scripts/remote-nodes.conf", "Path to remote-nodes.conf")
env := fs.String("env", "", "Environment to inspect (devnet, testnet)") env := fs.String("env", "", "Environment to inspect (devnet, testnet)")
subsystem := fs.String("subsystem", "all", "Subsystem to inspect (rqlite,olric,ipfs,dns,wg,system,network,all)") subsystem := fs.String("subsystem", "all", "Subsystem to inspect (rqlite,olric,ipfs,dns,wg,system,network,anyone,all)")
format := fs.String("format", "table", "Output format (table, json)") format := fs.String("format", "table", "Output format (table, json)")
timeout := fs.Duration("timeout", 30*time.Second, "SSH command timeout") timeout := fs.Duration("timeout", 30*time.Second, "SSH command timeout")
verbose := fs.Bool("verbose", false, "Verbose output") verbose := fs.Bool("verbose", false, "Verbose output")
// Output flags
outputDir := fs.String("output", "", "Save results to directory as markdown (e.g., ./results)")
// AI flags // AI flags
aiEnabled := fs.Bool("ai", false, "Enable AI analysis of failures") aiEnabled := fs.Bool("ai", false, "Enable AI analysis of failures")
aiModel := fs.String("model", "moonshotai/kimi-k2.5", "OpenRouter model for AI analysis") aiModel := fs.String("model", "moonshotai/kimi-k2.5", "OpenRouter model for AI analysis")
@ -70,6 +72,7 @@ func HandleInspectCommand(args []string) {
fmt.Fprintf(os.Stderr, " orama inspect --env devnet --subsystem rqlite\n") fmt.Fprintf(os.Stderr, " orama inspect --env devnet --subsystem rqlite\n")
fmt.Fprintf(os.Stderr, " orama inspect --env devnet --ai\n") fmt.Fprintf(os.Stderr, " orama inspect --env devnet --ai\n")
fmt.Fprintf(os.Stderr, " orama inspect --env devnet --ai --model openai/gpt-4o\n") fmt.Fprintf(os.Stderr, " orama inspect --env devnet --ai --model openai/gpt-4o\n")
fmt.Fprintf(os.Stderr, " orama inspect --env devnet --ai --output ./results\n")
} }
if err := fs.Parse(args); err != nil { if err := fs.Parse(args); err != nil {
@ -136,18 +139,31 @@ func HandleInspectCommand(args []string) {
} }
// Phase 4: AI Analysis (if enabled and there are failures or warnings) // Phase 4: AI Analysis (if enabled and there are failures or warnings)
var analysis *inspector.AnalysisResult
if *aiEnabled { if *aiEnabled {
issues := results.FailuresAndWarnings() issues := results.FailuresAndWarnings()
if len(issues) == 0 { if len(issues) == 0 {
fmt.Printf("\nAll checks passed — no AI analysis needed.\n") fmt.Printf("\nAll checks passed — no AI analysis needed.\n")
} else if *outputDir != "" {
// Per-group AI analysis for file output
groups := inspector.GroupFailures(results)
fmt.Printf("\nAnalyzing %d unique issues with %s...\n", len(groups), *aiModel)
var err error
analysis, err = inspector.AnalyzeGroups(groups, results, data, *aiModel, *aiAPIKey)
if err != nil {
fmt.Fprintf(os.Stderr, "\nAI analysis failed: %v\n", err)
} else { } else {
// Count affected subsystems inspector.PrintAnalysis(analysis, os.Stdout)
}
} else {
// Per-subsystem AI analysis for terminal output
subs := map[string]bool{} subs := map[string]bool{}
for _, c := range issues { for _, c := range issues {
subs[c.Subsystem] = true subs[c.Subsystem] = true
} }
fmt.Printf("\nAnalyzing %d issues across %d subsystems with %s...\n", len(issues), len(subs), *aiModel) fmt.Printf("\nAnalyzing %d issues across %d subsystems with %s...\n", len(issues), len(subs), *aiModel)
analysis, err := inspector.Analyze(results, data, *aiModel, *aiAPIKey) var err error
analysis, err = inspector.Analyze(results, data, *aiModel, *aiAPIKey)
if err != nil { if err != nil {
fmt.Fprintf(os.Stderr, "\nAI analysis failed: %v\n", err) fmt.Fprintf(os.Stderr, "\nAI analysis failed: %v\n", err)
} else { } else {
@ -156,6 +172,16 @@ func HandleInspectCommand(args []string) {
} }
} }
// Phase 5: Write results to disk (if --output is set)
if *outputDir != "" {
outPath, err := inspector.WriteResults(*outputDir, *env, results, data, analysis)
if err != nil {
fmt.Fprintf(os.Stderr, "\nError writing results: %v\n", err)
} else {
fmt.Printf("\nResults saved to %s\n", outPath)
}
}
// Exit with non-zero if any failures // Exit with non-zero if any failures
if failures := results.Failures(); len(failures) > 0 { if failures := results.Failures(); len(failures) > 0 {
os.Exit(1) os.Exit(1)

View File

@ -52,9 +52,10 @@ Step-by-step commands to resolve. Include actual node IPs/names from the data wh
### Prevention ### Prevention
What could prevent this in the future? (omit if not applicable)` What could prevent this in the future? (omit if not applicable)`
// SubsystemAnalysis holds the AI analysis for a single subsystem. // SubsystemAnalysis holds the AI analysis for a single subsystem or failure group.
type SubsystemAnalysis struct { type SubsystemAnalysis struct {
Subsystem string Subsystem string
GroupID string // e.g. "anyone.bootstrapped" — empty when analyzing whole subsystem
Analysis string Analysis string
Duration time.Duration Duration time.Duration
Error error Error error
@ -149,6 +150,125 @@ func Analyze(results *Results, data *ClusterData, model, apiKey string) (*Analys
}, nil }, nil
} }
// AnalyzeGroups sends each failure group to OpenRouter for focused AI analysis.
// Unlike Analyze, which sends one call per subsystem, this sends one call per
// unique failure pattern, producing more focused and actionable results.
//
// All group requests run concurrently. Each goroutine writes into its own
// pre-allocated slice slot, so no mutex is needed; results are then sorted by
// subsystem and group ID for deterministic output.
func AnalyzeGroups(groups []FailureGroup, results *Results, data *ClusterData, model, apiKey string) (*AnalysisResult, error) {
	if apiKey == "" {
		apiKey = os.Getenv("OPENROUTER_API_KEY")
	}
	if apiKey == "" {
		return nil, fmt.Errorf("no API key: set --api-key or OPENROUTER_API_KEY env")
	}
	if len(groups) == 0 {
		return &AnalysisResult{Model: model}, nil
	}

	// Shared context included in every per-group prompt.
	issuesBySubsystem := map[string][]CheckResult{}
	for _, c := range results.FailuresAndWarnings() {
		issuesBySubsystem[c.Subsystem] = append(issuesBySubsystem[c.Subsystem], c)
	}
	healthySummary := buildHealthySummary(results, issuesBySubsystem)
	collectionErrors := buildCollectionErrors(data)

	start := time.Now()
	analyses := make([]SubsystemAnalysis, len(groups))
	var wg sync.WaitGroup
	for i, g := range groups {
		wg.Add(1)
		go func(i int, group FailureGroup) {
			defer wg.Done()
			prompt := buildGroupPrompt(group, data, healthySummary, collectionErrors)
			subStart := time.Now()
			response, err := callOpenRouter(model, apiKey, prompt)
			sa := SubsystemAnalysis{
				Subsystem: group.Subsystem,
				GroupID:   group.ID,
				Duration:  time.Since(subStart),
			}
			if err != nil {
				sa.Error = err
			} else {
				sa.Analysis = response
			}
			// Disjoint index per goroutine — no lock required.
			analyses[i] = sa
		}(i, g)
	}
	wg.Wait()

	// Sort by subsystem then group ID for consistent output.
	sort.Slice(analyses, func(i, j int) bool {
		if analyses[i].Subsystem != analyses[j].Subsystem {
			return analyses[i].Subsystem < analyses[j].Subsystem
		}
		return analyses[i].GroupID < analyses[j].GroupID
	})

	return &AnalysisResult{
		Model:    model,
		Analyses: analyses,
		Duration: time.Since(start),
	}, nil
}
// buildGroupPrompt assembles the markdown prompt for a single failure group:
// issue header, affected nodes, error messages, the subsystem's raw data, and
// shared cluster context, ending with a focused instruction for the model.
func buildGroupPrompt(group FailureGroup, data *ClusterData, healthySummary, collectionErrors string) string {
	icon := "FAILURE"
	if group.Status == StatusWarn {
		icon = "WARNING"
	}

	var sb strings.Builder
	fmt.Fprintf(&sb, "## %s: %s\n\n", icon, group.Name)
	fmt.Fprintf(&sb, "**Check ID:** %s \n", group.ID)
	fmt.Fprintf(&sb, "**Severity:** %s \n", group.Severity)
	fmt.Fprintf(&sb, "**Nodes affected:** %d \n\n", len(group.Nodes))

	sb.WriteString("**Affected nodes:**\n")
	for _, node := range group.Nodes {
		fmt.Fprintf(&sb, "- %s\n", node)
	}
	sb.WriteString("\n**Error messages:**\n")
	for _, msg := range group.Messages {
		fmt.Fprintf(&sb, "- %s\n", msg)
	}
	sb.WriteString("\n")

	// Raw data for the affected subsystem, when the collector gathered any.
	if raw := buildSubsystemContext(group.Subsystem, data); raw != "" {
		fmt.Fprintf(&sb, "## %s Raw Data (all nodes)\n", strings.ToUpper(group.Subsystem))
		sb.WriteString(raw)
		sb.WriteString("\n")
	}
	if healthySummary != "" {
		sb.WriteString("## Healthy Subsystems\n")
		sb.WriteString(healthySummary)
		sb.WriteString("\n")
	}
	if collectionErrors != "" {
		sb.WriteString("## Collection Errors\n")
		sb.WriteString(collectionErrors)
		sb.WriteString("\n")
	}
	fmt.Fprintf(&sb, "\nAnalyze this specific %s issue. Be concise — focus on this one problem.\n", group.Subsystem)
	return sb.String()
}
func buildClusterOverview(data *ClusterData, results *Results) string { func buildClusterOverview(data *ClusterData, results *Results) string {
var b strings.Builder var b strings.Builder
b.WriteString(fmt.Sprintf("Nodes: %d\n", len(data.Nodes))) b.WriteString(fmt.Sprintf("Nodes: %d\n", len(data.Nodes)))
@ -286,6 +406,8 @@ func buildSubsystemContext(subsystem string, data *ClusterData) string {
return buildNetworkContext(data) return buildNetworkContext(data)
case "namespace": case "namespace":
return buildNamespaceContext(data) return buildNamespaceContext(data)
case "anyone":
return buildAnyoneContext(data)
default: default:
return "" return ""
} }
@ -486,6 +608,40 @@ func buildNamespaceContext(data *ClusterData) string {
return b.String() return b.String()
} }
// buildAnyoneContext renders the collected Anyone relay/client state of every
// node as markdown for inclusion in AI prompts. Nodes without Anyone data or
// with neither service active are omitted.
//
// Hosts (and the unreachable-peer list) are sorted so the generated prompt is
// deterministic — Go map iteration order is random, and the original output
// varied between runs.
func buildAnyoneContext(data *ClusterData) string {
	hosts := make([]string, 0, len(data.Nodes))
	for host := range data.Nodes {
		hosts = append(hosts, host)
	}
	sort.Strings(hosts)

	var b strings.Builder
	for _, host := range hosts {
		nd := data.Nodes[host]
		if nd.Anyone == nil {
			continue
		}
		a := nd.Anyone
		if !a.RelayActive && !a.ClientActive {
			continue
		}
		b.WriteString(fmt.Sprintf("### %s\n", host))
		b.WriteString(fmt.Sprintf(" relay=%v client=%v orport=%v socks=%v control=%v\n",
			a.RelayActive, a.ClientActive, a.ORPortListening, a.SocksListening, a.ControlListening))
		if a.RelayActive {
			b.WriteString(fmt.Sprintf(" bootstrap=%d%% fingerprint=%s nickname=%s\n",
				a.BootstrapPct, a.Fingerprint, a.Nickname))
		}
		if len(a.ORPortReachable) > 0 {
			var unreachable []string
			for h, ok := range a.ORPortReachable {
				if !ok {
					unreachable = append(unreachable, h)
				}
			}
			if len(unreachable) > 0 {
				sort.Strings(unreachable)
				b.WriteString(fmt.Sprintf(" orport_unreachable: %s\n", strings.Join(unreachable, ", ")))
			} else {
				b.WriteString(fmt.Sprintf(" orport: all %d peers reachable\n", len(a.ORPortReachable)))
			}
		}
	}
	return b.String()
}
// OpenRouter API types (OpenAI-compatible) // OpenRouter API types (OpenAI-compatible)
type openRouterRequest struct { type openRouterRequest struct {
@ -531,7 +687,7 @@ func callOpenRouter(model, apiKey, prompt string) (string, error) {
req.Header.Set("Content-Type", "application/json") req.Header.Set("Content-Type", "application/json")
req.Header.Set("Authorization", "Bearer "+apiKey) req.Header.Set("Authorization", "Bearer "+apiKey)
client := &http.Client{Timeout: 120 * time.Second} client := &http.Client{Timeout: 180 * time.Second}
resp, err := client.Do(req) resp, err := client.Do(req)
if err != nil { if err != nil {
return "", fmt.Errorf("HTTP request: %w", err) return "", fmt.Errorf("HTTP request: %w", err)

View File

@ -0,0 +1,170 @@
package checks
import (
"fmt"
"github.com/DeBrosOfficial/network/pkg/inspector"
)
// init registers CheckAnyone with the inspector's checker registry under the
// "anyone" subsystem key, so it runs when that subsystem is selected.
func init() {
	inspector.RegisterChecker("anyone", CheckAnyone)
}
const anyoneSub = "anyone"
// CheckAnyone runs all Anyone relay/client health checks: per-node service,
// port, and bootstrap checks for every node with collected Anyone data,
// followed by cluster-level cross-node checks.
func CheckAnyone(data *inspector.ClusterData) []inspector.CheckResult {
	var out []inspector.CheckResult
	for _, node := range data.Nodes {
		if node.Anyone != nil {
			out = append(out, checkAnyonePerNode(node)...)
		}
	}
	return append(out, checkAnyoneCrossNode(data)...)
}
// checkAnyonePerNode evaluates Anyone relay and client health on one node:
// service activity, local port bindings (9001/9050/9051), bootstrap progress,
// fingerprint, and nickname. Nodes where neither service is active produce no
// results, so hosts without Anyone installed add no noise.
func checkAnyonePerNode(nd *inspector.NodeData) []inspector.CheckResult {
	var r []inspector.CheckResult
	a := nd.Anyone
	node := nd.Node.Name()
	// If neither service is active, skip all checks for this node
	if !a.RelayActive && !a.ClientActive {
		return r
	}
	// --- Relay checks ---
	if a.RelayActive {
		r = append(r, inspector.Pass("anyone.relay_active", "Anyone relay service active", anyoneSub, node,
			"debros-anyone-relay is active", inspector.High))
		// ORPort listening — high severity: the relay is unusable without 9001.
		if a.ORPortListening {
			r = append(r, inspector.Pass("anyone.orport_listening", "ORPort 9001 listening", anyoneSub, node,
				"port 9001 bound", inspector.High))
		} else {
			r = append(r, inspector.Fail("anyone.orport_listening", "ORPort 9001 listening", anyoneSub, node,
				"port 9001 NOT bound", inspector.High))
		}
		// Control port — only a warning: monitoring is degraded, not the relay.
		if a.ControlListening {
			r = append(r, inspector.Pass("anyone.control_listening", "Control port 9051 listening", anyoneSub, node,
				"port 9051 bound", inspector.Low))
		} else {
			r = append(r, inspector.Warn("anyone.control_listening", "Control port 9051 listening", anyoneSub, node,
				"port 9051 NOT bound (monitoring unavailable)", inspector.Low))
		}
		// Bootstrap status: pass at 100%, warn while partially bootstrapped,
		// fail at 0% (relay never started bootstrapping or log missing).
		if a.Bootstrapped {
			r = append(r, inspector.Pass("anyone.bootstrapped", "Relay bootstrapped", anyoneSub, node,
				fmt.Sprintf("bootstrap=%d%%", a.BootstrapPct), inspector.High))
		} else if a.BootstrapPct > 0 {
			r = append(r, inspector.Warn("anyone.bootstrapped", "Relay bootstrapped", anyoneSub, node,
				fmt.Sprintf("bootstrap=%d%% (still connecting)", a.BootstrapPct), inspector.High))
		} else {
			r = append(r, inspector.Fail("anyone.bootstrapped", "Relay bootstrapped", anyoneSub, node,
				"bootstrap=0% (not started or log missing)", inspector.High))
		}
		// Fingerprint present — warn only; keys may not be generated yet.
		if a.Fingerprint != "" {
			r = append(r, inspector.Pass("anyone.fingerprint", "Relay has fingerprint", anyoneSub, node,
				fmt.Sprintf("fingerprint=%s", a.Fingerprint), inspector.Medium))
		} else {
			r = append(r, inspector.Warn("anyone.fingerprint", "Relay has fingerprint", anyoneSub, node,
				"no fingerprint found (relay may not have generated keys yet)", inspector.Medium))
		}
		// Nickname configured — cosmetic, low severity.
		if a.Nickname != "" {
			r = append(r, inspector.Pass("anyone.nickname", "Relay nickname configured", anyoneSub, node,
				fmt.Sprintf("nickname=%s", a.Nickname), inspector.Low))
		} else {
			r = append(r, inspector.Warn("anyone.nickname", "Relay nickname configured", anyoneSub, node,
				"no nickname in /etc/anon/anonrc", inspector.Low))
		}
	}
	// --- Client checks ---
	if a.ClientActive {
		r = append(r, inspector.Pass("anyone.client_active", "Anyone client service active", anyoneSub, node,
			"debros-anyone-client is active", inspector.High))
		// SOCKS5 port listening — without it, traffic cannot route through the client.
		if a.SocksListening {
			r = append(r, inspector.Pass("anyone.socks_listening", "SOCKS5 port 9050 listening", anyoneSub, node,
				"port 9050 bound", inspector.High))
		} else {
			r = append(r, inspector.Fail("anyone.socks_listening", "SOCKS5 port 9050 listening", anyoneSub, node,
				"port 9050 NOT bound (IPFS traffic cannot route through anonymity network)", inspector.High))
		}
	}
	return r
}
// checkAnyoneCrossNode runs cluster-level Anyone checks that need data from
// every node — currently cross-node ORPort (9001) TCP reachability.
//
// Fix: the previous version kept relayActive/relayTotal (and client
// equivalents) as separate counters, but both were incremented under the same
// condition, so "total" always equaled "active". The intent — skip cross-node
// checks when nothing is running — is captured by a single boolean.
func checkAnyoneCrossNode(data *inspector.ClusterData) []inspector.CheckResult {
	var r []inspector.CheckResult

	// Skip cross-node checks when no node runs an active relay or client.
	anyActive := false
	for _, nd := range data.Nodes {
		if nd.Anyone == nil {
			continue
		}
		if nd.Anyone.RelayActive || nd.Anyone.ClientActive {
			anyActive = true
			break
		}
	}
	if !anyActive {
		return r
	}

	// ORPort reachability: each node records whether it could TCP-connect to
	// every other relay's port 9001. Emit one failure per unreachable pair,
	// and a single cluster-wide pass when every attempted connection succeeded.
	orportChecked := 0
	orportReachable := 0
	orportFailed := 0
	for _, nd := range data.Nodes {
		if nd.Anyone == nil {
			continue
		}
		for host, ok := range nd.Anyone.ORPortReachable {
			orportChecked++
			if ok {
				orportReachable++
			} else {
				orportFailed++
				r = append(r, inspector.Fail("anyone.orport_reachable",
					fmt.Sprintf("ORPort 9001 reachable on %s", host),
					anyoneSub, nd.Node.Name(),
					fmt.Sprintf("cannot TCP connect to %s:9001 from %s", host, nd.Node.Name()), inspector.High))
			}
		}
	}
	if orportChecked > 0 && orportFailed == 0 {
		r = append(r, inspector.Pass("anyone.orport_reachable", "ORPort 9001 reachable across nodes", anyoneSub, "",
			fmt.Sprintf("all %d cross-node connections OK", orportReachable), inspector.High))
	}
	return r
}

View File

@ -0,0 +1,219 @@
package checks
import (
"testing"
"github.com/DeBrosOfficial/network/pkg/inspector"
)
// TestCheckAnyone_NilData verifies that a node with no collected Anyone data
// (nd.Anyone == nil) is skipped entirely and yields no check results.
func TestCheckAnyone_NilData(t *testing.T) {
	nd := makeNodeData("1.1.1.1", "node")
	data := makeCluster(map[string]*inspector.NodeData{"1.1.1.1": nd})
	results := CheckAnyone(data)
	if len(results) != 0 {
		t.Errorf("expected 0 results for nil Anyone data, got %d", len(results))
	}
}
// TestCheckAnyone_BothInactive verifies that Anyone data with neither relay
// nor client active produces no results (not installed is not a failure).
func TestCheckAnyone_BothInactive(t *testing.T) {
	nd := makeNodeData("1.1.1.1", "node")
	nd.Anyone = &inspector.AnyoneData{
		ORPortReachable: make(map[string]bool),
	}
	data := makeCluster(map[string]*inspector.NodeData{"1.1.1.1": nd})
	results := CheckAnyone(data)
	if len(results) != 0 {
		t.Errorf("expected 0 results when both services inactive, got %d", len(results))
	}
}
// TestCheckAnyone_HealthyRelay verifies that a fully healthy relay (active,
// ports bound, 100% bootstrapped, fingerprint and nickname set) passes every
// relay check.
func TestCheckAnyone_HealthyRelay(t *testing.T) {
	nd := makeNodeData("1.1.1.1", "node")
	nd.Anyone = &inspector.AnyoneData{
		RelayActive:      true,
		ORPortListening:  true,
		ControlListening: true,
		Bootstrapped:     true,
		BootstrapPct:     100,
		Fingerprint:      "ABCDEF1234567890",
		Nickname:         "OramaRelay1",
		ORPortReachable:  make(map[string]bool),
	}
	data := makeCluster(map[string]*inspector.NodeData{"1.1.1.1": nd})
	results := CheckAnyone(data)
	expectStatus(t, results, "anyone.relay_active", inspector.StatusPass)
	expectStatus(t, results, "anyone.orport_listening", inspector.StatusPass)
	expectStatus(t, results, "anyone.control_listening", inspector.StatusPass)
	expectStatus(t, results, "anyone.bootstrapped", inspector.StatusPass)
	expectStatus(t, results, "anyone.fingerprint", inspector.StatusPass)
	expectStatus(t, results, "anyone.nickname", inspector.StatusPass)
}
// TestCheckAnyone_HealthyClient verifies that an active client with SOCKS5
// bound passes the client checks (no relay checks expected).
func TestCheckAnyone_HealthyClient(t *testing.T) {
	nd := makeNodeData("1.1.1.1", "node")
	nd.Anyone = &inspector.AnyoneData{
		ClientActive:    true,
		SocksListening:  true,
		ORPortReachable: make(map[string]bool),
	}
	data := makeCluster(map[string]*inspector.NodeData{"1.1.1.1": nd})
	results := CheckAnyone(data)
	expectStatus(t, results, "anyone.client_active", inspector.StatusPass)
	expectStatus(t, results, "anyone.socks_listening", inspector.StatusPass)
}
// TestCheckAnyone_RelayORPortDown verifies that an active relay whose ORPort
// is not bound is reported as a failure.
func TestCheckAnyone_RelayORPortDown(t *testing.T) {
	nd := makeNodeData("1.1.1.1", "node")
	nd.Anyone = &inspector.AnyoneData{
		RelayActive:      true,
		ORPortListening:  false,
		ControlListening: true,
		ORPortReachable:  make(map[string]bool),
	}
	data := makeCluster(map[string]*inspector.NodeData{"1.1.1.1": nd})
	results := CheckAnyone(data)
	expectStatus(t, results, "anyone.orport_listening", inspector.StatusFail)
}
// TestCheckAnyone_RelayNotBootstrapped verifies that bootstrap=0% on an
// active relay is a failure (never started or log missing).
func TestCheckAnyone_RelayNotBootstrapped(t *testing.T) {
	nd := makeNodeData("1.1.1.1", "node")
	nd.Anyone = &inspector.AnyoneData{
		RelayActive:     true,
		ORPortListening: true,
		BootstrapPct:    0,
		Bootstrapped:    false,
		ORPortReachable: make(map[string]bool),
	}
	data := makeCluster(map[string]*inspector.NodeData{"1.1.1.1": nd})
	results := CheckAnyone(data)
	expectStatus(t, results, "anyone.bootstrapped", inspector.StatusFail)
}
// TestCheckAnyone_RelayPartialBootstrap verifies that a bootstrap percentage
// between 0 and 100 is only a warning (still connecting), not a failure.
func TestCheckAnyone_RelayPartialBootstrap(t *testing.T) {
	nd := makeNodeData("1.1.1.1", "node")
	nd.Anyone = &inspector.AnyoneData{
		RelayActive:     true,
		ORPortListening: true,
		BootstrapPct:    75,
		Bootstrapped:    false,
		ORPortReachable: make(map[string]bool),
	}
	data := makeCluster(map[string]*inspector.NodeData{"1.1.1.1": nd})
	results := CheckAnyone(data)
	expectStatus(t, results, "anyone.bootstrapped", inspector.StatusWarn)
}
// TestCheckAnyone_ClientSocksDown verifies that an active client without the
// SOCKS5 port bound is reported as a failure.
func TestCheckAnyone_ClientSocksDown(t *testing.T) {
	nd := makeNodeData("1.1.1.1", "node")
	nd.Anyone = &inspector.AnyoneData{
		ClientActive:    true,
		SocksListening:  false,
		ORPortReachable: make(map[string]bool),
	}
	data := makeCluster(map[string]*inspector.NodeData{"1.1.1.1": nd})
	results := CheckAnyone(data)
	expectStatus(t, results, "anyone.socks_listening", inspector.StatusFail)
}
// TestCheckAnyone_NoFingerprint verifies that a missing relay fingerprint is
// only a warning (keys may not be generated yet).
func TestCheckAnyone_NoFingerprint(t *testing.T) {
	nd := makeNodeData("1.1.1.1", "node")
	nd.Anyone = &inspector.AnyoneData{
		RelayActive:     true,
		ORPortListening: true,
		Fingerprint:     "",
		ORPortReachable: make(map[string]bool),
	}
	data := makeCluster(map[string]*inspector.NodeData{"1.1.1.1": nd})
	results := CheckAnyone(data)
	expectStatus(t, results, "anyone.fingerprint", inspector.StatusWarn)
}
// TestCheckAnyone_CrossNode_ORPortReachable verifies that when every recorded
// cross-node ORPort probe succeeded, a cluster-wide pass is emitted.
func TestCheckAnyone_CrossNode_ORPortReachable(t *testing.T) {
	nd1 := makeNodeData("1.1.1.1", "node")
	nd1.Anyone = &inspector.AnyoneData{
		RelayActive:     true,
		ORPortListening: true,
		ORPortReachable: map[string]bool{"2.2.2.2": true},
	}
	nd2 := makeNodeData("2.2.2.2", "node")
	nd2.Anyone = &inspector.AnyoneData{
		RelayActive:     true,
		ORPortListening: true,
		ORPortReachable: map[string]bool{"1.1.1.1": true},
	}
	data := makeCluster(map[string]*inspector.NodeData{"1.1.1.1": nd1, "2.2.2.2": nd2})
	results := CheckAnyone(data)
	expectStatus(t, results, "anyone.orport_reachable", inspector.StatusPass)
}
// TestCheckAnyone_CrossNode_ORPortUnreachable verifies that a single failed
// cross-node probe produces at least one orport_reachable failure (results
// also contain passes for the working direction, so we scan rather than use
// expectStatus).
func TestCheckAnyone_CrossNode_ORPortUnreachable(t *testing.T) {
	nd1 := makeNodeData("1.1.1.1", "node")
	nd1.Anyone = &inspector.AnyoneData{
		RelayActive:     true,
		ORPortListening: true,
		ORPortReachable: map[string]bool{"2.2.2.2": false},
	}
	nd2 := makeNodeData("2.2.2.2", "node")
	nd2.Anyone = &inspector.AnyoneData{
		RelayActive:     true,
		ORPortListening: true,
		ORPortReachable: map[string]bool{"1.1.1.1": true},
	}
	data := makeCluster(map[string]*inspector.NodeData{"1.1.1.1": nd1, "2.2.2.2": nd2})
	results := CheckAnyone(data)
	// Should have at least one fail for the unreachable connection
	hasFail := false
	for _, r := range results {
		if r.ID == "anyone.orport_reachable" && r.Status == inspector.StatusFail {
			hasFail = true
		}
	}
	if !hasFail {
		t.Error("expected at least one anyone.orport_reachable fail")
	}
}
// TestCheckAnyone_BothRelayAndClient verifies that a node running both relay
// and client gets the full set of checks from both branches.
func TestCheckAnyone_BothRelayAndClient(t *testing.T) {
	nd := makeNodeData("1.1.1.1", "node")
	nd.Anyone = &inspector.AnyoneData{
		RelayActive:      true,
		ClientActive:     true,
		ORPortListening:  true,
		SocksListening:   true,
		ControlListening: true,
		Bootstrapped:     true,
		BootstrapPct:     100,
		Fingerprint:      "ABCDEF",
		Nickname:         "test",
		ORPortReachable:  make(map[string]bool),
	}
	data := makeCluster(map[string]*inspector.NodeData{"1.1.1.1": nd})
	results := CheckAnyone(data)
	// Should have both relay and client checks
	expectStatus(t, results, "anyone.relay_active", inspector.StatusPass)
	expectStatus(t, results, "anyone.client_active", inspector.StatusPass)
	expectStatus(t, results, "anyone.socks_listening", inspector.StatusPass)
	expectStatus(t, results, "anyone.orport_listening", inspector.StatusPass)
}

View File

@ -50,6 +50,22 @@ func checkSystemPerNode(nd *inspector.NodeData) []inspector.CheckResult {
} }
} }
// 6.2 Anyone relay/client services (only check if installed, don't fail if absent)
for _, svc := range []string{"debros-anyone-relay", "debros-anyone-client"} {
status, ok := sys.Services[svc]
if !ok || status == "inactive" {
continue // not installed or intentionally stopped
}
id := fmt.Sprintf("system.svc_%s", strings.ReplaceAll(svc, "-", "_"))
name := fmt.Sprintf("%s service active", svc)
if status == "active" {
r = append(r, inspector.Pass(id, name, systemSub, node, "active", inspector.High))
} else {
r = append(r, inspector.Fail(id, name, systemSub, node,
fmt.Sprintf("status=%s (should be active or uninstalled)", status), inspector.High))
}
}
// 6.5 WireGuard service // 6.5 WireGuard service
if status, ok := sys.Services["wg-quick@wg0"]; ok { if status, ok := sys.Services["wg-quick@wg0"]; ok {
if status == "active" { if status == "active" {

View File

@ -26,6 +26,7 @@ type NodeData struct {
WireGuard *WireGuardData WireGuard *WireGuardData
System *SystemData System *SystemData
Network *NetworkData Network *NetworkData
Anyone *AnyoneData
Namespaces []NamespaceData // namespace instances on this node Namespaces []NamespaceData // namespace instances on this node
Errors []string // collection errors for this node Errors []string // collection errors for this node
} }
@ -224,6 +225,21 @@ type NetworkData struct {
PingResults map[string]bool // WG peer IP → ping success PingResults map[string]bool // WG peer IP → ping success
} }
// AnyoneData holds parsed Anyone relay/client status from a node.
type AnyoneData struct {
RelayActive bool // debros-anyone-relay systemd service active
ClientActive bool // debros-anyone-client systemd service active
ORPortListening bool // port 9001 bound locally
SocksListening bool // port 9050 bound locally (client SOCKS5)
ControlListening bool // port 9051 bound locally (control port)
Bootstrapped bool // relay has bootstrapped to 100%
BootstrapPct int // bootstrap percentage (0-100)
Fingerprint string // relay fingerprint
Nickname string // relay nickname
UptimeStr string // uptime from control port
ORPortReachable map[string]bool // host IP → whether we can TCP connect to their 9001 from this node
}
// Collect gathers data from all nodes in parallel. // Collect gathers data from all nodes in parallel.
func Collect(ctx context.Context, nodes []Node, subsystems []string, verbose bool) *ClusterData { func Collect(ctx context.Context, nodes []Node, subsystems []string, verbose bool) *ClusterData {
start := time.Now() start := time.Now()
@ -246,6 +262,10 @@ func Collect(ctx context.Context, nodes []Node, subsystems []string, verbose boo
} }
wg.Wait() wg.Wait()
// Second pass: cross-node ORPort reachability (needs all nodes collected first)
collectAnyoneReachability(ctx, data)
data.Duration = time.Since(start) data.Duration = time.Since(start)
return data return data
} }
@ -286,6 +306,9 @@ func collectNode(ctx context.Context, node Node, subsystems []string, verbose bo
if shouldCollect("network") { if shouldCollect("network") {
nd.Network = collectNetwork(ctx, node, nd.WireGuard) nd.Network = collectNetwork(ctx, node, nd.WireGuard)
} }
if shouldCollect("anyone") {
nd.Anyone = collectAnyone(ctx, node)
}
// Namespace collection — always collect if any subsystem is collected // Namespace collection — always collect if any subsystem is collected
nd.Namespaces = collectNamespaces(ctx, node) nd.Namespaces = collectNamespaces(ctx, node)
@ -1113,6 +1136,139 @@ echo "$SEP"
return data return data
} }
// collectAnyone gathers Anyone relay/client status from a node over SSH in a
// single round-trip: service states, local port bindings (9001/9050/9051),
// bootstrap percentage from the notices log, fingerprint, and nickname.
// Returns a zero-valued AnyoneData (with an initialized map) if the SSH
// command fails with no output.
//
// Fix: removed dead code at the end — an `if … return data` immediately
// followed by an unconditional `return data`, i.e. both paths were identical.
func collectAnyone(ctx context.Context, node Node) *AnyoneData {
	data := &AnyoneData{
		ORPortReachable: make(map[string]bool),
	}
	// One script, sections separated by a sentinel so a single SSH exec
	// yields every field; each probe degrades to a safe default on error.
	cmd := `
SEP="===INSPECTOR_SEP==="
echo "$SEP"
systemctl is-active debros-anyone-relay 2>/dev/null || echo inactive
echo "$SEP"
systemctl is-active debros-anyone-client 2>/dev/null || echo inactive
echo "$SEP"
ss -tlnp 2>/dev/null | grep -q ':9001 ' && echo yes || echo no
echo "$SEP"
ss -tlnp 2>/dev/null | grep -q ':9050 ' && echo yes || echo no
echo "$SEP"
ss -tlnp 2>/dev/null | grep -q ':9051 ' && echo yes || echo no
echo "$SEP"
# Check bootstrap status from log (last 50 lines)
grep -oP 'Bootstrapped \K[0-9]+' /var/log/anon/notices.log 2>/dev/null | tail -1 || echo 0
echo "$SEP"
# Read fingerprint
cat /var/lib/anon/fingerprint 2>/dev/null || echo ""
echo "$SEP"
# Read nickname from config
grep -oP '^Nickname \K\S+' /etc/anon/anonrc 2>/dev/null || echo ""
`
	res := RunSSH(ctx, node, cmd)
	if !res.OK() && res.Stdout == "" {
		return data
	}
	// parts[0] is the text before the first sentinel; fields start at [1].
	parts := strings.Split(res.Stdout, "===INSPECTOR_SEP===")
	if len(parts) > 1 {
		data.RelayActive = strings.TrimSpace(parts[1]) == "active"
	}
	if len(parts) > 2 {
		data.ClientActive = strings.TrimSpace(parts[2]) == "active"
	}
	if len(parts) > 3 {
		data.ORPortListening = strings.TrimSpace(parts[3]) == "yes"
	}
	if len(parts) > 4 {
		data.SocksListening = strings.TrimSpace(parts[4]) == "yes"
	}
	if len(parts) > 5 {
		data.ControlListening = strings.TrimSpace(parts[5]) == "yes"
	}
	if len(parts) > 6 {
		pct := parseIntDefault(strings.TrimSpace(parts[6]), 0)
		data.BootstrapPct = pct
		data.Bootstrapped = pct >= 100
	}
	if len(parts) > 7 {
		data.Fingerprint = strings.TrimSpace(parts[7])
	}
	if len(parts) > 8 {
		data.Nickname = strings.TrimSpace(parts[8])
	}
	return data
}
// collectAnyoneReachability runs a second pass to check ORPort reachability across nodes.
// Called after all nodes are collected so we know which nodes run relays.
//
// From every node with Anyone data, it attempts a TCP connect (bash /dev/tcp,
// 3s timeout) to port 9001 of each other active relay host, recording the
// outcome in that node's Anyone.ORPortReachable map (peer host → reachable).
func collectAnyoneReachability(ctx context.Context, data *ClusterData) {
	// Find all nodes running the relay (have ORPort listening)
	var relayHosts []string
	for host, nd := range data.Nodes {
		if nd.Anyone != nil && nd.Anyone.RelayActive && nd.Anyone.ORPortListening {
			relayHosts = append(relayHosts, host)
		}
	}
	if len(relayHosts) == 0 {
		return
	}
	// From each node, try to TCP connect to each relay's ORPort 9001
	var mu sync.Mutex
	var wg sync.WaitGroup
	for _, nd := range data.Nodes {
		if nd.Anyone == nil {
			continue
		}
		wg.Add(1)
		go func(nd *NodeData) {
			defer wg.Done()
			// Build commands to test TCP connectivity to each relay;
			// each probe prints one "ORPORT:<host>:ok|fail" line.
			var tcpCmds string
			for _, relayHost := range relayHosts {
				if relayHost == nd.Node.Host {
					continue // skip self
				}
				tcpCmds += fmt.Sprintf(
					`echo "ORPORT:%s:$(timeout 3 bash -c 'echo >/dev/tcp/%s/9001' 2>/dev/null && echo ok || echo fail)"
`, relayHost, relayHost)
			}
			if tcpCmds == "" {
				return
			}
			res := RunSSH(ctx, nd.Node, tcpCmds)
			if res.Stdout == "" {
				return
			}
			// Each goroutine writes only its own node's map; the mutex
			// serializes the writes defensively.
			mu.Lock()
			defer mu.Unlock()
			for _, line := range strings.Split(res.Stdout, "\n") {
				line = strings.TrimSpace(line)
				if strings.HasPrefix(line, "ORPORT:") {
					// SplitN(…, 3) yields [tag, host, ok|fail] for IPv4 hosts.
					// NOTE(review): an IPv6 relay host (contains ':') would
					// break this parse — confirm hosts are always IPv4/names.
					p := strings.SplitN(line, ":", 3)
					if len(p) == 3 {
						nd.Anyone.ORPortReachable[p[1]] = p[2] == "ok"
					}
				}
			}
		}(nd)
	}
	wg.Wait()
}
func collectNamespaces(ctx context.Context, node Node) []NamespaceData { func collectNamespaces(ctx context.Context, node Node) []NamespaceData {
// Detect namespace services: debros-namespace-gateway@<name>.service // Detect namespace services: debros-namespace-gateway@<name>.service
cmd := ` cmd := `

View File

@ -0,0 +1,354 @@
package inspector
import (
"fmt"
"os"
"path/filepath"
"sort"
"strings"
"time"
)
// FailureGroup groups identical check failures/warnings across nodes.
type FailureGroup struct {
ID string
Name string // from first check in group
Status Status
Severity Severity
Subsystem string
Nodes []string // affected node names (deduplicated)
Messages []string // unique messages (capped at 5)
Count int // total raw occurrence count (before dedup)
}
// GroupFailures collapses CheckResults into unique failure groups keyed by (ID, Status).
// Only failures and warnings are grouped; passes and skips are ignored.
//
// Per group: Name/Severity/Subsystem come from the first check encountered,
// Nodes are deduplicated (empty node names become "cluster-wide"), Messages
// hold at most 5 unique strings, and Count is the raw occurrence total.
func GroupFailures(results *Results) []FailureGroup {
	type groupKey struct {
		ID     string
		Status Status
	}
	seen := map[groupKey]*FailureGroup{}
	nodesSeen := map[groupKey]map[string]bool{}
	// order preserves first-seen group order, since map iteration is random.
	var order []groupKey
	for _, c := range results.Checks {
		if c.Status != StatusFail && c.Status != StatusWarn {
			continue
		}
		k := groupKey{ID: c.ID, Status: c.Status}
		g, exists := seen[k]
		if !exists {
			g = &FailureGroup{
				ID:        c.ID,
				Name:      c.Name,
				Status:    c.Status,
				Severity:  c.Severity,
				Subsystem: c.Subsystem,
			}
			seen[k] = g
			nodesSeen[k] = map[string]bool{}
			order = append(order, k)
		}
		g.Count++
		node := c.Node
		if node == "" {
			node = "cluster-wide"
		}
		// Deduplicate nodes (a node may appear for multiple targets)
		if !nodesSeen[k][node] {
			nodesSeen[k][node] = true
			g.Nodes = append(g.Nodes, node)
		}
		// Track unique messages (cap at 5 to avoid bloat)
		if len(g.Messages) < 5 {
			found := false
			for _, m := range g.Messages {
				if m == c.Message {
					found = true
					break
				}
			}
			if !found {
				g.Messages = append(g.Messages, c.Message)
			}
		}
	}
	// Sort: failures before warnings, then by severity (high first — assumes
	// larger Severity values are more severe; confirm against the type), then
	// by ID for a stable tiebreak.
	groups := make([]FailureGroup, 0, len(order))
	for _, k := range order {
		groups = append(groups, *seen[k])
	}
	sort.Slice(groups, func(i, j int) bool {
		oi, oj := statusOrder(groups[i].Status), statusOrder(groups[j].Status)
		if oi != oj {
			return oi < oj
		}
		if groups[i].Severity != groups[j].Severity {
			return groups[i].Severity > groups[j].Severity
		}
		return groups[i].ID < groups[j].ID
	})
	return groups
}
// WriteResults saves inspection results as markdown files under a timestamped
// directory (baseDir/env/<timestamp>) and returns that directory's path. It
// writes a summary file plus one file per subsystem, attaching any available
// AI analysis keyed by failure-group ID (or by subsystem when the analysis
// carries no group ID).
func WriteResults(baseDir, env string, results *Results, data *ClusterData, analysis *AnalysisResult) (string, error) {
	stamp := time.Now().Format("2006-01-02_150405")
	outDir := filepath.Join(baseDir, env, stamp)
	if err := os.MkdirAll(outDir, 0o755); err != nil {
		return "", fmt.Errorf("create output directory: %w", err)
	}

	groups := GroupFailures(results)

	// Index successful analyses: group ID when present, subsystem otherwise.
	analysisMap := map[string]string{}
	if analysis != nil {
		for _, sa := range analysis.Analyses {
			if sa.Error != nil {
				continue
			}
			key := sa.GroupID
			if key == "" {
				key = sa.Subsystem
			}
			analysisMap[key] = sa.Analysis
		}
	}

	if err := writeSummary(outDir, env, stamp, results, data, groups, analysisMap); err != nil {
		return "", fmt.Errorf("write summary: %w", err)
	}

	// Bucket checks and failure groups by subsystem for per-subsystem files.
	checksBySub := map[string][]CheckResult{}
	for _, c := range results.Checks {
		checksBySub[c.Subsystem] = append(checksBySub[c.Subsystem], c)
	}
	groupsBySub := map[string][]FailureGroup{}
	for _, g := range groups {
		groupsBySub[g.Subsystem] = append(groupsBySub[g.Subsystem], g)
	}
	for sub, checks := range checksBySub {
		if err := writeSubsystem(outDir, sub, stamp, checks, groupsBySub[sub], analysisMap); err != nil {
			return "", fmt.Errorf("write %s: %w", sub, err)
		}
	}
	return outDir, nil
}
// writeSummary renders summary.md: overall totals, a per-subsystem table with
// links to the detail files, the high-severity issue list, and any per-node
// collection errors.
func writeSummary(dir, env, ts string, results *Results, data *ClusterData, groups []FailureGroup, analysisMap map[string]string) error {
	var out strings.Builder
	passed, failed, warned, skipped := results.Summary()

	out.WriteString(fmt.Sprintf("# %s Inspection Report\n\n", strings.ToUpper(env)))
	out.WriteString(fmt.Sprintf("**Date:** %s \n", ts))
	out.WriteString(fmt.Sprintf("**Nodes:** %d \n", len(data.Nodes)))
	out.WriteString(fmt.Sprintf("**Total:** %d passed, %d failed, %d warnings, %d skipped \n\n", passed, failed, warned, skipped))

	// Tally per-subsystem counts: [pass, fail, warn, skip].
	counts := map[string][4]int{}
	var subsystems []string
	for _, c := range results.Checks {
		tally, seen := counts[c.Subsystem]
		if !seen {
			subsystems = append(subsystems, c.Subsystem)
		}
		switch c.Status {
		case StatusPass:
			tally[0]++
		case StatusFail:
			tally[1]++
		case StatusWarn:
			tally[2]++
		case StatusSkip:
			tally[3]++
		}
		counts[c.Subsystem] = tally
	}
	sort.Strings(subsystems)

	// Number of distinct issue groups per subsystem.
	issuesPerSub := map[string]int{}
	for _, g := range groups {
		issuesPerSub[g.Subsystem]++
	}

	out.WriteString("## Subsystems\n\n")
	out.WriteString("| Subsystem | Pass | Fail | Warn | Skip | Issues |\n")
	out.WriteString("|-----------|------|------|------|------|--------|\n")
	for _, sub := range subsystems {
		tally := counts[sub]
		link := fmt.Sprintf("[%s](%s.md)", sub, sub)
		out.WriteString(fmt.Sprintf("| %s | %d | %d | %d | %d | %d |\n", link, tally[0], tally[1], tally[2], tally[3], issuesPerSub[sub]))
	}
	out.WriteString("\n")

	// High-severity groups get their own section, each linking into the
	// per-subsystem file's matching heading.
	if critical := filterGroupsBySeverity(groups, High); len(critical) > 0 {
		out.WriteString("## Critical Issues\n\n")
		for i, g := range critical {
			icon := "FAIL"
			if g.Status == StatusWarn {
				icon = "WARN"
			}
			nodeInfo := fmt.Sprintf("%d nodes", len(g.Nodes))
			if g.Count > len(g.Nodes) {
				nodeInfo = fmt.Sprintf("%d nodes (%d occurrences)", len(g.Nodes), g.Count)
			}
			out.WriteString(fmt.Sprintf("%d. **[%s]** %s — %s \n", i+1, icon, g.Name, nodeInfo))
			out.WriteString(fmt.Sprintf(" *%s* → [details](%s.md#%s) \n",
				g.Messages[0], g.Subsystem, anchorID(g.Name)))
		}
		out.WriteString("\n")
	}

	// Surface any per-node data-collection errors at the end of the summary.
	var collectErrs []string
	for _, nd := range data.Nodes {
		for _, e := range nd.Errors {
			collectErrs = append(collectErrs, fmt.Sprintf("- **%s**: %s", nd.Node.Name(), e))
		}
	}
	if len(collectErrs) > 0 {
		out.WriteString("## Collection Errors\n\n")
		for _, e := range collectErrs {
			out.WriteString(e + "\n")
		}
		out.WriteString("\n")
	}

	return os.WriteFile(filepath.Join(dir, "summary.md"), []byte(out.String()), 0o644)
}
// writeSubsystem renders <subsystem>.md: a header with per-status totals, a
// detailed section per failure group (including AI analysis when available),
// and a table of every check sorted with failures first.
//
// Fix: the "All Checks" sort previously tie-broke only on ID, but the same
// check ID occurs once per node, and sort.Slice is not stable — so rows for
// the same check could render in a different order on every run. A final
// Node tiebreak makes the output deterministic.
func writeSubsystem(dir, subsystem, ts string, checks []CheckResult, groups []FailureGroup, analysisMap map[string]string) error {
	var b strings.Builder

	// Tally per-status counts for the header line.
	var passed, failed, warned, skipped int
	for _, c := range checks {
		switch c.Status {
		case StatusPass:
			passed++
		case StatusFail:
			failed++
		case StatusWarn:
			warned++
		case StatusSkip:
			skipped++
		}
	}
	b.WriteString(fmt.Sprintf("# %s\n\n", strings.ToUpper(subsystem)))
	b.WriteString(fmt.Sprintf("**Date:** %s \n", ts))
	b.WriteString(fmt.Sprintf("**Checks:** %d passed, %d failed, %d warnings, %d skipped \n\n", passed, failed, warned, skipped))

	// Issues section: one subsection per failure group.
	if len(groups) > 0 {
		b.WriteString("## Issues\n\n")
		for i, g := range groups {
			icon := "FAIL"
			if g.Status == StatusWarn {
				icon = "WARN"
			}
			b.WriteString(fmt.Sprintf("### %d. %s\n\n", i+1, g.Name))
			nodeInfo := fmt.Sprintf("%d nodes", len(g.Nodes))
			if g.Count > len(g.Nodes) {
				nodeInfo = fmt.Sprintf("%d nodes (%d occurrences)", len(g.Nodes), g.Count)
			}
			b.WriteString(fmt.Sprintf("**Status:** %s | **Severity:** %s | **Affected:** %s \n\n", icon, g.Severity, nodeInfo))
			// Affected nodes
			b.WriteString("**Affected nodes:**\n")
			for _, n := range g.Nodes {
				b.WriteString(fmt.Sprintf("- `%s`\n", n))
			}
			b.WriteString("\n")
			// Messages (singular vs. plural layout)
			if len(g.Messages) == 1 {
				b.WriteString(fmt.Sprintf("**Detail:** %s\n\n", g.Messages[0]))
			} else {
				b.WriteString("**Details:**\n")
				for _, m := range g.Messages {
					b.WriteString(fmt.Sprintf("- %s\n", m))
				}
				b.WriteString("\n")
			}
			// AI analysis (if available for this group)
			if ai, ok := analysisMap[g.ID]; ok {
				b.WriteString(ai)
				b.WriteString("\n\n")
			}
			b.WriteString("---\n\n")
		}
	}

	// All checks table. Sort a copy so the caller's slice is left untouched.
	b.WriteString("## All Checks\n\n")
	b.WriteString("| Status | Severity | Check | Node | Detail |\n")
	b.WriteString("|--------|----------|-------|------|--------|\n")
	sorted := make([]CheckResult, len(checks))
	copy(sorted, checks)
	sort.Slice(sorted, func(i, j int) bool {
		oi, oj := statusOrder(sorted[i].Status), statusOrder(sorted[j].Status)
		if oi != oj {
			return oi < oj
		}
		if sorted[i].Severity != sorted[j].Severity {
			return sorted[i].Severity > sorted[j].Severity
		}
		if sorted[i].ID != sorted[j].ID {
			return sorted[i].ID < sorted[j].ID
		}
		// Same check on multiple nodes: tie-break on node so the rendered
		// table is deterministic across runs (sort.Slice is not stable).
		return sorted[i].Node < sorted[j].Node
	})
	for _, c := range sorted {
		node := c.Node
		if node == "" {
			node = "cluster-wide"
		}
		// Escape pipes so messages cannot break the markdown table layout.
		msg := strings.ReplaceAll(c.Message, "|", "\\|")
		b.WriteString(fmt.Sprintf("| %s | %s | %s | %s | %s |\n",
			statusIcon(c.Status), c.Severity, c.Name, node, msg))
	}

	return os.WriteFile(filepath.Join(dir, subsystem+".md"), []byte(b.String()), 0o644)
}
// filterGroupsBySeverity returns the subset of groups whose severity is at or
// above minSeverity, preserving their original order.
func filterGroupsBySeverity(groups []FailureGroup, minSeverity Severity) []FailureGroup {
	var filtered []FailureGroup
	for i := range groups {
		if groups[i].Severity >= minSeverity {
			filtered = append(filtered, groups[i])
		}
	}
	return filtered
}
// anchorID converts a heading name into a GitHub-style markdown anchor:
// lowercase, spaces become hyphens, and any character outside [a-z0-9-] is
// dropped.
func anchorID(name string) string {
	var b strings.Builder
	b.Grow(len(name))
	for _, r := range strings.ToLower(name) {
		switch {
		case r == ' ':
			b.WriteByte('-')
		case (r >= 'a' && r <= 'z') || (r >= '0' && r <= '9') || r == '-':
			b.WriteRune(r)
		}
	}
	return b.String()
}