mirror of
https://github.com/DeBrosOfficial/orama.git
synced 2026-03-17 09:36:56 +00:00
Improved how we present results on inspector and added anyone tests
This commit is contained in:
parent
ebdd08f71c
commit
4356f5544a
2
.gitignore
vendored
2
.gitignore
vendored
@ -106,3 +106,5 @@ terms-agreement
|
|||||||
|
|
||||||
cli
|
cli
|
||||||
./inspector
|
./inspector
|
||||||
|
|
||||||
|
results/
|
||||||
@ -51,10 +51,12 @@ func HandleInspectCommand(args []string) {
|
|||||||
|
|
||||||
configPath := fs.String("config", "scripts/remote-nodes.conf", "Path to remote-nodes.conf")
|
configPath := fs.String("config", "scripts/remote-nodes.conf", "Path to remote-nodes.conf")
|
||||||
env := fs.String("env", "", "Environment to inspect (devnet, testnet)")
|
env := fs.String("env", "", "Environment to inspect (devnet, testnet)")
|
||||||
subsystem := fs.String("subsystem", "all", "Subsystem to inspect (rqlite,olric,ipfs,dns,wg,system,network,all)")
|
subsystem := fs.String("subsystem", "all", "Subsystem to inspect (rqlite,olric,ipfs,dns,wg,system,network,anyone,all)")
|
||||||
format := fs.String("format", "table", "Output format (table, json)")
|
format := fs.String("format", "table", "Output format (table, json)")
|
||||||
timeout := fs.Duration("timeout", 30*time.Second, "SSH command timeout")
|
timeout := fs.Duration("timeout", 30*time.Second, "SSH command timeout")
|
||||||
verbose := fs.Bool("verbose", false, "Verbose output")
|
verbose := fs.Bool("verbose", false, "Verbose output")
|
||||||
|
// Output flags
|
||||||
|
outputDir := fs.String("output", "", "Save results to directory as markdown (e.g., ./results)")
|
||||||
// AI flags
|
// AI flags
|
||||||
aiEnabled := fs.Bool("ai", false, "Enable AI analysis of failures")
|
aiEnabled := fs.Bool("ai", false, "Enable AI analysis of failures")
|
||||||
aiModel := fs.String("model", "moonshotai/kimi-k2.5", "OpenRouter model for AI analysis")
|
aiModel := fs.String("model", "moonshotai/kimi-k2.5", "OpenRouter model for AI analysis")
|
||||||
@ -70,6 +72,7 @@ func HandleInspectCommand(args []string) {
|
|||||||
fmt.Fprintf(os.Stderr, " orama inspect --env devnet --subsystem rqlite\n")
|
fmt.Fprintf(os.Stderr, " orama inspect --env devnet --subsystem rqlite\n")
|
||||||
fmt.Fprintf(os.Stderr, " orama inspect --env devnet --ai\n")
|
fmt.Fprintf(os.Stderr, " orama inspect --env devnet --ai\n")
|
||||||
fmt.Fprintf(os.Stderr, " orama inspect --env devnet --ai --model openai/gpt-4o\n")
|
fmt.Fprintf(os.Stderr, " orama inspect --env devnet --ai --model openai/gpt-4o\n")
|
||||||
|
fmt.Fprintf(os.Stderr, " orama inspect --env devnet --ai --output ./results\n")
|
||||||
}
|
}
|
||||||
|
|
||||||
if err := fs.Parse(args); err != nil {
|
if err := fs.Parse(args); err != nil {
|
||||||
@ -136,18 +139,31 @@ func HandleInspectCommand(args []string) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Phase 4: AI Analysis (if enabled and there are failures or warnings)
|
// Phase 4: AI Analysis (if enabled and there are failures or warnings)
|
||||||
|
var analysis *inspector.AnalysisResult
|
||||||
if *aiEnabled {
|
if *aiEnabled {
|
||||||
issues := results.FailuresAndWarnings()
|
issues := results.FailuresAndWarnings()
|
||||||
if len(issues) == 0 {
|
if len(issues) == 0 {
|
||||||
fmt.Printf("\nAll checks passed — no AI analysis needed.\n")
|
fmt.Printf("\nAll checks passed — no AI analysis needed.\n")
|
||||||
|
} else if *outputDir != "" {
|
||||||
|
// Per-group AI analysis for file output
|
||||||
|
groups := inspector.GroupFailures(results)
|
||||||
|
fmt.Printf("\nAnalyzing %d unique issues with %s...\n", len(groups), *aiModel)
|
||||||
|
var err error
|
||||||
|
analysis, err = inspector.AnalyzeGroups(groups, results, data, *aiModel, *aiAPIKey)
|
||||||
|
if err != nil {
|
||||||
|
fmt.Fprintf(os.Stderr, "\nAI analysis failed: %v\n", err)
|
||||||
} else {
|
} else {
|
||||||
// Count affected subsystems
|
inspector.PrintAnalysis(analysis, os.Stdout)
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// Per-subsystem AI analysis for terminal output
|
||||||
subs := map[string]bool{}
|
subs := map[string]bool{}
|
||||||
for _, c := range issues {
|
for _, c := range issues {
|
||||||
subs[c.Subsystem] = true
|
subs[c.Subsystem] = true
|
||||||
}
|
}
|
||||||
fmt.Printf("\nAnalyzing %d issues across %d subsystems with %s...\n", len(issues), len(subs), *aiModel)
|
fmt.Printf("\nAnalyzing %d issues across %d subsystems with %s...\n", len(issues), len(subs), *aiModel)
|
||||||
analysis, err := inspector.Analyze(results, data, *aiModel, *aiAPIKey)
|
var err error
|
||||||
|
analysis, err = inspector.Analyze(results, data, *aiModel, *aiAPIKey)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
fmt.Fprintf(os.Stderr, "\nAI analysis failed: %v\n", err)
|
fmt.Fprintf(os.Stderr, "\nAI analysis failed: %v\n", err)
|
||||||
} else {
|
} else {
|
||||||
@ -156,6 +172,16 @@ func HandleInspectCommand(args []string) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Phase 5: Write results to disk (if --output is set)
|
||||||
|
if *outputDir != "" {
|
||||||
|
outPath, err := inspector.WriteResults(*outputDir, *env, results, data, analysis)
|
||||||
|
if err != nil {
|
||||||
|
fmt.Fprintf(os.Stderr, "\nError writing results: %v\n", err)
|
||||||
|
} else {
|
||||||
|
fmt.Printf("\nResults saved to %s\n", outPath)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Exit with non-zero if any failures
|
// Exit with non-zero if any failures
|
||||||
if failures := results.Failures(); len(failures) > 0 {
|
if failures := results.Failures(); len(failures) > 0 {
|
||||||
os.Exit(1)
|
os.Exit(1)
|
||||||
|
|||||||
@ -52,9 +52,10 @@ Step-by-step commands to resolve. Include actual node IPs/names from the data wh
|
|||||||
### Prevention
|
### Prevention
|
||||||
What could prevent this in the future? (omit if not applicable)`
|
What could prevent this in the future? (omit if not applicable)`
|
||||||
|
|
||||||
// SubsystemAnalysis holds the AI analysis for a single subsystem.
|
// SubsystemAnalysis holds the AI analysis for a single subsystem or failure group.
|
||||||
type SubsystemAnalysis struct {
|
type SubsystemAnalysis struct {
|
||||||
Subsystem string
|
Subsystem string
|
||||||
|
GroupID string // e.g. "anyone.bootstrapped" — empty when analyzing whole subsystem
|
||||||
Analysis string
|
Analysis string
|
||||||
Duration time.Duration
|
Duration time.Duration
|
||||||
Error error
|
Error error
|
||||||
@ -149,6 +150,125 @@ func Analyze(results *Results, data *ClusterData, model, apiKey string) (*Analys
|
|||||||
}, nil
|
}, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// AnalyzeGroups sends each failure group to OpenRouter for focused AI analysis.
|
||||||
|
// Unlike Analyze which sends one call per subsystem, this sends one call per unique
|
||||||
|
// failure pattern, producing more focused and actionable results.
|
||||||
|
func AnalyzeGroups(groups []FailureGroup, results *Results, data *ClusterData, model, apiKey string) (*AnalysisResult, error) {
|
||||||
|
if apiKey == "" {
|
||||||
|
apiKey = os.Getenv("OPENROUTER_API_KEY")
|
||||||
|
}
|
||||||
|
if apiKey == "" {
|
||||||
|
return nil, fmt.Errorf("no API key: set --api-key or OPENROUTER_API_KEY env")
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(groups) == 0 {
|
||||||
|
return &AnalysisResult{Model: model}, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Build shared context
|
||||||
|
issuesBySubsystem := map[string][]CheckResult{}
|
||||||
|
for _, c := range results.FailuresAndWarnings() {
|
||||||
|
issuesBySubsystem[c.Subsystem] = append(issuesBySubsystem[c.Subsystem], c)
|
||||||
|
}
|
||||||
|
healthySummary := buildHealthySummary(results, issuesBySubsystem)
|
||||||
|
collectionErrors := buildCollectionErrors(data)
|
||||||
|
|
||||||
|
start := time.Now()
|
||||||
|
var mu sync.Mutex
|
||||||
|
var wg sync.WaitGroup
|
||||||
|
var analyses []SubsystemAnalysis
|
||||||
|
|
||||||
|
for _, g := range groups {
|
||||||
|
wg.Add(1)
|
||||||
|
go func(group FailureGroup) {
|
||||||
|
defer wg.Done()
|
||||||
|
|
||||||
|
prompt := buildGroupPrompt(group, data, healthySummary, collectionErrors)
|
||||||
|
subStart := time.Now()
|
||||||
|
response, err := callOpenRouter(model, apiKey, prompt)
|
||||||
|
|
||||||
|
sa := SubsystemAnalysis{
|
||||||
|
Subsystem: group.Subsystem,
|
||||||
|
GroupID: group.ID,
|
||||||
|
Duration: time.Since(subStart),
|
||||||
|
}
|
||||||
|
if err != nil {
|
||||||
|
sa.Error = err
|
||||||
|
} else {
|
||||||
|
sa.Analysis = response
|
||||||
|
}
|
||||||
|
|
||||||
|
mu.Lock()
|
||||||
|
analyses = append(analyses, sa)
|
||||||
|
mu.Unlock()
|
||||||
|
}(g)
|
||||||
|
}
|
||||||
|
wg.Wait()
|
||||||
|
|
||||||
|
// Sort by subsystem then group ID for consistent output
|
||||||
|
sort.Slice(analyses, func(i, j int) bool {
|
||||||
|
if analyses[i].Subsystem != analyses[j].Subsystem {
|
||||||
|
return analyses[i].Subsystem < analyses[j].Subsystem
|
||||||
|
}
|
||||||
|
return analyses[i].GroupID < analyses[j].GroupID
|
||||||
|
})
|
||||||
|
|
||||||
|
return &AnalysisResult{
|
||||||
|
Model: model,
|
||||||
|
Analyses: analyses,
|
||||||
|
Duration: time.Since(start),
|
||||||
|
}, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func buildGroupPrompt(group FailureGroup, data *ClusterData, healthySummary, collectionErrors string) string {
|
||||||
|
var b strings.Builder
|
||||||
|
|
||||||
|
icon := "FAILURE"
|
||||||
|
if group.Status == StatusWarn {
|
||||||
|
icon = "WARNING"
|
||||||
|
}
|
||||||
|
|
||||||
|
b.WriteString(fmt.Sprintf("## %s: %s\n\n", icon, group.Name))
|
||||||
|
b.WriteString(fmt.Sprintf("**Check ID:** %s \n", group.ID))
|
||||||
|
b.WriteString(fmt.Sprintf("**Severity:** %s \n", group.Severity))
|
||||||
|
b.WriteString(fmt.Sprintf("**Nodes affected:** %d \n\n", len(group.Nodes)))
|
||||||
|
|
||||||
|
b.WriteString("**Affected nodes:**\n")
|
||||||
|
for _, n := range group.Nodes {
|
||||||
|
b.WriteString(fmt.Sprintf("- %s\n", n))
|
||||||
|
}
|
||||||
|
b.WriteString("\n")
|
||||||
|
|
||||||
|
b.WriteString("**Error messages:**\n")
|
||||||
|
for _, m := range group.Messages {
|
||||||
|
b.WriteString(fmt.Sprintf("- %s\n", m))
|
||||||
|
}
|
||||||
|
b.WriteString("\n")
|
||||||
|
|
||||||
|
// Subsystem raw data
|
||||||
|
contextData := buildSubsystemContext(group.Subsystem, data)
|
||||||
|
if contextData != "" {
|
||||||
|
b.WriteString(fmt.Sprintf("## %s Raw Data (all nodes)\n", strings.ToUpper(group.Subsystem)))
|
||||||
|
b.WriteString(contextData)
|
||||||
|
b.WriteString("\n")
|
||||||
|
}
|
||||||
|
|
||||||
|
if healthySummary != "" {
|
||||||
|
b.WriteString("## Healthy Subsystems\n")
|
||||||
|
b.WriteString(healthySummary)
|
||||||
|
b.WriteString("\n")
|
||||||
|
}
|
||||||
|
|
||||||
|
if collectionErrors != "" {
|
||||||
|
b.WriteString("## Collection Errors\n")
|
||||||
|
b.WriteString(collectionErrors)
|
||||||
|
b.WriteString("\n")
|
||||||
|
}
|
||||||
|
|
||||||
|
b.WriteString(fmt.Sprintf("\nAnalyze this specific %s issue. Be concise — focus on this one problem.\n", group.Subsystem))
|
||||||
|
return b.String()
|
||||||
|
}
|
||||||
|
|
||||||
func buildClusterOverview(data *ClusterData, results *Results) string {
|
func buildClusterOverview(data *ClusterData, results *Results) string {
|
||||||
var b strings.Builder
|
var b strings.Builder
|
||||||
b.WriteString(fmt.Sprintf("Nodes: %d\n", len(data.Nodes)))
|
b.WriteString(fmt.Sprintf("Nodes: %d\n", len(data.Nodes)))
|
||||||
@ -286,6 +406,8 @@ func buildSubsystemContext(subsystem string, data *ClusterData) string {
|
|||||||
return buildNetworkContext(data)
|
return buildNetworkContext(data)
|
||||||
case "namespace":
|
case "namespace":
|
||||||
return buildNamespaceContext(data)
|
return buildNamespaceContext(data)
|
||||||
|
case "anyone":
|
||||||
|
return buildAnyoneContext(data)
|
||||||
default:
|
default:
|
||||||
return ""
|
return ""
|
||||||
}
|
}
|
||||||
@ -486,6 +608,40 @@ func buildNamespaceContext(data *ClusterData) string {
|
|||||||
return b.String()
|
return b.String()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func buildAnyoneContext(data *ClusterData) string {
|
||||||
|
var b strings.Builder
|
||||||
|
for host, nd := range data.Nodes {
|
||||||
|
if nd.Anyone == nil {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
a := nd.Anyone
|
||||||
|
if !a.RelayActive && !a.ClientActive {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
b.WriteString(fmt.Sprintf("### %s\n", host))
|
||||||
|
b.WriteString(fmt.Sprintf(" relay=%v client=%v orport=%v socks=%v control=%v\n",
|
||||||
|
a.RelayActive, a.ClientActive, a.ORPortListening, a.SocksListening, a.ControlListening))
|
||||||
|
if a.RelayActive {
|
||||||
|
b.WriteString(fmt.Sprintf(" bootstrap=%d%% fingerprint=%s nickname=%s\n",
|
||||||
|
a.BootstrapPct, a.Fingerprint, a.Nickname))
|
||||||
|
}
|
||||||
|
if len(a.ORPortReachable) > 0 {
|
||||||
|
var unreachable []string
|
||||||
|
for h, ok := range a.ORPortReachable {
|
||||||
|
if !ok {
|
||||||
|
unreachable = append(unreachable, h)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if len(unreachable) > 0 {
|
||||||
|
b.WriteString(fmt.Sprintf(" orport_unreachable: %s\n", strings.Join(unreachable, ", ")))
|
||||||
|
} else {
|
||||||
|
b.WriteString(fmt.Sprintf(" orport: all %d peers reachable\n", len(a.ORPortReachable)))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return b.String()
|
||||||
|
}
|
||||||
|
|
||||||
// OpenRouter API types (OpenAI-compatible)
|
// OpenRouter API types (OpenAI-compatible)
|
||||||
|
|
||||||
type openRouterRequest struct {
|
type openRouterRequest struct {
|
||||||
@ -531,7 +687,7 @@ func callOpenRouter(model, apiKey, prompt string) (string, error) {
|
|||||||
req.Header.Set("Content-Type", "application/json")
|
req.Header.Set("Content-Type", "application/json")
|
||||||
req.Header.Set("Authorization", "Bearer "+apiKey)
|
req.Header.Set("Authorization", "Bearer "+apiKey)
|
||||||
|
|
||||||
client := &http.Client{Timeout: 120 * time.Second}
|
client := &http.Client{Timeout: 180 * time.Second}
|
||||||
resp, err := client.Do(req)
|
resp, err := client.Do(req)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return "", fmt.Errorf("HTTP request: %w", err)
|
return "", fmt.Errorf("HTTP request: %w", err)
|
||||||
|
|||||||
170
pkg/inspector/checks/anyone.go
Normal file
170
pkg/inspector/checks/anyone.go
Normal file
@ -0,0 +1,170 @@
|
|||||||
|
package checks
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
|
||||||
|
"github.com/DeBrosOfficial/network/pkg/inspector"
|
||||||
|
)
|
||||||
|
|
||||||
|
func init() {
|
||||||
|
inspector.RegisterChecker("anyone", CheckAnyone)
|
||||||
|
}
|
||||||
|
|
||||||
|
const anyoneSub = "anyone"
|
||||||
|
|
||||||
|
// CheckAnyone runs all Anyone relay/client health checks.
|
||||||
|
func CheckAnyone(data *inspector.ClusterData) []inspector.CheckResult {
|
||||||
|
var results []inspector.CheckResult
|
||||||
|
|
||||||
|
for _, nd := range data.Nodes {
|
||||||
|
if nd.Anyone == nil {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
results = append(results, checkAnyonePerNode(nd)...)
|
||||||
|
}
|
||||||
|
|
||||||
|
results = append(results, checkAnyoneCrossNode(data)...)
|
||||||
|
|
||||||
|
return results
|
||||||
|
}
|
||||||
|
|
||||||
|
func checkAnyonePerNode(nd *inspector.NodeData) []inspector.CheckResult {
|
||||||
|
var r []inspector.CheckResult
|
||||||
|
a := nd.Anyone
|
||||||
|
node := nd.Node.Name()
|
||||||
|
|
||||||
|
// If neither service is active, skip all checks for this node
|
||||||
|
if !a.RelayActive && !a.ClientActive {
|
||||||
|
return r
|
||||||
|
}
|
||||||
|
|
||||||
|
// --- Relay checks ---
|
||||||
|
if a.RelayActive {
|
||||||
|
r = append(r, inspector.Pass("anyone.relay_active", "Anyone relay service active", anyoneSub, node,
|
||||||
|
"debros-anyone-relay is active", inspector.High))
|
||||||
|
|
||||||
|
// ORPort listening
|
||||||
|
if a.ORPortListening {
|
||||||
|
r = append(r, inspector.Pass("anyone.orport_listening", "ORPort 9001 listening", anyoneSub, node,
|
||||||
|
"port 9001 bound", inspector.High))
|
||||||
|
} else {
|
||||||
|
r = append(r, inspector.Fail("anyone.orport_listening", "ORPort 9001 listening", anyoneSub, node,
|
||||||
|
"port 9001 NOT bound", inspector.High))
|
||||||
|
}
|
||||||
|
|
||||||
|
// Control port
|
||||||
|
if a.ControlListening {
|
||||||
|
r = append(r, inspector.Pass("anyone.control_listening", "Control port 9051 listening", anyoneSub, node,
|
||||||
|
"port 9051 bound", inspector.Low))
|
||||||
|
} else {
|
||||||
|
r = append(r, inspector.Warn("anyone.control_listening", "Control port 9051 listening", anyoneSub, node,
|
||||||
|
"port 9051 NOT bound (monitoring unavailable)", inspector.Low))
|
||||||
|
}
|
||||||
|
|
||||||
|
// Bootstrap status
|
||||||
|
if a.Bootstrapped {
|
||||||
|
r = append(r, inspector.Pass("anyone.bootstrapped", "Relay bootstrapped", anyoneSub, node,
|
||||||
|
fmt.Sprintf("bootstrap=%d%%", a.BootstrapPct), inspector.High))
|
||||||
|
} else if a.BootstrapPct > 0 {
|
||||||
|
r = append(r, inspector.Warn("anyone.bootstrapped", "Relay bootstrapped", anyoneSub, node,
|
||||||
|
fmt.Sprintf("bootstrap=%d%% (still connecting)", a.BootstrapPct), inspector.High))
|
||||||
|
} else {
|
||||||
|
r = append(r, inspector.Fail("anyone.bootstrapped", "Relay bootstrapped", anyoneSub, node,
|
||||||
|
"bootstrap=0% (not started or log missing)", inspector.High))
|
||||||
|
}
|
||||||
|
|
||||||
|
// Fingerprint present
|
||||||
|
if a.Fingerprint != "" {
|
||||||
|
r = append(r, inspector.Pass("anyone.fingerprint", "Relay has fingerprint", anyoneSub, node,
|
||||||
|
fmt.Sprintf("fingerprint=%s", a.Fingerprint), inspector.Medium))
|
||||||
|
} else {
|
||||||
|
r = append(r, inspector.Warn("anyone.fingerprint", "Relay has fingerprint", anyoneSub, node,
|
||||||
|
"no fingerprint found (relay may not have generated keys yet)", inspector.Medium))
|
||||||
|
}
|
||||||
|
|
||||||
|
// Nickname configured
|
||||||
|
if a.Nickname != "" {
|
||||||
|
r = append(r, inspector.Pass("anyone.nickname", "Relay nickname configured", anyoneSub, node,
|
||||||
|
fmt.Sprintf("nickname=%s", a.Nickname), inspector.Low))
|
||||||
|
} else {
|
||||||
|
r = append(r, inspector.Warn("anyone.nickname", "Relay nickname configured", anyoneSub, node,
|
||||||
|
"no nickname in /etc/anon/anonrc", inspector.Low))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// --- Client checks ---
|
||||||
|
if a.ClientActive {
|
||||||
|
r = append(r, inspector.Pass("anyone.client_active", "Anyone client service active", anyoneSub, node,
|
||||||
|
"debros-anyone-client is active", inspector.High))
|
||||||
|
|
||||||
|
// SOCKS5 port listening
|
||||||
|
if a.SocksListening {
|
||||||
|
r = append(r, inspector.Pass("anyone.socks_listening", "SOCKS5 port 9050 listening", anyoneSub, node,
|
||||||
|
"port 9050 bound", inspector.High))
|
||||||
|
} else {
|
||||||
|
r = append(r, inspector.Fail("anyone.socks_listening", "SOCKS5 port 9050 listening", anyoneSub, node,
|
||||||
|
"port 9050 NOT bound (IPFS traffic cannot route through anonymity network)", inspector.High))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return r
|
||||||
|
}
|
||||||
|
|
||||||
|
func checkAnyoneCrossNode(data *inspector.ClusterData) []inspector.CheckResult {
|
||||||
|
var r []inspector.CheckResult
|
||||||
|
|
||||||
|
// Count relay and client nodes
|
||||||
|
relayActive := 0
|
||||||
|
relayTotal := 0
|
||||||
|
clientActive := 0
|
||||||
|
clientTotal := 0
|
||||||
|
|
||||||
|
for _, nd := range data.Nodes {
|
||||||
|
if nd.Anyone == nil {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if nd.Anyone.RelayActive {
|
||||||
|
relayActive++
|
||||||
|
relayTotal++
|
||||||
|
}
|
||||||
|
if nd.Anyone.ClientActive {
|
||||||
|
clientActive++
|
||||||
|
clientTotal++
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Skip cross-node checks if no Anyone services at all
|
||||||
|
if relayTotal == 0 && clientTotal == 0 {
|
||||||
|
return r
|
||||||
|
}
|
||||||
|
|
||||||
|
// ORPort reachability: check if relays are publicly accessible from other nodes
|
||||||
|
orportChecked := 0
|
||||||
|
orportReachable := 0
|
||||||
|
orportFailed := 0
|
||||||
|
|
||||||
|
for _, nd := range data.Nodes {
|
||||||
|
if nd.Anyone == nil {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
for host, ok := range nd.Anyone.ORPortReachable {
|
||||||
|
orportChecked++
|
||||||
|
if ok {
|
||||||
|
orportReachable++
|
||||||
|
} else {
|
||||||
|
orportFailed++
|
||||||
|
r = append(r, inspector.Fail("anyone.orport_reachable",
|
||||||
|
fmt.Sprintf("ORPort 9001 reachable on %s", host),
|
||||||
|
anyoneSub, nd.Node.Name(),
|
||||||
|
fmt.Sprintf("cannot TCP connect to %s:9001 from %s", host, nd.Node.Name()), inspector.High))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if orportChecked > 0 && orportFailed == 0 {
|
||||||
|
r = append(r, inspector.Pass("anyone.orport_reachable", "ORPort 9001 reachable across nodes", anyoneSub, "",
|
||||||
|
fmt.Sprintf("all %d cross-node connections OK", orportReachable), inspector.High))
|
||||||
|
}
|
||||||
|
|
||||||
|
return r
|
||||||
|
}
|
||||||
219
pkg/inspector/checks/anyone_test.go
Normal file
219
pkg/inspector/checks/anyone_test.go
Normal file
@ -0,0 +1,219 @@
|
|||||||
|
package checks
|
||||||
|
|
||||||
|
import (
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/DeBrosOfficial/network/pkg/inspector"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestCheckAnyone_NilData(t *testing.T) {
|
||||||
|
nd := makeNodeData("1.1.1.1", "node")
|
||||||
|
data := makeCluster(map[string]*inspector.NodeData{"1.1.1.1": nd})
|
||||||
|
results := CheckAnyone(data)
|
||||||
|
if len(results) != 0 {
|
||||||
|
t.Errorf("expected 0 results for nil Anyone data, got %d", len(results))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestCheckAnyone_BothInactive(t *testing.T) {
|
||||||
|
nd := makeNodeData("1.1.1.1", "node")
|
||||||
|
nd.Anyone = &inspector.AnyoneData{
|
||||||
|
ORPortReachable: make(map[string]bool),
|
||||||
|
}
|
||||||
|
data := makeCluster(map[string]*inspector.NodeData{"1.1.1.1": nd})
|
||||||
|
results := CheckAnyone(data)
|
||||||
|
if len(results) != 0 {
|
||||||
|
t.Errorf("expected 0 results when both services inactive, got %d", len(results))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestCheckAnyone_HealthyRelay(t *testing.T) {
|
||||||
|
nd := makeNodeData("1.1.1.1", "node")
|
||||||
|
nd.Anyone = &inspector.AnyoneData{
|
||||||
|
RelayActive: true,
|
||||||
|
ORPortListening: true,
|
||||||
|
ControlListening: true,
|
||||||
|
Bootstrapped: true,
|
||||||
|
BootstrapPct: 100,
|
||||||
|
Fingerprint: "ABCDEF1234567890",
|
||||||
|
Nickname: "OramaRelay1",
|
||||||
|
ORPortReachable: make(map[string]bool),
|
||||||
|
}
|
||||||
|
|
||||||
|
data := makeCluster(map[string]*inspector.NodeData{"1.1.1.1": nd})
|
||||||
|
results := CheckAnyone(data)
|
||||||
|
|
||||||
|
expectStatus(t, results, "anyone.relay_active", inspector.StatusPass)
|
||||||
|
expectStatus(t, results, "anyone.orport_listening", inspector.StatusPass)
|
||||||
|
expectStatus(t, results, "anyone.control_listening", inspector.StatusPass)
|
||||||
|
expectStatus(t, results, "anyone.bootstrapped", inspector.StatusPass)
|
||||||
|
expectStatus(t, results, "anyone.fingerprint", inspector.StatusPass)
|
||||||
|
expectStatus(t, results, "anyone.nickname", inspector.StatusPass)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestCheckAnyone_HealthyClient(t *testing.T) {
|
||||||
|
nd := makeNodeData("1.1.1.1", "node")
|
||||||
|
nd.Anyone = &inspector.AnyoneData{
|
||||||
|
ClientActive: true,
|
||||||
|
SocksListening: true,
|
||||||
|
ORPortReachable: make(map[string]bool),
|
||||||
|
}
|
||||||
|
|
||||||
|
data := makeCluster(map[string]*inspector.NodeData{"1.1.1.1": nd})
|
||||||
|
results := CheckAnyone(data)
|
||||||
|
|
||||||
|
expectStatus(t, results, "anyone.client_active", inspector.StatusPass)
|
||||||
|
expectStatus(t, results, "anyone.socks_listening", inspector.StatusPass)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestCheckAnyone_RelayORPortDown(t *testing.T) {
|
||||||
|
nd := makeNodeData("1.1.1.1", "node")
|
||||||
|
nd.Anyone = &inspector.AnyoneData{
|
||||||
|
RelayActive: true,
|
||||||
|
ORPortListening: false,
|
||||||
|
ControlListening: true,
|
||||||
|
ORPortReachable: make(map[string]bool),
|
||||||
|
}
|
||||||
|
|
||||||
|
data := makeCluster(map[string]*inspector.NodeData{"1.1.1.1": nd})
|
||||||
|
results := CheckAnyone(data)
|
||||||
|
|
||||||
|
expectStatus(t, results, "anyone.orport_listening", inspector.StatusFail)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestCheckAnyone_RelayNotBootstrapped(t *testing.T) {
|
||||||
|
nd := makeNodeData("1.1.1.1", "node")
|
||||||
|
nd.Anyone = &inspector.AnyoneData{
|
||||||
|
RelayActive: true,
|
||||||
|
ORPortListening: true,
|
||||||
|
BootstrapPct: 0,
|
||||||
|
Bootstrapped: false,
|
||||||
|
ORPortReachable: make(map[string]bool),
|
||||||
|
}
|
||||||
|
|
||||||
|
data := makeCluster(map[string]*inspector.NodeData{"1.1.1.1": nd})
|
||||||
|
results := CheckAnyone(data)
|
||||||
|
|
||||||
|
expectStatus(t, results, "anyone.bootstrapped", inspector.StatusFail)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestCheckAnyone_RelayPartialBootstrap(t *testing.T) {
|
||||||
|
nd := makeNodeData("1.1.1.1", "node")
|
||||||
|
nd.Anyone = &inspector.AnyoneData{
|
||||||
|
RelayActive: true,
|
||||||
|
ORPortListening: true,
|
||||||
|
BootstrapPct: 75,
|
||||||
|
Bootstrapped: false,
|
||||||
|
ORPortReachable: make(map[string]bool),
|
||||||
|
}
|
||||||
|
|
||||||
|
data := makeCluster(map[string]*inspector.NodeData{"1.1.1.1": nd})
|
||||||
|
results := CheckAnyone(data)
|
||||||
|
|
||||||
|
expectStatus(t, results, "anyone.bootstrapped", inspector.StatusWarn)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestCheckAnyone_ClientSocksDown(t *testing.T) {
|
||||||
|
nd := makeNodeData("1.1.1.1", "node")
|
||||||
|
nd.Anyone = &inspector.AnyoneData{
|
||||||
|
ClientActive: true,
|
||||||
|
SocksListening: false,
|
||||||
|
ORPortReachable: make(map[string]bool),
|
||||||
|
}
|
||||||
|
|
||||||
|
data := makeCluster(map[string]*inspector.NodeData{"1.1.1.1": nd})
|
||||||
|
results := CheckAnyone(data)
|
||||||
|
|
||||||
|
expectStatus(t, results, "anyone.socks_listening", inspector.StatusFail)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestCheckAnyone_NoFingerprint(t *testing.T) {
|
||||||
|
nd := makeNodeData("1.1.1.1", "node")
|
||||||
|
nd.Anyone = &inspector.AnyoneData{
|
||||||
|
RelayActive: true,
|
||||||
|
ORPortListening: true,
|
||||||
|
Fingerprint: "",
|
||||||
|
ORPortReachable: make(map[string]bool),
|
||||||
|
}
|
||||||
|
|
||||||
|
data := makeCluster(map[string]*inspector.NodeData{"1.1.1.1": nd})
|
||||||
|
results := CheckAnyone(data)
|
||||||
|
|
||||||
|
expectStatus(t, results, "anyone.fingerprint", inspector.StatusWarn)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestCheckAnyone_CrossNode_ORPortReachable(t *testing.T) {
|
||||||
|
nd1 := makeNodeData("1.1.1.1", "node")
|
||||||
|
nd1.Anyone = &inspector.AnyoneData{
|
||||||
|
RelayActive: true,
|
||||||
|
ORPortListening: true,
|
||||||
|
ORPortReachable: map[string]bool{"2.2.2.2": true},
|
||||||
|
}
|
||||||
|
|
||||||
|
nd2 := makeNodeData("2.2.2.2", "node")
|
||||||
|
nd2.Anyone = &inspector.AnyoneData{
|
||||||
|
RelayActive: true,
|
||||||
|
ORPortListening: true,
|
||||||
|
ORPortReachable: map[string]bool{"1.1.1.1": true},
|
||||||
|
}
|
||||||
|
|
||||||
|
data := makeCluster(map[string]*inspector.NodeData{"1.1.1.1": nd1, "2.2.2.2": nd2})
|
||||||
|
results := CheckAnyone(data)
|
||||||
|
|
||||||
|
expectStatus(t, results, "anyone.orport_reachable", inspector.StatusPass)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestCheckAnyone_CrossNode_ORPortUnreachable(t *testing.T) {
|
||||||
|
nd1 := makeNodeData("1.1.1.1", "node")
|
||||||
|
nd1.Anyone = &inspector.AnyoneData{
|
||||||
|
RelayActive: true,
|
||||||
|
ORPortListening: true,
|
||||||
|
ORPortReachable: map[string]bool{"2.2.2.2": false},
|
||||||
|
}
|
||||||
|
|
||||||
|
nd2 := makeNodeData("2.2.2.2", "node")
|
||||||
|
nd2.Anyone = &inspector.AnyoneData{
|
||||||
|
RelayActive: true,
|
||||||
|
ORPortListening: true,
|
||||||
|
ORPortReachable: map[string]bool{"1.1.1.1": true},
|
||||||
|
}
|
||||||
|
|
||||||
|
data := makeCluster(map[string]*inspector.NodeData{"1.1.1.1": nd1, "2.2.2.2": nd2})
|
||||||
|
results := CheckAnyone(data)
|
||||||
|
|
||||||
|
// Should have at least one fail for the unreachable connection
|
||||||
|
hasFail := false
|
||||||
|
for _, r := range results {
|
||||||
|
if r.ID == "anyone.orport_reachable" && r.Status == inspector.StatusFail {
|
||||||
|
hasFail = true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if !hasFail {
|
||||||
|
t.Error("expected at least one anyone.orport_reachable fail")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestCheckAnyone_BothRelayAndClient(t *testing.T) {
|
||||||
|
nd := makeNodeData("1.1.1.1", "node")
|
||||||
|
nd.Anyone = &inspector.AnyoneData{
|
||||||
|
RelayActive: true,
|
||||||
|
ClientActive: true,
|
||||||
|
ORPortListening: true,
|
||||||
|
SocksListening: true,
|
||||||
|
ControlListening: true,
|
||||||
|
Bootstrapped: true,
|
||||||
|
BootstrapPct: 100,
|
||||||
|
Fingerprint: "ABCDEF",
|
||||||
|
Nickname: "test",
|
||||||
|
ORPortReachable: make(map[string]bool),
|
||||||
|
}
|
||||||
|
|
||||||
|
data := makeCluster(map[string]*inspector.NodeData{"1.1.1.1": nd})
|
||||||
|
results := CheckAnyone(data)
|
||||||
|
|
||||||
|
// Should have both relay and client checks
|
||||||
|
expectStatus(t, results, "anyone.relay_active", inspector.StatusPass)
|
||||||
|
expectStatus(t, results, "anyone.client_active", inspector.StatusPass)
|
||||||
|
expectStatus(t, results, "anyone.socks_listening", inspector.StatusPass)
|
||||||
|
expectStatus(t, results, "anyone.orport_listening", inspector.StatusPass)
|
||||||
|
}
|
||||||
@ -50,6 +50,22 @@ func checkSystemPerNode(nd *inspector.NodeData) []inspector.CheckResult {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// 6.2 Anyone relay/client services (only check if installed, don't fail if absent)
|
||||||
|
for _, svc := range []string{"debros-anyone-relay", "debros-anyone-client"} {
|
||||||
|
status, ok := sys.Services[svc]
|
||||||
|
if !ok || status == "inactive" {
|
||||||
|
continue // not installed or intentionally stopped
|
||||||
|
}
|
||||||
|
id := fmt.Sprintf("system.svc_%s", strings.ReplaceAll(svc, "-", "_"))
|
||||||
|
name := fmt.Sprintf("%s service active", svc)
|
||||||
|
if status == "active" {
|
||||||
|
r = append(r, inspector.Pass(id, name, systemSub, node, "active", inspector.High))
|
||||||
|
} else {
|
||||||
|
r = append(r, inspector.Fail(id, name, systemSub, node,
|
||||||
|
fmt.Sprintf("status=%s (should be active or uninstalled)", status), inspector.High))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// 6.5 WireGuard service
|
// 6.5 WireGuard service
|
||||||
if status, ok := sys.Services["wg-quick@wg0"]; ok {
|
if status, ok := sys.Services["wg-quick@wg0"]; ok {
|
||||||
if status == "active" {
|
if status == "active" {
|
||||||
|
|||||||
@ -26,6 +26,7 @@ type NodeData struct {
|
|||||||
WireGuard *WireGuardData
|
WireGuard *WireGuardData
|
||||||
System *SystemData
|
System *SystemData
|
||||||
Network *NetworkData
|
Network *NetworkData
|
||||||
|
Anyone *AnyoneData
|
||||||
Namespaces []NamespaceData // namespace instances on this node
|
Namespaces []NamespaceData // namespace instances on this node
|
||||||
Errors []string // collection errors for this node
|
Errors []string // collection errors for this node
|
||||||
}
|
}
|
||||||
@ -224,6 +225,21 @@ type NetworkData struct {
|
|||||||
PingResults map[string]bool // WG peer IP → ping success
|
PingResults map[string]bool // WG peer IP → ping success
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// AnyoneData holds parsed Anyone relay/client status from a node.
// Populated by collectAnyone (single-node SSH pass); ORPortReachable is
// filled in by collectAnyoneReachability in a second, cross-node pass.
type AnyoneData struct {
	RelayActive      bool // debros-anyone-relay systemd service active
	ClientActive     bool // debros-anyone-client systemd service active
	ORPortListening  bool // port 9001 bound locally
	SocksListening   bool // port 9050 bound locally (client SOCKS5)
	ControlListening bool // port 9051 bound locally (control port)
	Bootstrapped     bool // relay has bootstrapped to 100%
	BootstrapPct     int  // bootstrap percentage (0-100)
	Fingerprint      string // relay fingerprint
	Nickname         string // relay nickname
	// NOTE(review): UptimeStr is never assigned by collectAnyone in this
	// file — confirm whether a collector for it is still planned.
	UptimeStr       string          // uptime from control port
	ORPortReachable map[string]bool // host IP → whether we can TCP connect to their 9001 from this node
}
|
||||||
|
|
||||||
// Collect gathers data from all nodes in parallel.
|
// Collect gathers data from all nodes in parallel.
|
||||||
func Collect(ctx context.Context, nodes []Node, subsystems []string, verbose bool) *ClusterData {
|
func Collect(ctx context.Context, nodes []Node, subsystems []string, verbose bool) *ClusterData {
|
||||||
start := time.Now()
|
start := time.Now()
|
||||||
@ -246,6 +262,10 @@ func Collect(ctx context.Context, nodes []Node, subsystems []string, verbose boo
|
|||||||
}
|
}
|
||||||
|
|
||||||
wg.Wait()
|
wg.Wait()
|
||||||
|
|
||||||
|
// Second pass: cross-node ORPort reachability (needs all nodes collected first)
|
||||||
|
collectAnyoneReachability(ctx, data)
|
||||||
|
|
||||||
data.Duration = time.Since(start)
|
data.Duration = time.Since(start)
|
||||||
return data
|
return data
|
||||||
}
|
}
|
||||||
@ -286,6 +306,9 @@ func collectNode(ctx context.Context, node Node, subsystems []string, verbose bo
|
|||||||
if shouldCollect("network") {
|
if shouldCollect("network") {
|
||||||
nd.Network = collectNetwork(ctx, node, nd.WireGuard)
|
nd.Network = collectNetwork(ctx, node, nd.WireGuard)
|
||||||
}
|
}
|
||||||
|
if shouldCollect("anyone") {
|
||||||
|
nd.Anyone = collectAnyone(ctx, node)
|
||||||
|
}
|
||||||
// Namespace collection — always collect if any subsystem is collected
|
// Namespace collection — always collect if any subsystem is collected
|
||||||
nd.Namespaces = collectNamespaces(ctx, node)
|
nd.Namespaces = collectNamespaces(ctx, node)
|
||||||
|
|
||||||
@ -1113,6 +1136,139 @@ echo "$SEP"
|
|||||||
return data
|
return data
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func collectAnyone(ctx context.Context, node Node) *AnyoneData {
|
||||||
|
data := &AnyoneData{
|
||||||
|
ORPortReachable: make(map[string]bool),
|
||||||
|
}
|
||||||
|
|
||||||
|
cmd := `
|
||||||
|
SEP="===INSPECTOR_SEP==="
|
||||||
|
echo "$SEP"
|
||||||
|
systemctl is-active debros-anyone-relay 2>/dev/null || echo inactive
|
||||||
|
echo "$SEP"
|
||||||
|
systemctl is-active debros-anyone-client 2>/dev/null || echo inactive
|
||||||
|
echo "$SEP"
|
||||||
|
ss -tlnp 2>/dev/null | grep -q ':9001 ' && echo yes || echo no
|
||||||
|
echo "$SEP"
|
||||||
|
ss -tlnp 2>/dev/null | grep -q ':9050 ' && echo yes || echo no
|
||||||
|
echo "$SEP"
|
||||||
|
ss -tlnp 2>/dev/null | grep -q ':9051 ' && echo yes || echo no
|
||||||
|
echo "$SEP"
|
||||||
|
# Check bootstrap status from log (last 50 lines)
|
||||||
|
grep -oP 'Bootstrapped \K[0-9]+' /var/log/anon/notices.log 2>/dev/null | tail -1 || echo 0
|
||||||
|
echo "$SEP"
|
||||||
|
# Read fingerprint
|
||||||
|
cat /var/lib/anon/fingerprint 2>/dev/null || echo ""
|
||||||
|
echo "$SEP"
|
||||||
|
# Read nickname from config
|
||||||
|
grep -oP '^Nickname \K\S+' /etc/anon/anonrc 2>/dev/null || echo ""
|
||||||
|
`
|
||||||
|
|
||||||
|
res := RunSSH(ctx, node, cmd)
|
||||||
|
if !res.OK() && res.Stdout == "" {
|
||||||
|
return data
|
||||||
|
}
|
||||||
|
|
||||||
|
parts := strings.Split(res.Stdout, "===INSPECTOR_SEP===")
|
||||||
|
|
||||||
|
if len(parts) > 1 {
|
||||||
|
data.RelayActive = strings.TrimSpace(parts[1]) == "active"
|
||||||
|
}
|
||||||
|
if len(parts) > 2 {
|
||||||
|
data.ClientActive = strings.TrimSpace(parts[2]) == "active"
|
||||||
|
}
|
||||||
|
if len(parts) > 3 {
|
||||||
|
data.ORPortListening = strings.TrimSpace(parts[3]) == "yes"
|
||||||
|
}
|
||||||
|
if len(parts) > 4 {
|
||||||
|
data.SocksListening = strings.TrimSpace(parts[4]) == "yes"
|
||||||
|
}
|
||||||
|
if len(parts) > 5 {
|
||||||
|
data.ControlListening = strings.TrimSpace(parts[5]) == "yes"
|
||||||
|
}
|
||||||
|
if len(parts) > 6 {
|
||||||
|
pct := parseIntDefault(strings.TrimSpace(parts[6]), 0)
|
||||||
|
data.BootstrapPct = pct
|
||||||
|
data.Bootstrapped = pct >= 100
|
||||||
|
}
|
||||||
|
if len(parts) > 7 {
|
||||||
|
data.Fingerprint = strings.TrimSpace(parts[7])
|
||||||
|
}
|
||||||
|
if len(parts) > 8 {
|
||||||
|
data.Nickname = strings.TrimSpace(parts[8])
|
||||||
|
}
|
||||||
|
|
||||||
|
// If neither relay nor client is active, skip further checks
|
||||||
|
if !data.RelayActive && !data.ClientActive {
|
||||||
|
return data
|
||||||
|
}
|
||||||
|
|
||||||
|
return data
|
||||||
|
}
|
||||||
|
|
||||||
|
// collectAnyoneReachability runs a second pass to check ORPort reachability across nodes.
// Called after all nodes are collected so we know which nodes run relays.
// One goroutine per node issues a single SSH command that probes every other
// relay's port 9001 via bash's /dev/tcp; results land in each node's
// Anyone.ORPortReachable map.
func collectAnyoneReachability(ctx context.Context, data *ClusterData) {
	// Find all nodes running the relay (have ORPort listening)
	var relayHosts []string
	for host, nd := range data.Nodes {
		if nd.Anyone != nil && nd.Anyone.RelayActive && nd.Anyone.ORPortListening {
			relayHosts = append(relayHosts, host)
		}
	}

	// Nothing to probe if no relay is up anywhere.
	if len(relayHosts) == 0 {
		return
	}

	// From each node, try to TCP connect to each relay's ORPort 9001
	// NOTE(review): each goroutine writes only its own nd.Anyone map, so the
	// shared mutex is stricter than strictly necessary — harmless, but confirm
	// if it was meant to guard something else.
	var mu sync.Mutex
	var wg sync.WaitGroup

	for _, nd := range data.Nodes {
		if nd.Anyone == nil {
			continue
		}
		wg.Add(1)
		// nd is passed explicitly so the goroutine captures this iteration's value.
		go func(nd *NodeData) {
			defer wg.Done()

			// Build commands to test TCP connectivity to each relay
			var tcpCmds string
			for _, relayHost := range relayHosts {
				if relayHost == nd.Node.Host {
					continue // skip self
				}
				// bash /dev/tcp connect with a 3-second cap; emits "ORPORT:<host>:ok|fail"
				tcpCmds += fmt.Sprintf(
					`echo "ORPORT:%s:$(timeout 3 bash -c 'echo >/dev/tcp/%s/9001' 2>/dev/null && echo ok || echo fail)"
`, relayHost, relayHost)
			}

			// All relays were on this node itself; nothing to run.
			if tcpCmds == "" {
				return
			}

			res := RunSSH(ctx, nd.Node, tcpCmds)
			if res.Stdout == "" {
				return
			}

			mu.Lock()
			defer mu.Unlock()
			for _, line := range strings.Split(res.Stdout, "\n") {
				line = strings.TrimSpace(line)
				if strings.HasPrefix(line, "ORPORT:") {
					// NOTE(review): SplitN on ":" assumes hosts contain no
					// colons — IPv6 literals would be mis-parsed; confirm
					// relay hosts are IPv4/hostnames.
					p := strings.SplitN(line, ":", 3)
					if len(p) == 3 {
						nd.Anyone.ORPortReachable[p[1]] = p[2] == "ok"
					}
				}
			}
		}(nd)
	}
	wg.Wait()
}
|
||||||
|
|
||||||
func collectNamespaces(ctx context.Context, node Node) []NamespaceData {
|
func collectNamespaces(ctx context.Context, node Node) []NamespaceData {
|
||||||
// Detect namespace services: debros-namespace-gateway@<name>.service
|
// Detect namespace services: debros-namespace-gateway@<name>.service
|
||||||
cmd := `
|
cmd := `
|
||||||
|
|||||||
354
pkg/inspector/results_writer.go
Normal file
354
pkg/inspector/results_writer.go
Normal file
@ -0,0 +1,354 @@
|
|||||||
|
package inspector
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
"sort"
|
||||||
|
"strings"
|
||||||
|
"time"
|
||||||
|
)
|
||||||
|
|
||||||
|
// FailureGroup groups identical check failures/warnings across nodes.
// Groups are keyed by (check ID, status) — see GroupFailures — so the same
// check can yield one FAIL group and one WARN group.
type FailureGroup struct {
	ID        string
	Name      string // from first check in group
	Status    Status
	Severity  Severity
	Subsystem string
	Nodes     []string // affected node names (deduplicated)
	Messages  []string // unique messages (capped at 5)
	Count     int      // total raw occurrence count (before dedup)
}
|
||||||
|
|
||||||
|
// GroupFailures collapses CheckResults into unique failure groups keyed by (ID, Status).
|
||||||
|
// Only failures and warnings are grouped; passes and skips are ignored.
|
||||||
|
func GroupFailures(results *Results) []FailureGroup {
|
||||||
|
type groupKey struct {
|
||||||
|
ID string
|
||||||
|
Status Status
|
||||||
|
}
|
||||||
|
|
||||||
|
seen := map[groupKey]*FailureGroup{}
|
||||||
|
nodesSeen := map[groupKey]map[string]bool{}
|
||||||
|
var order []groupKey
|
||||||
|
|
||||||
|
for _, c := range results.Checks {
|
||||||
|
if c.Status != StatusFail && c.Status != StatusWarn {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
k := groupKey{ID: c.ID, Status: c.Status}
|
||||||
|
g, exists := seen[k]
|
||||||
|
if !exists {
|
||||||
|
g = &FailureGroup{
|
||||||
|
ID: c.ID,
|
||||||
|
Name: c.Name,
|
||||||
|
Status: c.Status,
|
||||||
|
Severity: c.Severity,
|
||||||
|
Subsystem: c.Subsystem,
|
||||||
|
}
|
||||||
|
seen[k] = g
|
||||||
|
nodesSeen[k] = map[string]bool{}
|
||||||
|
order = append(order, k)
|
||||||
|
}
|
||||||
|
g.Count++
|
||||||
|
node := c.Node
|
||||||
|
if node == "" {
|
||||||
|
node = "cluster-wide"
|
||||||
|
}
|
||||||
|
// Deduplicate nodes (a node may appear for multiple targets)
|
||||||
|
if !nodesSeen[k][node] {
|
||||||
|
nodesSeen[k][node] = true
|
||||||
|
g.Nodes = append(g.Nodes, node)
|
||||||
|
}
|
||||||
|
// Track unique messages (cap at 5 to avoid bloat)
|
||||||
|
if len(g.Messages) < 5 {
|
||||||
|
found := false
|
||||||
|
for _, m := range g.Messages {
|
||||||
|
if m == c.Message {
|
||||||
|
found = true
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if !found {
|
||||||
|
g.Messages = append(g.Messages, c.Message)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Sort: failures before warnings, then by severity (high first), then by ID
|
||||||
|
groups := make([]FailureGroup, 0, len(order))
|
||||||
|
for _, k := range order {
|
||||||
|
groups = append(groups, *seen[k])
|
||||||
|
}
|
||||||
|
sort.Slice(groups, func(i, j int) bool {
|
||||||
|
oi, oj := statusOrder(groups[i].Status), statusOrder(groups[j].Status)
|
||||||
|
if oi != oj {
|
||||||
|
return oi < oj
|
||||||
|
}
|
||||||
|
if groups[i].Severity != groups[j].Severity {
|
||||||
|
return groups[i].Severity > groups[j].Severity
|
||||||
|
}
|
||||||
|
return groups[i].ID < groups[j].ID
|
||||||
|
})
|
||||||
|
|
||||||
|
return groups
|
||||||
|
}
|
||||||
|
|
||||||
|
// WriteResults saves inspection results as markdown files to a timestamped directory.
|
||||||
|
// Returns the output directory path.
|
||||||
|
func WriteResults(baseDir, env string, results *Results, data *ClusterData, analysis *AnalysisResult) (string, error) {
|
||||||
|
ts := time.Now().Format("2006-01-02_150405")
|
||||||
|
dir := filepath.Join(baseDir, env, ts)
|
||||||
|
|
||||||
|
if err := os.MkdirAll(dir, 0o755); err != nil {
|
||||||
|
return "", fmt.Errorf("create output directory: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
groups := GroupFailures(results)
|
||||||
|
|
||||||
|
// Build analysis lookup: groupID -> analysis text
|
||||||
|
analysisMap := map[string]string{}
|
||||||
|
if analysis != nil {
|
||||||
|
for _, sa := range analysis.Analyses {
|
||||||
|
key := sa.GroupID
|
||||||
|
if key == "" {
|
||||||
|
key = sa.Subsystem
|
||||||
|
}
|
||||||
|
if sa.Error == nil {
|
||||||
|
analysisMap[key] = sa.Analysis
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Write summary.md
|
||||||
|
if err := writeSummary(dir, env, ts, results, data, groups, analysisMap); err != nil {
|
||||||
|
return "", fmt.Errorf("write summary: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Group checks by subsystem for per-subsystem files
|
||||||
|
checksBySubsystem := map[string][]CheckResult{}
|
||||||
|
for _, c := range results.Checks {
|
||||||
|
checksBySubsystem[c.Subsystem] = append(checksBySubsystem[c.Subsystem], c)
|
||||||
|
}
|
||||||
|
|
||||||
|
groupsBySubsystem := map[string][]FailureGroup{}
|
||||||
|
for _, g := range groups {
|
||||||
|
groupsBySubsystem[g.Subsystem] = append(groupsBySubsystem[g.Subsystem], g)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Write per-subsystem files
|
||||||
|
for sub, checks := range checksBySubsystem {
|
||||||
|
subGroups := groupsBySubsystem[sub]
|
||||||
|
if err := writeSubsystem(dir, sub, ts, checks, subGroups, analysisMap); err != nil {
|
||||||
|
return "", fmt.Errorf("write %s: %w", sub, err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return dir, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// writeSummary renders summary.md: header stats, a per-subsystem table with
// links to the per-subsystem files, a "Critical Issues" list, and any
// collection errors.
// NOTE(review): analysisMap is accepted but never read here — confirm whether
// summary-level AI output was intended.
func writeSummary(dir, env, ts string, results *Results, data *ClusterData, groups []FailureGroup, analysisMap map[string]string) error {
	var b strings.Builder
	passed, failed, warned, skipped := results.Summary()

	b.WriteString(fmt.Sprintf("# %s Inspection Report\n\n", strings.ToUpper(env)))
	b.WriteString(fmt.Sprintf("**Date:** %s \n", ts))
	b.WriteString(fmt.Sprintf("**Nodes:** %d \n", len(data.Nodes)))
	b.WriteString(fmt.Sprintf("**Total:** %d passed, %d failed, %d warnings, %d skipped \n\n", passed, failed, warned, skipped))

	// Per-subsystem table
	subStats := map[string][4]int{} // [pass, fail, warn, skip]
	var subsystems []string
	for _, c := range results.Checks {
		// First sighting of a subsystem records it for later sorted output.
		if _, exists := subStats[c.Subsystem]; !exists {
			subsystems = append(subsystems, c.Subsystem)
		}
		s := subStats[c.Subsystem]
		switch c.Status {
		case StatusPass:
			s[0]++
		case StatusFail:
			s[1]++
		case StatusWarn:
			s[2]++
		case StatusSkip:
			s[3]++
		}
		subStats[c.Subsystem] = s
	}
	sort.Strings(subsystems)

	// Count issue groups per subsystem
	issueCountBySub := map[string]int{}
	for _, g := range groups {
		issueCountBySub[g.Subsystem]++
	}

	b.WriteString("## Subsystems\n\n")
	b.WriteString("| Subsystem | Pass | Fail | Warn | Skip | Issues |\n")
	b.WriteString("|-----------|------|------|------|------|--------|\n")
	for _, sub := range subsystems {
		s := subStats[sub]
		issues := issueCountBySub[sub]
		// Each subsystem links to its own <subsystem>.md file.
		link := fmt.Sprintf("[%s](%s.md)", sub, sub)
		b.WriteString(fmt.Sprintf("| %s | %d | %d | %d | %d | %d |\n", link, s[0], s[1], s[2], s[3], issues))
	}
	b.WriteString("\n")

	// Critical issues section
	critical := filterGroupsBySeverity(groups, High)
	if len(critical) > 0 {
		b.WriteString("## Critical Issues\n\n")
		for i, g := range critical {
			icon := "FAIL"
			if g.Status == StatusWarn {
				icon = "WARN"
			}
			nodeInfo := fmt.Sprintf("%d nodes", len(g.Nodes))
			// Occurrence count shown only when it exceeds the node count.
			if g.Count > len(g.Nodes) {
				nodeInfo = fmt.Sprintf("%d nodes (%d occurrences)", len(g.Nodes), g.Count)
			}
			b.WriteString(fmt.Sprintf("%d. **[%s]** %s — %s \n", i+1, icon, g.Name, nodeInfo))
			// g.Messages is never empty: GroupFailures appends at least one
			// message per group. Link anchors into the subsystem file.
			b.WriteString(fmt.Sprintf(" *%s* → [details](%s.md#%s) \n",
				g.Messages[0], g.Subsystem, anchorID(g.Name)))
		}
		b.WriteString("\n")
	}

	// Collection errors
	// NOTE(review): data.Nodes is iterated in map order, so error listing is
	// nondeterministic run-to-run — sort if stable reports are wanted.
	var errs []string
	for _, nd := range data.Nodes {
		for _, e := range nd.Errors {
			errs = append(errs, fmt.Sprintf("- **%s**: %s", nd.Node.Name(), e))
		}
	}
	if len(errs) > 0 {
		b.WriteString("## Collection Errors\n\n")
		for _, e := range errs {
			b.WriteString(e + "\n")
		}
		b.WriteString("\n")
	}

	return os.WriteFile(filepath.Join(dir, "summary.md"), []byte(b.String()), 0o644)
}
|
||||||
|
|
||||||
|
// writeSubsystem renders <subsystem>.md: header stats, a detailed "Issues"
// section for this subsystem's failure groups (with optional AI analysis
// keyed by group ID), and a sorted table of every check.
func writeSubsystem(dir, subsystem, ts string, checks []CheckResult, groups []FailureGroup, analysisMap map[string]string) error {
	var b strings.Builder

	// Count
	var passed, failed, warned, skipped int
	for _, c := range checks {
		switch c.Status {
		case StatusPass:
			passed++
		case StatusFail:
			failed++
		case StatusWarn:
			warned++
		case StatusSkip:
			skipped++
		}
	}

	b.WriteString(fmt.Sprintf("# %s\n\n", strings.ToUpper(subsystem)))
	b.WriteString(fmt.Sprintf("**Date:** %s \n", ts))
	b.WriteString(fmt.Sprintf("**Checks:** %d passed, %d failed, %d warnings, %d skipped \n\n", passed, failed, warned, skipped))

	// Issues section
	if len(groups) > 0 {
		b.WriteString("## Issues\n\n")
		for i, g := range groups {
			icon := "FAIL"
			if g.Status == StatusWarn {
				icon = "WARN"
			}
			// Heading text must match anchorID(g.Name) links from summary.md.
			b.WriteString(fmt.Sprintf("### %d. %s\n\n", i+1, g.Name))
			nodeInfo := fmt.Sprintf("%d nodes", len(g.Nodes))
			if g.Count > len(g.Nodes) {
				nodeInfo = fmt.Sprintf("%d nodes (%d occurrences)", len(g.Nodes), g.Count)
			}
			b.WriteString(fmt.Sprintf("**Status:** %s | **Severity:** %s | **Affected:** %s \n\n", icon, g.Severity, nodeInfo))

			// Affected nodes
			b.WriteString("**Affected nodes:**\n")
			for _, n := range g.Nodes {
				b.WriteString(fmt.Sprintf("- `%s`\n", n))
			}
			b.WriteString("\n")

			// Messages
			if len(g.Messages) == 1 {
				b.WriteString(fmt.Sprintf("**Detail:** %s\n\n", g.Messages[0]))
			} else {
				b.WriteString("**Details:**\n")
				for _, m := range g.Messages {
					b.WriteString(fmt.Sprintf("- %s\n", m))
				}
				b.WriteString("\n")
			}

			// AI analysis (if available)
			if ai, ok := analysisMap[g.ID]; ok {
				b.WriteString(ai)
				b.WriteString("\n\n")
			}

			b.WriteString("---\n\n")
		}
	}

	// All checks table
	b.WriteString("## All Checks\n\n")
	b.WriteString("| Status | Severity | Check | Node | Detail |\n")
	b.WriteString("|--------|----------|-------|------|--------|\n")

	// Sort a copy: failures first, then severity (high first), then ID.
	sorted := make([]CheckResult, len(checks))
	copy(sorted, checks)
	sort.Slice(sorted, func(i, j int) bool {
		oi, oj := statusOrder(sorted[i].Status), statusOrder(sorted[j].Status)
		if oi != oj {
			return oi < oj
		}
		if sorted[i].Severity != sorted[j].Severity {
			return sorted[i].Severity > sorted[j].Severity
		}
		return sorted[i].ID < sorted[j].ID
	})

	for _, c := range sorted {
		node := c.Node
		if node == "" {
			node = "cluster-wide"
		}
		// Escape pipes so messages can't break the markdown table.
		// NOTE(review): only Message is escaped — Name/node are assumed
		// pipe-free; confirm.
		msg := strings.ReplaceAll(c.Message, "|", "\\|")
		b.WriteString(fmt.Sprintf("| %s | %s | %s | %s | %s |\n",
			statusIcon(c.Status), c.Severity, c.Name, node, msg))
	}

	return os.WriteFile(filepath.Join(dir, subsystem+".md"), []byte(b.String()), 0o644)
}
|
||||||
|
|
||||||
|
func filterGroupsBySeverity(groups []FailureGroup, minSeverity Severity) []FailureGroup {
|
||||||
|
var out []FailureGroup
|
||||||
|
for _, g := range groups {
|
||||||
|
if g.Severity >= minSeverity {
|
||||||
|
out = append(out, g)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return out
|
||||||
|
}
|
||||||
|
|
||||||
|
// anchorID converts a check name into a markdown heading anchor: lowercase,
// spaces become hyphens, and every rune outside [a-z0-9-] is dropped.
func anchorID(name string) string {
	var b strings.Builder
	for _, r := range strings.ToLower(name) {
		switch {
		case r == ' ':
			b.WriteByte('-')
		case (r >= 'a' && r <= 'z') || (r >= '0' && r <= '9') || r == '-':
			b.WriteRune(r)
		}
	}
	return b.String()
}
|
||||||
Loading…
x
Reference in New Issue
Block a user