mirror of
https://github.com/DeBrosOfficial/orama.git
synced 2026-03-17 09:36:56 +00:00
Improved how we present results on inspector and added anyone tests
This commit is contained in:
parent
ebdd08f71c
commit
4356f5544a
2
.gitignore
vendored
2
.gitignore
vendored
@ -106,3 +106,5 @@ terms-agreement
|
||||
|
||||
cli
|
||||
./inspector
|
||||
|
||||
results/
|
||||
@ -51,10 +51,12 @@ func HandleInspectCommand(args []string) {
|
||||
|
||||
configPath := fs.String("config", "scripts/remote-nodes.conf", "Path to remote-nodes.conf")
|
||||
env := fs.String("env", "", "Environment to inspect (devnet, testnet)")
|
||||
subsystem := fs.String("subsystem", "all", "Subsystem to inspect (rqlite,olric,ipfs,dns,wg,system,network,all)")
|
||||
subsystem := fs.String("subsystem", "all", "Subsystem to inspect (rqlite,olric,ipfs,dns,wg,system,network,anyone,all)")
|
||||
format := fs.String("format", "table", "Output format (table, json)")
|
||||
timeout := fs.Duration("timeout", 30*time.Second, "SSH command timeout")
|
||||
verbose := fs.Bool("verbose", false, "Verbose output")
|
||||
// Output flags
|
||||
outputDir := fs.String("output", "", "Save results to directory as markdown (e.g., ./results)")
|
||||
// AI flags
|
||||
aiEnabled := fs.Bool("ai", false, "Enable AI analysis of failures")
|
||||
aiModel := fs.String("model", "moonshotai/kimi-k2.5", "OpenRouter model for AI analysis")
|
||||
@ -70,6 +72,7 @@ func HandleInspectCommand(args []string) {
|
||||
fmt.Fprintf(os.Stderr, " orama inspect --env devnet --subsystem rqlite\n")
|
||||
fmt.Fprintf(os.Stderr, " orama inspect --env devnet --ai\n")
|
||||
fmt.Fprintf(os.Stderr, " orama inspect --env devnet --ai --model openai/gpt-4o\n")
|
||||
fmt.Fprintf(os.Stderr, " orama inspect --env devnet --ai --output ./results\n")
|
||||
}
|
||||
|
||||
if err := fs.Parse(args); err != nil {
|
||||
@ -136,18 +139,31 @@ func HandleInspectCommand(args []string) {
|
||||
}
|
||||
|
||||
// Phase 4: AI Analysis (if enabled and there are failures or warnings)
|
||||
var analysis *inspector.AnalysisResult
|
||||
if *aiEnabled {
|
||||
issues := results.FailuresAndWarnings()
|
||||
if len(issues) == 0 {
|
||||
fmt.Printf("\nAll checks passed — no AI analysis needed.\n")
|
||||
} else if *outputDir != "" {
|
||||
// Per-group AI analysis for file output
|
||||
groups := inspector.GroupFailures(results)
|
||||
fmt.Printf("\nAnalyzing %d unique issues with %s...\n", len(groups), *aiModel)
|
||||
var err error
|
||||
analysis, err = inspector.AnalyzeGroups(groups, results, data, *aiModel, *aiAPIKey)
|
||||
if err != nil {
|
||||
fmt.Fprintf(os.Stderr, "\nAI analysis failed: %v\n", err)
|
||||
} else {
|
||||
inspector.PrintAnalysis(analysis, os.Stdout)
|
||||
}
|
||||
} else {
|
||||
// Count affected subsystems
|
||||
// Per-subsystem AI analysis for terminal output
|
||||
subs := map[string]bool{}
|
||||
for _, c := range issues {
|
||||
subs[c.Subsystem] = true
|
||||
}
|
||||
fmt.Printf("\nAnalyzing %d issues across %d subsystems with %s...\n", len(issues), len(subs), *aiModel)
|
||||
analysis, err := inspector.Analyze(results, data, *aiModel, *aiAPIKey)
|
||||
var err error
|
||||
analysis, err = inspector.Analyze(results, data, *aiModel, *aiAPIKey)
|
||||
if err != nil {
|
||||
fmt.Fprintf(os.Stderr, "\nAI analysis failed: %v\n", err)
|
||||
} else {
|
||||
@ -156,6 +172,16 @@ func HandleInspectCommand(args []string) {
|
||||
}
|
||||
}
|
||||
|
||||
// Phase 5: Write results to disk (if --output is set)
|
||||
if *outputDir != "" {
|
||||
outPath, err := inspector.WriteResults(*outputDir, *env, results, data, analysis)
|
||||
if err != nil {
|
||||
fmt.Fprintf(os.Stderr, "\nError writing results: %v\n", err)
|
||||
} else {
|
||||
fmt.Printf("\nResults saved to %s\n", outPath)
|
||||
}
|
||||
}
|
||||
|
||||
// Exit with non-zero if any failures
|
||||
if failures := results.Failures(); len(failures) > 0 {
|
||||
os.Exit(1)
|
||||
|
||||
@ -52,9 +52,10 @@ Step-by-step commands to resolve. Include actual node IPs/names from the data wh
|
||||
### Prevention
|
||||
What could prevent this in the future? (omit if not applicable)`
|
||||
|
||||
// SubsystemAnalysis holds the AI analysis for a single subsystem.
|
||||
// SubsystemAnalysis holds the AI analysis for a single subsystem or failure group.
|
||||
type SubsystemAnalysis struct {
|
||||
Subsystem string
|
||||
GroupID string // e.g. "anyone.bootstrapped" — empty when analyzing whole subsystem
|
||||
Analysis string
|
||||
Duration time.Duration
|
||||
Error error
|
||||
@ -149,6 +150,125 @@ func Analyze(results *Results, data *ClusterData, model, apiKey string) (*Analys
|
||||
}, nil
|
||||
}
|
||||
|
||||
// AnalyzeGroups sends each failure group to OpenRouter for focused AI analysis.
|
||||
// Unlike Analyze which sends one call per subsystem, this sends one call per unique
|
||||
// failure pattern, producing more focused and actionable results.
|
||||
func AnalyzeGroups(groups []FailureGroup, results *Results, data *ClusterData, model, apiKey string) (*AnalysisResult, error) {
|
||||
if apiKey == "" {
|
||||
apiKey = os.Getenv("OPENROUTER_API_KEY")
|
||||
}
|
||||
if apiKey == "" {
|
||||
return nil, fmt.Errorf("no API key: set --api-key or OPENROUTER_API_KEY env")
|
||||
}
|
||||
|
||||
if len(groups) == 0 {
|
||||
return &AnalysisResult{Model: model}, nil
|
||||
}
|
||||
|
||||
// Build shared context
|
||||
issuesBySubsystem := map[string][]CheckResult{}
|
||||
for _, c := range results.FailuresAndWarnings() {
|
||||
issuesBySubsystem[c.Subsystem] = append(issuesBySubsystem[c.Subsystem], c)
|
||||
}
|
||||
healthySummary := buildHealthySummary(results, issuesBySubsystem)
|
||||
collectionErrors := buildCollectionErrors(data)
|
||||
|
||||
start := time.Now()
|
||||
var mu sync.Mutex
|
||||
var wg sync.WaitGroup
|
||||
var analyses []SubsystemAnalysis
|
||||
|
||||
for _, g := range groups {
|
||||
wg.Add(1)
|
||||
go func(group FailureGroup) {
|
||||
defer wg.Done()
|
||||
|
||||
prompt := buildGroupPrompt(group, data, healthySummary, collectionErrors)
|
||||
subStart := time.Now()
|
||||
response, err := callOpenRouter(model, apiKey, prompt)
|
||||
|
||||
sa := SubsystemAnalysis{
|
||||
Subsystem: group.Subsystem,
|
||||
GroupID: group.ID,
|
||||
Duration: time.Since(subStart),
|
||||
}
|
||||
if err != nil {
|
||||
sa.Error = err
|
||||
} else {
|
||||
sa.Analysis = response
|
||||
}
|
||||
|
||||
mu.Lock()
|
||||
analyses = append(analyses, sa)
|
||||
mu.Unlock()
|
||||
}(g)
|
||||
}
|
||||
wg.Wait()
|
||||
|
||||
// Sort by subsystem then group ID for consistent output
|
||||
sort.Slice(analyses, func(i, j int) bool {
|
||||
if analyses[i].Subsystem != analyses[j].Subsystem {
|
||||
return analyses[i].Subsystem < analyses[j].Subsystem
|
||||
}
|
||||
return analyses[i].GroupID < analyses[j].GroupID
|
||||
})
|
||||
|
||||
return &AnalysisResult{
|
||||
Model: model,
|
||||
Analyses: analyses,
|
||||
Duration: time.Since(start),
|
||||
}, nil
|
||||
}
|
||||
|
||||
func buildGroupPrompt(group FailureGroup, data *ClusterData, healthySummary, collectionErrors string) string {
|
||||
var b strings.Builder
|
||||
|
||||
icon := "FAILURE"
|
||||
if group.Status == StatusWarn {
|
||||
icon = "WARNING"
|
||||
}
|
||||
|
||||
b.WriteString(fmt.Sprintf("## %s: %s\n\n", icon, group.Name))
|
||||
b.WriteString(fmt.Sprintf("**Check ID:** %s \n", group.ID))
|
||||
b.WriteString(fmt.Sprintf("**Severity:** %s \n", group.Severity))
|
||||
b.WriteString(fmt.Sprintf("**Nodes affected:** %d \n\n", len(group.Nodes)))
|
||||
|
||||
b.WriteString("**Affected nodes:**\n")
|
||||
for _, n := range group.Nodes {
|
||||
b.WriteString(fmt.Sprintf("- %s\n", n))
|
||||
}
|
||||
b.WriteString("\n")
|
||||
|
||||
b.WriteString("**Error messages:**\n")
|
||||
for _, m := range group.Messages {
|
||||
b.WriteString(fmt.Sprintf("- %s\n", m))
|
||||
}
|
||||
b.WriteString("\n")
|
||||
|
||||
// Subsystem raw data
|
||||
contextData := buildSubsystemContext(group.Subsystem, data)
|
||||
if contextData != "" {
|
||||
b.WriteString(fmt.Sprintf("## %s Raw Data (all nodes)\n", strings.ToUpper(group.Subsystem)))
|
||||
b.WriteString(contextData)
|
||||
b.WriteString("\n")
|
||||
}
|
||||
|
||||
if healthySummary != "" {
|
||||
b.WriteString("## Healthy Subsystems\n")
|
||||
b.WriteString(healthySummary)
|
||||
b.WriteString("\n")
|
||||
}
|
||||
|
||||
if collectionErrors != "" {
|
||||
b.WriteString("## Collection Errors\n")
|
||||
b.WriteString(collectionErrors)
|
||||
b.WriteString("\n")
|
||||
}
|
||||
|
||||
b.WriteString(fmt.Sprintf("\nAnalyze this specific %s issue. Be concise — focus on this one problem.\n", group.Subsystem))
|
||||
return b.String()
|
||||
}
|
||||
|
||||
func buildClusterOverview(data *ClusterData, results *Results) string {
|
||||
var b strings.Builder
|
||||
b.WriteString(fmt.Sprintf("Nodes: %d\n", len(data.Nodes)))
|
||||
@ -286,6 +406,8 @@ func buildSubsystemContext(subsystem string, data *ClusterData) string {
|
||||
return buildNetworkContext(data)
|
||||
case "namespace":
|
||||
return buildNamespaceContext(data)
|
||||
case "anyone":
|
||||
return buildAnyoneContext(data)
|
||||
default:
|
||||
return ""
|
||||
}
|
||||
@ -486,6 +608,40 @@ func buildNamespaceContext(data *ClusterData) string {
|
||||
return b.String()
|
||||
}
|
||||
|
||||
func buildAnyoneContext(data *ClusterData) string {
|
||||
var b strings.Builder
|
||||
for host, nd := range data.Nodes {
|
||||
if nd.Anyone == nil {
|
||||
continue
|
||||
}
|
||||
a := nd.Anyone
|
||||
if !a.RelayActive && !a.ClientActive {
|
||||
continue
|
||||
}
|
||||
b.WriteString(fmt.Sprintf("### %s\n", host))
|
||||
b.WriteString(fmt.Sprintf(" relay=%v client=%v orport=%v socks=%v control=%v\n",
|
||||
a.RelayActive, a.ClientActive, a.ORPortListening, a.SocksListening, a.ControlListening))
|
||||
if a.RelayActive {
|
||||
b.WriteString(fmt.Sprintf(" bootstrap=%d%% fingerprint=%s nickname=%s\n",
|
||||
a.BootstrapPct, a.Fingerprint, a.Nickname))
|
||||
}
|
||||
if len(a.ORPortReachable) > 0 {
|
||||
var unreachable []string
|
||||
for h, ok := range a.ORPortReachable {
|
||||
if !ok {
|
||||
unreachable = append(unreachable, h)
|
||||
}
|
||||
}
|
||||
if len(unreachable) > 0 {
|
||||
b.WriteString(fmt.Sprintf(" orport_unreachable: %s\n", strings.Join(unreachable, ", ")))
|
||||
} else {
|
||||
b.WriteString(fmt.Sprintf(" orport: all %d peers reachable\n", len(a.ORPortReachable)))
|
||||
}
|
||||
}
|
||||
}
|
||||
return b.String()
|
||||
}
|
||||
|
||||
// OpenRouter API types (OpenAI-compatible)
|
||||
|
||||
type openRouterRequest struct {
|
||||
@ -531,7 +687,7 @@ func callOpenRouter(model, apiKey, prompt string) (string, error) {
|
||||
req.Header.Set("Content-Type", "application/json")
|
||||
req.Header.Set("Authorization", "Bearer "+apiKey)
|
||||
|
||||
client := &http.Client{Timeout: 120 * time.Second}
|
||||
client := &http.Client{Timeout: 180 * time.Second}
|
||||
resp, err := client.Do(req)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("HTTP request: %w", err)
|
||||
|
||||
170
pkg/inspector/checks/anyone.go
Normal file
170
pkg/inspector/checks/anyone.go
Normal file
@ -0,0 +1,170 @@
|
||||
package checks
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/DeBrosOfficial/network/pkg/inspector"
|
||||
)
|
||||
|
||||
func init() {
|
||||
inspector.RegisterChecker("anyone", CheckAnyone)
|
||||
}
|
||||
|
||||
const anyoneSub = "anyone"
|
||||
|
||||
// CheckAnyone runs all Anyone relay/client health checks.
|
||||
func CheckAnyone(data *inspector.ClusterData) []inspector.CheckResult {
|
||||
var results []inspector.CheckResult
|
||||
|
||||
for _, nd := range data.Nodes {
|
||||
if nd.Anyone == nil {
|
||||
continue
|
||||
}
|
||||
results = append(results, checkAnyonePerNode(nd)...)
|
||||
}
|
||||
|
||||
results = append(results, checkAnyoneCrossNode(data)...)
|
||||
|
||||
return results
|
||||
}
|
||||
|
||||
func checkAnyonePerNode(nd *inspector.NodeData) []inspector.CheckResult {
|
||||
var r []inspector.CheckResult
|
||||
a := nd.Anyone
|
||||
node := nd.Node.Name()
|
||||
|
||||
// If neither service is active, skip all checks for this node
|
||||
if !a.RelayActive && !a.ClientActive {
|
||||
return r
|
||||
}
|
||||
|
||||
// --- Relay checks ---
|
||||
if a.RelayActive {
|
||||
r = append(r, inspector.Pass("anyone.relay_active", "Anyone relay service active", anyoneSub, node,
|
||||
"debros-anyone-relay is active", inspector.High))
|
||||
|
||||
// ORPort listening
|
||||
if a.ORPortListening {
|
||||
r = append(r, inspector.Pass("anyone.orport_listening", "ORPort 9001 listening", anyoneSub, node,
|
||||
"port 9001 bound", inspector.High))
|
||||
} else {
|
||||
r = append(r, inspector.Fail("anyone.orport_listening", "ORPort 9001 listening", anyoneSub, node,
|
||||
"port 9001 NOT bound", inspector.High))
|
||||
}
|
||||
|
||||
// Control port
|
||||
if a.ControlListening {
|
||||
r = append(r, inspector.Pass("anyone.control_listening", "Control port 9051 listening", anyoneSub, node,
|
||||
"port 9051 bound", inspector.Low))
|
||||
} else {
|
||||
r = append(r, inspector.Warn("anyone.control_listening", "Control port 9051 listening", anyoneSub, node,
|
||||
"port 9051 NOT bound (monitoring unavailable)", inspector.Low))
|
||||
}
|
||||
|
||||
// Bootstrap status
|
||||
if a.Bootstrapped {
|
||||
r = append(r, inspector.Pass("anyone.bootstrapped", "Relay bootstrapped", anyoneSub, node,
|
||||
fmt.Sprintf("bootstrap=%d%%", a.BootstrapPct), inspector.High))
|
||||
} else if a.BootstrapPct > 0 {
|
||||
r = append(r, inspector.Warn("anyone.bootstrapped", "Relay bootstrapped", anyoneSub, node,
|
||||
fmt.Sprintf("bootstrap=%d%% (still connecting)", a.BootstrapPct), inspector.High))
|
||||
} else {
|
||||
r = append(r, inspector.Fail("anyone.bootstrapped", "Relay bootstrapped", anyoneSub, node,
|
||||
"bootstrap=0% (not started or log missing)", inspector.High))
|
||||
}
|
||||
|
||||
// Fingerprint present
|
||||
if a.Fingerprint != "" {
|
||||
r = append(r, inspector.Pass("anyone.fingerprint", "Relay has fingerprint", anyoneSub, node,
|
||||
fmt.Sprintf("fingerprint=%s", a.Fingerprint), inspector.Medium))
|
||||
} else {
|
||||
r = append(r, inspector.Warn("anyone.fingerprint", "Relay has fingerprint", anyoneSub, node,
|
||||
"no fingerprint found (relay may not have generated keys yet)", inspector.Medium))
|
||||
}
|
||||
|
||||
// Nickname configured
|
||||
if a.Nickname != "" {
|
||||
r = append(r, inspector.Pass("anyone.nickname", "Relay nickname configured", anyoneSub, node,
|
||||
fmt.Sprintf("nickname=%s", a.Nickname), inspector.Low))
|
||||
} else {
|
||||
r = append(r, inspector.Warn("anyone.nickname", "Relay nickname configured", anyoneSub, node,
|
||||
"no nickname in /etc/anon/anonrc", inspector.Low))
|
||||
}
|
||||
}
|
||||
|
||||
// --- Client checks ---
|
||||
if a.ClientActive {
|
||||
r = append(r, inspector.Pass("anyone.client_active", "Anyone client service active", anyoneSub, node,
|
||||
"debros-anyone-client is active", inspector.High))
|
||||
|
||||
// SOCKS5 port listening
|
||||
if a.SocksListening {
|
||||
r = append(r, inspector.Pass("anyone.socks_listening", "SOCKS5 port 9050 listening", anyoneSub, node,
|
||||
"port 9050 bound", inspector.High))
|
||||
} else {
|
||||
r = append(r, inspector.Fail("anyone.socks_listening", "SOCKS5 port 9050 listening", anyoneSub, node,
|
||||
"port 9050 NOT bound (IPFS traffic cannot route through anonymity network)", inspector.High))
|
||||
}
|
||||
}
|
||||
|
||||
return r
|
||||
}
|
||||
|
||||
func checkAnyoneCrossNode(data *inspector.ClusterData) []inspector.CheckResult {
|
||||
var r []inspector.CheckResult
|
||||
|
||||
// Count relay and client nodes
|
||||
relayActive := 0
|
||||
relayTotal := 0
|
||||
clientActive := 0
|
||||
clientTotal := 0
|
||||
|
||||
for _, nd := range data.Nodes {
|
||||
if nd.Anyone == nil {
|
||||
continue
|
||||
}
|
||||
if nd.Anyone.RelayActive {
|
||||
relayActive++
|
||||
relayTotal++
|
||||
}
|
||||
if nd.Anyone.ClientActive {
|
||||
clientActive++
|
||||
clientTotal++
|
||||
}
|
||||
}
|
||||
|
||||
// Skip cross-node checks if no Anyone services at all
|
||||
if relayTotal == 0 && clientTotal == 0 {
|
||||
return r
|
||||
}
|
||||
|
||||
// ORPort reachability: check if relays are publicly accessible from other nodes
|
||||
orportChecked := 0
|
||||
orportReachable := 0
|
||||
orportFailed := 0
|
||||
|
||||
for _, nd := range data.Nodes {
|
||||
if nd.Anyone == nil {
|
||||
continue
|
||||
}
|
||||
for host, ok := range nd.Anyone.ORPortReachable {
|
||||
orportChecked++
|
||||
if ok {
|
||||
orportReachable++
|
||||
} else {
|
||||
orportFailed++
|
||||
r = append(r, inspector.Fail("anyone.orport_reachable",
|
||||
fmt.Sprintf("ORPort 9001 reachable on %s", host),
|
||||
anyoneSub, nd.Node.Name(),
|
||||
fmt.Sprintf("cannot TCP connect to %s:9001 from %s", host, nd.Node.Name()), inspector.High))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if orportChecked > 0 && orportFailed == 0 {
|
||||
r = append(r, inspector.Pass("anyone.orport_reachable", "ORPort 9001 reachable across nodes", anyoneSub, "",
|
||||
fmt.Sprintf("all %d cross-node connections OK", orportReachable), inspector.High))
|
||||
}
|
||||
|
||||
return r
|
||||
}
|
||||
219
pkg/inspector/checks/anyone_test.go
Normal file
219
pkg/inspector/checks/anyone_test.go
Normal file
@ -0,0 +1,219 @@
|
||||
package checks
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/DeBrosOfficial/network/pkg/inspector"
|
||||
)
|
||||
|
||||
func TestCheckAnyone_NilData(t *testing.T) {
|
||||
nd := makeNodeData("1.1.1.1", "node")
|
||||
data := makeCluster(map[string]*inspector.NodeData{"1.1.1.1": nd})
|
||||
results := CheckAnyone(data)
|
||||
if len(results) != 0 {
|
||||
t.Errorf("expected 0 results for nil Anyone data, got %d", len(results))
|
||||
}
|
||||
}
|
||||
|
||||
func TestCheckAnyone_BothInactive(t *testing.T) {
|
||||
nd := makeNodeData("1.1.1.1", "node")
|
||||
nd.Anyone = &inspector.AnyoneData{
|
||||
ORPortReachable: make(map[string]bool),
|
||||
}
|
||||
data := makeCluster(map[string]*inspector.NodeData{"1.1.1.1": nd})
|
||||
results := CheckAnyone(data)
|
||||
if len(results) != 0 {
|
||||
t.Errorf("expected 0 results when both services inactive, got %d", len(results))
|
||||
}
|
||||
}
|
||||
|
||||
func TestCheckAnyone_HealthyRelay(t *testing.T) {
|
||||
nd := makeNodeData("1.1.1.1", "node")
|
||||
nd.Anyone = &inspector.AnyoneData{
|
||||
RelayActive: true,
|
||||
ORPortListening: true,
|
||||
ControlListening: true,
|
||||
Bootstrapped: true,
|
||||
BootstrapPct: 100,
|
||||
Fingerprint: "ABCDEF1234567890",
|
||||
Nickname: "OramaRelay1",
|
||||
ORPortReachable: make(map[string]bool),
|
||||
}
|
||||
|
||||
data := makeCluster(map[string]*inspector.NodeData{"1.1.1.1": nd})
|
||||
results := CheckAnyone(data)
|
||||
|
||||
expectStatus(t, results, "anyone.relay_active", inspector.StatusPass)
|
||||
expectStatus(t, results, "anyone.orport_listening", inspector.StatusPass)
|
||||
expectStatus(t, results, "anyone.control_listening", inspector.StatusPass)
|
||||
expectStatus(t, results, "anyone.bootstrapped", inspector.StatusPass)
|
||||
expectStatus(t, results, "anyone.fingerprint", inspector.StatusPass)
|
||||
expectStatus(t, results, "anyone.nickname", inspector.StatusPass)
|
||||
}
|
||||
|
||||
func TestCheckAnyone_HealthyClient(t *testing.T) {
|
||||
nd := makeNodeData("1.1.1.1", "node")
|
||||
nd.Anyone = &inspector.AnyoneData{
|
||||
ClientActive: true,
|
||||
SocksListening: true,
|
||||
ORPortReachable: make(map[string]bool),
|
||||
}
|
||||
|
||||
data := makeCluster(map[string]*inspector.NodeData{"1.1.1.1": nd})
|
||||
results := CheckAnyone(data)
|
||||
|
||||
expectStatus(t, results, "anyone.client_active", inspector.StatusPass)
|
||||
expectStatus(t, results, "anyone.socks_listening", inspector.StatusPass)
|
||||
}
|
||||
|
||||
func TestCheckAnyone_RelayORPortDown(t *testing.T) {
|
||||
nd := makeNodeData("1.1.1.1", "node")
|
||||
nd.Anyone = &inspector.AnyoneData{
|
||||
RelayActive: true,
|
||||
ORPortListening: false,
|
||||
ControlListening: true,
|
||||
ORPortReachable: make(map[string]bool),
|
||||
}
|
||||
|
||||
data := makeCluster(map[string]*inspector.NodeData{"1.1.1.1": nd})
|
||||
results := CheckAnyone(data)
|
||||
|
||||
expectStatus(t, results, "anyone.orport_listening", inspector.StatusFail)
|
||||
}
|
||||
|
||||
func TestCheckAnyone_RelayNotBootstrapped(t *testing.T) {
|
||||
nd := makeNodeData("1.1.1.1", "node")
|
||||
nd.Anyone = &inspector.AnyoneData{
|
||||
RelayActive: true,
|
||||
ORPortListening: true,
|
||||
BootstrapPct: 0,
|
||||
Bootstrapped: false,
|
||||
ORPortReachable: make(map[string]bool),
|
||||
}
|
||||
|
||||
data := makeCluster(map[string]*inspector.NodeData{"1.1.1.1": nd})
|
||||
results := CheckAnyone(data)
|
||||
|
||||
expectStatus(t, results, "anyone.bootstrapped", inspector.StatusFail)
|
||||
}
|
||||
|
||||
func TestCheckAnyone_RelayPartialBootstrap(t *testing.T) {
|
||||
nd := makeNodeData("1.1.1.1", "node")
|
||||
nd.Anyone = &inspector.AnyoneData{
|
||||
RelayActive: true,
|
||||
ORPortListening: true,
|
||||
BootstrapPct: 75,
|
||||
Bootstrapped: false,
|
||||
ORPortReachable: make(map[string]bool),
|
||||
}
|
||||
|
||||
data := makeCluster(map[string]*inspector.NodeData{"1.1.1.1": nd})
|
||||
results := CheckAnyone(data)
|
||||
|
||||
expectStatus(t, results, "anyone.bootstrapped", inspector.StatusWarn)
|
||||
}
|
||||
|
||||
func TestCheckAnyone_ClientSocksDown(t *testing.T) {
|
||||
nd := makeNodeData("1.1.1.1", "node")
|
||||
nd.Anyone = &inspector.AnyoneData{
|
||||
ClientActive: true,
|
||||
SocksListening: false,
|
||||
ORPortReachable: make(map[string]bool),
|
||||
}
|
||||
|
||||
data := makeCluster(map[string]*inspector.NodeData{"1.1.1.1": nd})
|
||||
results := CheckAnyone(data)
|
||||
|
||||
expectStatus(t, results, "anyone.socks_listening", inspector.StatusFail)
|
||||
}
|
||||
|
||||
func TestCheckAnyone_NoFingerprint(t *testing.T) {
|
||||
nd := makeNodeData("1.1.1.1", "node")
|
||||
nd.Anyone = &inspector.AnyoneData{
|
||||
RelayActive: true,
|
||||
ORPortListening: true,
|
||||
Fingerprint: "",
|
||||
ORPortReachable: make(map[string]bool),
|
||||
}
|
||||
|
||||
data := makeCluster(map[string]*inspector.NodeData{"1.1.1.1": nd})
|
||||
results := CheckAnyone(data)
|
||||
|
||||
expectStatus(t, results, "anyone.fingerprint", inspector.StatusWarn)
|
||||
}
|
||||
|
||||
func TestCheckAnyone_CrossNode_ORPortReachable(t *testing.T) {
|
||||
nd1 := makeNodeData("1.1.1.1", "node")
|
||||
nd1.Anyone = &inspector.AnyoneData{
|
||||
RelayActive: true,
|
||||
ORPortListening: true,
|
||||
ORPortReachable: map[string]bool{"2.2.2.2": true},
|
||||
}
|
||||
|
||||
nd2 := makeNodeData("2.2.2.2", "node")
|
||||
nd2.Anyone = &inspector.AnyoneData{
|
||||
RelayActive: true,
|
||||
ORPortListening: true,
|
||||
ORPortReachable: map[string]bool{"1.1.1.1": true},
|
||||
}
|
||||
|
||||
data := makeCluster(map[string]*inspector.NodeData{"1.1.1.1": nd1, "2.2.2.2": nd2})
|
||||
results := CheckAnyone(data)
|
||||
|
||||
expectStatus(t, results, "anyone.orport_reachable", inspector.StatusPass)
|
||||
}
|
||||
|
||||
func TestCheckAnyone_CrossNode_ORPortUnreachable(t *testing.T) {
|
||||
nd1 := makeNodeData("1.1.1.1", "node")
|
||||
nd1.Anyone = &inspector.AnyoneData{
|
||||
RelayActive: true,
|
||||
ORPortListening: true,
|
||||
ORPortReachable: map[string]bool{"2.2.2.2": false},
|
||||
}
|
||||
|
||||
nd2 := makeNodeData("2.2.2.2", "node")
|
||||
nd2.Anyone = &inspector.AnyoneData{
|
||||
RelayActive: true,
|
||||
ORPortListening: true,
|
||||
ORPortReachable: map[string]bool{"1.1.1.1": true},
|
||||
}
|
||||
|
||||
data := makeCluster(map[string]*inspector.NodeData{"1.1.1.1": nd1, "2.2.2.2": nd2})
|
||||
results := CheckAnyone(data)
|
||||
|
||||
// Should have at least one fail for the unreachable connection
|
||||
hasFail := false
|
||||
for _, r := range results {
|
||||
if r.ID == "anyone.orport_reachable" && r.Status == inspector.StatusFail {
|
||||
hasFail = true
|
||||
}
|
||||
}
|
||||
if !hasFail {
|
||||
t.Error("expected at least one anyone.orport_reachable fail")
|
||||
}
|
||||
}
|
||||
|
||||
func TestCheckAnyone_BothRelayAndClient(t *testing.T) {
|
||||
nd := makeNodeData("1.1.1.1", "node")
|
||||
nd.Anyone = &inspector.AnyoneData{
|
||||
RelayActive: true,
|
||||
ClientActive: true,
|
||||
ORPortListening: true,
|
||||
SocksListening: true,
|
||||
ControlListening: true,
|
||||
Bootstrapped: true,
|
||||
BootstrapPct: 100,
|
||||
Fingerprint: "ABCDEF",
|
||||
Nickname: "test",
|
||||
ORPortReachable: make(map[string]bool),
|
||||
}
|
||||
|
||||
data := makeCluster(map[string]*inspector.NodeData{"1.1.1.1": nd})
|
||||
results := CheckAnyone(data)
|
||||
|
||||
// Should have both relay and client checks
|
||||
expectStatus(t, results, "anyone.relay_active", inspector.StatusPass)
|
||||
expectStatus(t, results, "anyone.client_active", inspector.StatusPass)
|
||||
expectStatus(t, results, "anyone.socks_listening", inspector.StatusPass)
|
||||
expectStatus(t, results, "anyone.orport_listening", inspector.StatusPass)
|
||||
}
|
||||
@ -50,6 +50,22 @@ func checkSystemPerNode(nd *inspector.NodeData) []inspector.CheckResult {
|
||||
}
|
||||
}
|
||||
|
||||
// 6.2 Anyone relay/client services (only check if installed, don't fail if absent)
|
||||
for _, svc := range []string{"debros-anyone-relay", "debros-anyone-client"} {
|
||||
status, ok := sys.Services[svc]
|
||||
if !ok || status == "inactive" {
|
||||
continue // not installed or intentionally stopped
|
||||
}
|
||||
id := fmt.Sprintf("system.svc_%s", strings.ReplaceAll(svc, "-", "_"))
|
||||
name := fmt.Sprintf("%s service active", svc)
|
||||
if status == "active" {
|
||||
r = append(r, inspector.Pass(id, name, systemSub, node, "active", inspector.High))
|
||||
} else {
|
||||
r = append(r, inspector.Fail(id, name, systemSub, node,
|
||||
fmt.Sprintf("status=%s (should be active or uninstalled)", status), inspector.High))
|
||||
}
|
||||
}
|
||||
|
||||
// 6.5 WireGuard service
|
||||
if status, ok := sys.Services["wg-quick@wg0"]; ok {
|
||||
if status == "active" {
|
||||
|
||||
@ -26,6 +26,7 @@ type NodeData struct {
|
||||
WireGuard *WireGuardData
|
||||
System *SystemData
|
||||
Network *NetworkData
|
||||
Anyone *AnyoneData
|
||||
Namespaces []NamespaceData // namespace instances on this node
|
||||
Errors []string // collection errors for this node
|
||||
}
|
||||
@ -224,6 +225,21 @@ type NetworkData struct {
|
||||
PingResults map[string]bool // WG peer IP → ping success
|
||||
}
|
||||
|
||||
// AnyoneData holds parsed Anyone relay/client status from a node.
|
||||
type AnyoneData struct {
|
||||
RelayActive bool // debros-anyone-relay systemd service active
|
||||
ClientActive bool // debros-anyone-client systemd service active
|
||||
ORPortListening bool // port 9001 bound locally
|
||||
SocksListening bool // port 9050 bound locally (client SOCKS5)
|
||||
ControlListening bool // port 9051 bound locally (control port)
|
||||
Bootstrapped bool // relay has bootstrapped to 100%
|
||||
BootstrapPct int // bootstrap percentage (0-100)
|
||||
Fingerprint string // relay fingerprint
|
||||
Nickname string // relay nickname
|
||||
UptimeStr string // uptime from control port
|
||||
ORPortReachable map[string]bool // host IP → whether we can TCP connect to their 9001 from this node
|
||||
}
|
||||
|
||||
// Collect gathers data from all nodes in parallel.
|
||||
func Collect(ctx context.Context, nodes []Node, subsystems []string, verbose bool) *ClusterData {
|
||||
start := time.Now()
|
||||
@ -246,6 +262,10 @@ func Collect(ctx context.Context, nodes []Node, subsystems []string, verbose boo
|
||||
}
|
||||
|
||||
wg.Wait()
|
||||
|
||||
// Second pass: cross-node ORPort reachability (needs all nodes collected first)
|
||||
collectAnyoneReachability(ctx, data)
|
||||
|
||||
data.Duration = time.Since(start)
|
||||
return data
|
||||
}
|
||||
@ -286,6 +306,9 @@ func collectNode(ctx context.Context, node Node, subsystems []string, verbose bo
|
||||
if shouldCollect("network") {
|
||||
nd.Network = collectNetwork(ctx, node, nd.WireGuard)
|
||||
}
|
||||
if shouldCollect("anyone") {
|
||||
nd.Anyone = collectAnyone(ctx, node)
|
||||
}
|
||||
// Namespace collection — always collect if any subsystem is collected
|
||||
nd.Namespaces = collectNamespaces(ctx, node)
|
||||
|
||||
@ -1113,6 +1136,139 @@ echo "$SEP"
|
||||
return data
|
||||
}
|
||||
|
||||
func collectAnyone(ctx context.Context, node Node) *AnyoneData {
|
||||
data := &AnyoneData{
|
||||
ORPortReachable: make(map[string]bool),
|
||||
}
|
||||
|
||||
cmd := `
|
||||
SEP="===INSPECTOR_SEP==="
|
||||
echo "$SEP"
|
||||
systemctl is-active debros-anyone-relay 2>/dev/null || echo inactive
|
||||
echo "$SEP"
|
||||
systemctl is-active debros-anyone-client 2>/dev/null || echo inactive
|
||||
echo "$SEP"
|
||||
ss -tlnp 2>/dev/null | grep -q ':9001 ' && echo yes || echo no
|
||||
echo "$SEP"
|
||||
ss -tlnp 2>/dev/null | grep -q ':9050 ' && echo yes || echo no
|
||||
echo "$SEP"
|
||||
ss -tlnp 2>/dev/null | grep -q ':9051 ' && echo yes || echo no
|
||||
echo "$SEP"
|
||||
# Check bootstrap status from log (last 50 lines)
|
||||
grep -oP 'Bootstrapped \K[0-9]+' /var/log/anon/notices.log 2>/dev/null | tail -1 || echo 0
|
||||
echo "$SEP"
|
||||
# Read fingerprint
|
||||
cat /var/lib/anon/fingerprint 2>/dev/null || echo ""
|
||||
echo "$SEP"
|
||||
# Read nickname from config
|
||||
grep -oP '^Nickname \K\S+' /etc/anon/anonrc 2>/dev/null || echo ""
|
||||
`
|
||||
|
||||
res := RunSSH(ctx, node, cmd)
|
||||
if !res.OK() && res.Stdout == "" {
|
||||
return data
|
||||
}
|
||||
|
||||
parts := strings.Split(res.Stdout, "===INSPECTOR_SEP===")
|
||||
|
||||
if len(parts) > 1 {
|
||||
data.RelayActive = strings.TrimSpace(parts[1]) == "active"
|
||||
}
|
||||
if len(parts) > 2 {
|
||||
data.ClientActive = strings.TrimSpace(parts[2]) == "active"
|
||||
}
|
||||
if len(parts) > 3 {
|
||||
data.ORPortListening = strings.TrimSpace(parts[3]) == "yes"
|
||||
}
|
||||
if len(parts) > 4 {
|
||||
data.SocksListening = strings.TrimSpace(parts[4]) == "yes"
|
||||
}
|
||||
if len(parts) > 5 {
|
||||
data.ControlListening = strings.TrimSpace(parts[5]) == "yes"
|
||||
}
|
||||
if len(parts) > 6 {
|
||||
pct := parseIntDefault(strings.TrimSpace(parts[6]), 0)
|
||||
data.BootstrapPct = pct
|
||||
data.Bootstrapped = pct >= 100
|
||||
}
|
||||
if len(parts) > 7 {
|
||||
data.Fingerprint = strings.TrimSpace(parts[7])
|
||||
}
|
||||
if len(parts) > 8 {
|
||||
data.Nickname = strings.TrimSpace(parts[8])
|
||||
}
|
||||
|
||||
// If neither relay nor client is active, skip further checks
|
||||
if !data.RelayActive && !data.ClientActive {
|
||||
return data
|
||||
}
|
||||
|
||||
return data
|
||||
}
|
||||
|
||||
// collectAnyoneReachability runs a second pass to check ORPort reachability across nodes.
|
||||
// Called after all nodes are collected so we know which nodes run relays.
|
||||
func collectAnyoneReachability(ctx context.Context, data *ClusterData) {
|
||||
// Find all nodes running the relay (have ORPort listening)
|
||||
var relayHosts []string
|
||||
for host, nd := range data.Nodes {
|
||||
if nd.Anyone != nil && nd.Anyone.RelayActive && nd.Anyone.ORPortListening {
|
||||
relayHosts = append(relayHosts, host)
|
||||
}
|
||||
}
|
||||
|
||||
if len(relayHosts) == 0 {
|
||||
return
|
||||
}
|
||||
|
||||
// From each node, try to TCP connect to each relay's ORPort 9001
|
||||
var mu sync.Mutex
|
||||
var wg sync.WaitGroup
|
||||
|
||||
for _, nd := range data.Nodes {
|
||||
if nd.Anyone == nil {
|
||||
continue
|
||||
}
|
||||
wg.Add(1)
|
||||
go func(nd *NodeData) {
|
||||
defer wg.Done()
|
||||
|
||||
// Build commands to test TCP connectivity to each relay
|
||||
var tcpCmds string
|
||||
for _, relayHost := range relayHosts {
|
||||
if relayHost == nd.Node.Host {
|
||||
continue // skip self
|
||||
}
|
||||
tcpCmds += fmt.Sprintf(
|
||||
`echo "ORPORT:%s:$(timeout 3 bash -c 'echo >/dev/tcp/%s/9001' 2>/dev/null && echo ok || echo fail)"
|
||||
`, relayHost, relayHost)
|
||||
}
|
||||
|
||||
if tcpCmds == "" {
|
||||
return
|
||||
}
|
||||
|
||||
res := RunSSH(ctx, nd.Node, tcpCmds)
|
||||
if res.Stdout == "" {
|
||||
return
|
||||
}
|
||||
|
||||
mu.Lock()
|
||||
defer mu.Unlock()
|
||||
for _, line := range strings.Split(res.Stdout, "\n") {
|
||||
line = strings.TrimSpace(line)
|
||||
if strings.HasPrefix(line, "ORPORT:") {
|
||||
p := strings.SplitN(line, ":", 3)
|
||||
if len(p) == 3 {
|
||||
nd.Anyone.ORPortReachable[p[1]] = p[2] == "ok"
|
||||
}
|
||||
}
|
||||
}
|
||||
}(nd)
|
||||
}
|
||||
wg.Wait()
|
||||
}
|
||||
|
||||
func collectNamespaces(ctx context.Context, node Node) []NamespaceData {
|
||||
// Detect namespace services: debros-namespace-gateway@<name>.service
|
||||
cmd := `
|
||||
|
||||
354
pkg/inspector/results_writer.go
Normal file
354
pkg/inspector/results_writer.go
Normal file
@ -0,0 +1,354 @@
|
||||
package inspector
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"sort"
|
||||
"strings"
|
||||
"time"
|
||||
)
|
||||
|
||||
// FailureGroup groups identical check failures/warnings across nodes.
// One group corresponds to a single check ID at a single status, together
// with every node where that check produced the same outcome.
type FailureGroup struct {
	ID        string   // check ID shared by all results in the group
	Name      string   // from first check in group
	Status    Status   // StatusFail or StatusWarn (other statuses are never grouped)
	Severity  Severity // severity of the first check seen for this group
	Subsystem string   // subsystem the check belongs to
	Nodes     []string // affected node names (deduplicated)
	Messages  []string // unique messages (capped at 5)
	Count     int      // total raw occurrence count (before dedup)
}
|
||||
|
||||
// GroupFailures collapses CheckResults into unique failure groups keyed by (ID, Status).
|
||||
// Only failures and warnings are grouped; passes and skips are ignored.
|
||||
func GroupFailures(results *Results) []FailureGroup {
|
||||
type groupKey struct {
|
||||
ID string
|
||||
Status Status
|
||||
}
|
||||
|
||||
seen := map[groupKey]*FailureGroup{}
|
||||
nodesSeen := map[groupKey]map[string]bool{}
|
||||
var order []groupKey
|
||||
|
||||
for _, c := range results.Checks {
|
||||
if c.Status != StatusFail && c.Status != StatusWarn {
|
||||
continue
|
||||
}
|
||||
k := groupKey{ID: c.ID, Status: c.Status}
|
||||
g, exists := seen[k]
|
||||
if !exists {
|
||||
g = &FailureGroup{
|
||||
ID: c.ID,
|
||||
Name: c.Name,
|
||||
Status: c.Status,
|
||||
Severity: c.Severity,
|
||||
Subsystem: c.Subsystem,
|
||||
}
|
||||
seen[k] = g
|
||||
nodesSeen[k] = map[string]bool{}
|
||||
order = append(order, k)
|
||||
}
|
||||
g.Count++
|
||||
node := c.Node
|
||||
if node == "" {
|
||||
node = "cluster-wide"
|
||||
}
|
||||
// Deduplicate nodes (a node may appear for multiple targets)
|
||||
if !nodesSeen[k][node] {
|
||||
nodesSeen[k][node] = true
|
||||
g.Nodes = append(g.Nodes, node)
|
||||
}
|
||||
// Track unique messages (cap at 5 to avoid bloat)
|
||||
if len(g.Messages) < 5 {
|
||||
found := false
|
||||
for _, m := range g.Messages {
|
||||
if m == c.Message {
|
||||
found = true
|
||||
break
|
||||
}
|
||||
}
|
||||
if !found {
|
||||
g.Messages = append(g.Messages, c.Message)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Sort: failures before warnings, then by severity (high first), then by ID
|
||||
groups := make([]FailureGroup, 0, len(order))
|
||||
for _, k := range order {
|
||||
groups = append(groups, *seen[k])
|
||||
}
|
||||
sort.Slice(groups, func(i, j int) bool {
|
||||
oi, oj := statusOrder(groups[i].Status), statusOrder(groups[j].Status)
|
||||
if oi != oj {
|
||||
return oi < oj
|
||||
}
|
||||
if groups[i].Severity != groups[j].Severity {
|
||||
return groups[i].Severity > groups[j].Severity
|
||||
}
|
||||
return groups[i].ID < groups[j].ID
|
||||
})
|
||||
|
||||
return groups
|
||||
}
|
||||
|
||||
// WriteResults saves inspection results as markdown files to a timestamped directory.
|
||||
// Returns the output directory path.
|
||||
func WriteResults(baseDir, env string, results *Results, data *ClusterData, analysis *AnalysisResult) (string, error) {
|
||||
ts := time.Now().Format("2006-01-02_150405")
|
||||
dir := filepath.Join(baseDir, env, ts)
|
||||
|
||||
if err := os.MkdirAll(dir, 0o755); err != nil {
|
||||
return "", fmt.Errorf("create output directory: %w", err)
|
||||
}
|
||||
|
||||
groups := GroupFailures(results)
|
||||
|
||||
// Build analysis lookup: groupID -> analysis text
|
||||
analysisMap := map[string]string{}
|
||||
if analysis != nil {
|
||||
for _, sa := range analysis.Analyses {
|
||||
key := sa.GroupID
|
||||
if key == "" {
|
||||
key = sa.Subsystem
|
||||
}
|
||||
if sa.Error == nil {
|
||||
analysisMap[key] = sa.Analysis
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Write summary.md
|
||||
if err := writeSummary(dir, env, ts, results, data, groups, analysisMap); err != nil {
|
||||
return "", fmt.Errorf("write summary: %w", err)
|
||||
}
|
||||
|
||||
// Group checks by subsystem for per-subsystem files
|
||||
checksBySubsystem := map[string][]CheckResult{}
|
||||
for _, c := range results.Checks {
|
||||
checksBySubsystem[c.Subsystem] = append(checksBySubsystem[c.Subsystem], c)
|
||||
}
|
||||
|
||||
groupsBySubsystem := map[string][]FailureGroup{}
|
||||
for _, g := range groups {
|
||||
groupsBySubsystem[g.Subsystem] = append(groupsBySubsystem[g.Subsystem], g)
|
||||
}
|
||||
|
||||
// Write per-subsystem files
|
||||
for sub, checks := range checksBySubsystem {
|
||||
subGroups := groupsBySubsystem[sub]
|
||||
if err := writeSubsystem(dir, sub, ts, checks, subGroups, analysisMap); err != nil {
|
||||
return "", fmt.Errorf("write %s: %w", sub, err)
|
||||
}
|
||||
}
|
||||
|
||||
return dir, nil
|
||||
}
|
||||
|
||||
func writeSummary(dir, env, ts string, results *Results, data *ClusterData, groups []FailureGroup, analysisMap map[string]string) error {
|
||||
var b strings.Builder
|
||||
passed, failed, warned, skipped := results.Summary()
|
||||
|
||||
b.WriteString(fmt.Sprintf("# %s Inspection Report\n\n", strings.ToUpper(env)))
|
||||
b.WriteString(fmt.Sprintf("**Date:** %s \n", ts))
|
||||
b.WriteString(fmt.Sprintf("**Nodes:** %d \n", len(data.Nodes)))
|
||||
b.WriteString(fmt.Sprintf("**Total:** %d passed, %d failed, %d warnings, %d skipped \n\n", passed, failed, warned, skipped))
|
||||
|
||||
// Per-subsystem table
|
||||
subStats := map[string][4]int{} // [pass, fail, warn, skip]
|
||||
var subsystems []string
|
||||
for _, c := range results.Checks {
|
||||
if _, exists := subStats[c.Subsystem]; !exists {
|
||||
subsystems = append(subsystems, c.Subsystem)
|
||||
}
|
||||
s := subStats[c.Subsystem]
|
||||
switch c.Status {
|
||||
case StatusPass:
|
||||
s[0]++
|
||||
case StatusFail:
|
||||
s[1]++
|
||||
case StatusWarn:
|
||||
s[2]++
|
||||
case StatusSkip:
|
||||
s[3]++
|
||||
}
|
||||
subStats[c.Subsystem] = s
|
||||
}
|
||||
sort.Strings(subsystems)
|
||||
|
||||
// Count issue groups per subsystem
|
||||
issueCountBySub := map[string]int{}
|
||||
for _, g := range groups {
|
||||
issueCountBySub[g.Subsystem]++
|
||||
}
|
||||
|
||||
b.WriteString("## Subsystems\n\n")
|
||||
b.WriteString("| Subsystem | Pass | Fail | Warn | Skip | Issues |\n")
|
||||
b.WriteString("|-----------|------|------|------|------|--------|\n")
|
||||
for _, sub := range subsystems {
|
||||
s := subStats[sub]
|
||||
issues := issueCountBySub[sub]
|
||||
link := fmt.Sprintf("[%s](%s.md)", sub, sub)
|
||||
b.WriteString(fmt.Sprintf("| %s | %d | %d | %d | %d | %d |\n", link, s[0], s[1], s[2], s[3], issues))
|
||||
}
|
||||
b.WriteString("\n")
|
||||
|
||||
// Critical issues section
|
||||
critical := filterGroupsBySeverity(groups, High)
|
||||
if len(critical) > 0 {
|
||||
b.WriteString("## Critical Issues\n\n")
|
||||
for i, g := range critical {
|
||||
icon := "FAIL"
|
||||
if g.Status == StatusWarn {
|
||||
icon = "WARN"
|
||||
}
|
||||
nodeInfo := fmt.Sprintf("%d nodes", len(g.Nodes))
|
||||
if g.Count > len(g.Nodes) {
|
||||
nodeInfo = fmt.Sprintf("%d nodes (%d occurrences)", len(g.Nodes), g.Count)
|
||||
}
|
||||
b.WriteString(fmt.Sprintf("%d. **[%s]** %s — %s \n", i+1, icon, g.Name, nodeInfo))
|
||||
b.WriteString(fmt.Sprintf(" *%s* → [details](%s.md#%s) \n",
|
||||
g.Messages[0], g.Subsystem, anchorID(g.Name)))
|
||||
}
|
||||
b.WriteString("\n")
|
||||
}
|
||||
|
||||
// Collection errors
|
||||
var errs []string
|
||||
for _, nd := range data.Nodes {
|
||||
for _, e := range nd.Errors {
|
||||
errs = append(errs, fmt.Sprintf("- **%s**: %s", nd.Node.Name(), e))
|
||||
}
|
||||
}
|
||||
if len(errs) > 0 {
|
||||
b.WriteString("## Collection Errors\n\n")
|
||||
for _, e := range errs {
|
||||
b.WriteString(e + "\n")
|
||||
}
|
||||
b.WriteString("\n")
|
||||
}
|
||||
|
||||
return os.WriteFile(filepath.Join(dir, "summary.md"), []byte(b.String()), 0o644)
|
||||
}
|
||||
|
||||
func writeSubsystem(dir, subsystem, ts string, checks []CheckResult, groups []FailureGroup, analysisMap map[string]string) error {
|
||||
var b strings.Builder
|
||||
|
||||
// Count
|
||||
var passed, failed, warned, skipped int
|
||||
for _, c := range checks {
|
||||
switch c.Status {
|
||||
case StatusPass:
|
||||
passed++
|
||||
case StatusFail:
|
||||
failed++
|
||||
case StatusWarn:
|
||||
warned++
|
||||
case StatusSkip:
|
||||
skipped++
|
||||
}
|
||||
}
|
||||
|
||||
b.WriteString(fmt.Sprintf("# %s\n\n", strings.ToUpper(subsystem)))
|
||||
b.WriteString(fmt.Sprintf("**Date:** %s \n", ts))
|
||||
b.WriteString(fmt.Sprintf("**Checks:** %d passed, %d failed, %d warnings, %d skipped \n\n", passed, failed, warned, skipped))
|
||||
|
||||
// Issues section
|
||||
if len(groups) > 0 {
|
||||
b.WriteString("## Issues\n\n")
|
||||
for i, g := range groups {
|
||||
icon := "FAIL"
|
||||
if g.Status == StatusWarn {
|
||||
icon = "WARN"
|
||||
}
|
||||
b.WriteString(fmt.Sprintf("### %d. %s\n\n", i+1, g.Name))
|
||||
nodeInfo := fmt.Sprintf("%d nodes", len(g.Nodes))
|
||||
if g.Count > len(g.Nodes) {
|
||||
nodeInfo = fmt.Sprintf("%d nodes (%d occurrences)", len(g.Nodes), g.Count)
|
||||
}
|
||||
b.WriteString(fmt.Sprintf("**Status:** %s | **Severity:** %s | **Affected:** %s \n\n", icon, g.Severity, nodeInfo))
|
||||
|
||||
// Affected nodes
|
||||
b.WriteString("**Affected nodes:**\n")
|
||||
for _, n := range g.Nodes {
|
||||
b.WriteString(fmt.Sprintf("- `%s`\n", n))
|
||||
}
|
||||
b.WriteString("\n")
|
||||
|
||||
// Messages
|
||||
if len(g.Messages) == 1 {
|
||||
b.WriteString(fmt.Sprintf("**Detail:** %s\n\n", g.Messages[0]))
|
||||
} else {
|
||||
b.WriteString("**Details:**\n")
|
||||
for _, m := range g.Messages {
|
||||
b.WriteString(fmt.Sprintf("- %s\n", m))
|
||||
}
|
||||
b.WriteString("\n")
|
||||
}
|
||||
|
||||
// AI analysis (if available)
|
||||
if ai, ok := analysisMap[g.ID]; ok {
|
||||
b.WriteString(ai)
|
||||
b.WriteString("\n\n")
|
||||
}
|
||||
|
||||
b.WriteString("---\n\n")
|
||||
}
|
||||
}
|
||||
|
||||
// All checks table
|
||||
b.WriteString("## All Checks\n\n")
|
||||
b.WriteString("| Status | Severity | Check | Node | Detail |\n")
|
||||
b.WriteString("|--------|----------|-------|------|--------|\n")
|
||||
|
||||
// Sort: failures first
|
||||
sorted := make([]CheckResult, len(checks))
|
||||
copy(sorted, checks)
|
||||
sort.Slice(sorted, func(i, j int) bool {
|
||||
oi, oj := statusOrder(sorted[i].Status), statusOrder(sorted[j].Status)
|
||||
if oi != oj {
|
||||
return oi < oj
|
||||
}
|
||||
if sorted[i].Severity != sorted[j].Severity {
|
||||
return sorted[i].Severity > sorted[j].Severity
|
||||
}
|
||||
return sorted[i].ID < sorted[j].ID
|
||||
})
|
||||
|
||||
for _, c := range sorted {
|
||||
node := c.Node
|
||||
if node == "" {
|
||||
node = "cluster-wide"
|
||||
}
|
||||
msg := strings.ReplaceAll(c.Message, "|", "\\|")
|
||||
b.WriteString(fmt.Sprintf("| %s | %s | %s | %s | %s |\n",
|
||||
statusIcon(c.Status), c.Severity, c.Name, node, msg))
|
||||
}
|
||||
|
||||
return os.WriteFile(filepath.Join(dir, subsystem+".md"), []byte(b.String()), 0o644)
|
||||
}
|
||||
|
||||
func filterGroupsBySeverity(groups []FailureGroup, minSeverity Severity) []FailureGroup {
|
||||
var out []FailureGroup
|
||||
for _, g := range groups {
|
||||
if g.Severity >= minSeverity {
|
||||
out = append(out, g)
|
||||
}
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
// anchorID converts a heading name into a markdown anchor fragment:
// lowercased, spaces become hyphens, and every rune outside [a-z0-9-]
// is dropped.
func anchorID(name string) string {
	var b strings.Builder
	for _, r := range strings.ToLower(name) {
		switch {
		case (r >= 'a' && r <= 'z') || (r >= '0' && r <= '9') || r == '-':
			b.WriteRune(r)
		case r == ' ':
			b.WriteByte('-')
		}
	}
	return b.String()
}
|
||||
Loading…
x
Reference in New Issue
Block a user