From b5dfcab1d621bb99ecbe5c8eb238ee21ef2dbde1 Mon Sep 17 00:00:00 2001 From: anonpenguin23 Date: Wed, 11 Feb 2026 15:08:20 +0200 Subject: [PATCH] Collector updates and bug fixes + patches scripts created --- pkg/cli/production/install/orchestrator.go | 8 + pkg/cli/production/upgrade/orchestrator.go | 56 ++++-- .../production/installers/anyone_relay.go | 31 +++ pkg/environments/production/preferences.go | 2 + pkg/inspector/checks/anyone.go | 126 ++++++------ pkg/inspector/checks/anyone_test.go | 180 +++++++++++++++++- pkg/inspector/checks/rqlite.go | 14 +- pkg/inspector/checks/system.go | 24 ++- pkg/inspector/checks/system_test.go | 16 +- pkg/inspector/collector.go | 61 ++++-- scripts/patches/fix-logrotate.sh | 106 +++++++++++ scripts/patches/fix-ufw-orport.sh | 85 +++++++++ 12 files changed, 594 insertions(+), 115 deletions(-) create mode 100755 scripts/patches/fix-logrotate.sh create mode 100755 scripts/patches/fix-ufw-orport.sh diff --git a/pkg/cli/production/install/orchestrator.go b/pkg/cli/production/install/orchestrator.go index b30a9f1..2a3d01d 100644 --- a/pkg/cli/production/install/orchestrator.go +++ b/pkg/cli/production/install/orchestrator.go @@ -124,10 +124,18 @@ func (o *Orchestrator) Execute() error { } // Save preferences for future upgrades + anyoneORPort := 0 + if o.flags.AnyoneRelay && o.flags.AnyoneORPort > 0 { + anyoneORPort = o.flags.AnyoneORPort + } else if o.flags.AnyoneRelay { + anyoneORPort = 9001 + } prefs := &production.NodePreferences{ Branch: o.flags.Branch, Nameserver: o.flags.Nameserver, AnyoneClient: o.flags.AnyoneClient, + AnyoneRelay: o.flags.AnyoneRelay, + AnyoneORPort: anyoneORPort, } if err := production.SavePreferences(o.oramaDir, prefs); err != nil { fmt.Fprintf(os.Stderr, "⚠️ Warning: Failed to save preferences: %v\n", err) diff --git a/pkg/cli/production/upgrade/orchestrator.go b/pkg/cli/production/upgrade/orchestrator.go index 3a75dc5..a297172 100644 --- a/pkg/cli/production/upgrade/orchestrator.go +++ 
b/pkg/cli/production/upgrade/orchestrator.go @@ -50,17 +50,27 @@ func NewOrchestrator(flags *Flags) *Orchestrator { // Configure Anyone mode (flag > saved preference) if flags.AnyoneRelay { setup.SetAnyoneRelayConfig(&production.AnyoneRelayConfig{ - Enabled: true, - Exit: flags.AnyoneExit, - Migrate: flags.AnyoneMigrate, - Nickname: flags.AnyoneNickname, - Contact: flags.AnyoneContact, - Wallet: flags.AnyoneWallet, - ORPort: flags.AnyoneORPort, - MyFamily: flags.AnyoneFamily, - BandwidthPct: flags.AnyoneBandwidth, + Enabled: true, + Exit: flags.AnyoneExit, + Migrate: flags.AnyoneMigrate, + Nickname: flags.AnyoneNickname, + Contact: flags.AnyoneContact, + Wallet: flags.AnyoneWallet, + ORPort: flags.AnyoneORPort, + MyFamily: flags.AnyoneFamily, + BandwidthPct: flags.AnyoneBandwidth, AccountingMax: flags.AnyoneAccounting, }) + } else if prefs.AnyoneRelay { + // Restore relay config from saved preferences (for firewall rules) + orPort := prefs.AnyoneORPort + if orPort == 0 { + orPort = 9001 + } + setup.SetAnyoneRelayConfig(&production.AnyoneRelayConfig{ + Enabled: true, + ORPort: orPort, + }) } else if flags.AnyoneClient || prefs.AnyoneClient { setup.SetAnyoneClient(true) } @@ -216,6 +226,16 @@ func (o *Orchestrator) handleBranchPreferences() error { prefsChanged = true } + // If anyone-relay was explicitly provided, update it + if o.flags.AnyoneRelay { + prefs.AnyoneRelay = true + prefs.AnyoneORPort = o.flags.AnyoneORPort + if prefs.AnyoneORPort == 0 { + prefs.AnyoneORPort = 9001 + } + prefsChanged = true + } + // Save preferences if anything changed if prefsChanged { if err := production.SavePreferences(o.oramaDir, prefs); err != nil { @@ -227,8 +247,8 @@ func (o *Orchestrator) handleBranchPreferences() error { // ClusterState represents the saved state of the RQLite cluster before shutdown type ClusterState struct { - Nodes []ClusterNode `json:"nodes"` - CapturedAt time.Time `json:"captured_at"` + Nodes []ClusterNode `json:"nodes"` + CapturedAt time.Time 
`json:"captured_at"` } // ClusterNode represents a node in the cluster @@ -358,13 +378,13 @@ func (o *Orchestrator) stopServices() error { // Stop services in reverse dependency order services := []string{ - "caddy.service", // Depends on node - "coredns.service", // Depends on node - "debros-gateway.service", // Legacy - "debros-node.service", // Depends on cluster, olric - "debros-ipfs-cluster.service", // Depends on IPFS - "debros-ipfs.service", // Base IPFS - "debros-olric.service", // Independent + "caddy.service", // Depends on node + "coredns.service", // Depends on node + "debros-gateway.service", // Legacy + "debros-node.service", // Depends on cluster, olric + "debros-ipfs-cluster.service", // Depends on IPFS + "debros-ipfs.service", // Base IPFS + "debros-olric.service", // Independent "debros-anyone-client.service", // Client mode "debros-anyone-relay.service", // Relay mode } diff --git a/pkg/environments/production/installers/anyone_relay.go b/pkg/environments/production/installers/anyone_relay.go index 3c9b726..02688b1 100644 --- a/pkg/environments/production/installers/anyone_relay.go +++ b/pkg/environments/production/installers/anyone_relay.go @@ -198,6 +198,12 @@ func (ari *AnyoneRelayInstaller) Install() error { exec.Command("systemctl", "stop", "anon").Run() exec.Command("systemctl", "disable", "anon").Run() + // Fix logrotate: the apt package installs /etc/logrotate.d/anon with + // "invoke-rc.d anon reload" in postrotate, but we disabled the anon service. + // Without this fix, log rotation leaves an empty notices.log and the relay + // keeps writing to the old (rotated) file descriptor. 
+ ari.fixLogrotate() + fmt.Fprintf(ari.logWriter, " ✓ Anyone relay binary installed\n") // Install nyx for relay monitoring (connects to ControlPort 9051) @@ -208,6 +214,31 @@ func (ari *AnyoneRelayInstaller) Install() error { return nil } +// fixLogrotate replaces the apt-provided logrotate config which uses +// "invoke-rc.d anon reload" (broken because we disable the anon service). +// Without this, log rotation creates an empty notices.log but the relay +// process keeps writing to the old file descriptor, so bootstrap detection +// and all log-based monitoring breaks after the first midnight rotation. +func (ari *AnyoneRelayInstaller) fixLogrotate() { + config := `/var/log/anon/*log { + daily + rotate 5 + compress + delaycompress + missingok + notifempty + create 0640 debian-anon adm + sharedscripts + postrotate + /usr/bin/killall -HUP anon 2>/dev/null || true + endscript +} +` + if err := os.WriteFile("/etc/logrotate.d/anon", []byte(config), 0644); err != nil { + fmt.Fprintf(ari.logWriter, " ⚠️ logrotate fix warning: %v\n", err) + } +} + // installNyx installs the nyx relay monitor tool func (ari *AnyoneRelayInstaller) installNyx() error { // Check if already installed diff --git a/pkg/environments/production/preferences.go b/pkg/environments/production/preferences.go index ea34f05..38da5d5 100644 --- a/pkg/environments/production/preferences.go +++ b/pkg/environments/production/preferences.go @@ -12,6 +12,8 @@ type NodePreferences struct { Branch string `yaml:"branch"` Nameserver bool `yaml:"nameserver"` AnyoneClient bool `yaml:"anyone_client"` + AnyoneRelay bool `yaml:"anyone_relay"` + AnyoneORPort int `yaml:"anyone_orport,omitempty"` // typically 9001 } const preferencesFile = "preferences.yaml" diff --git a/pkg/inspector/checks/anyone.go b/pkg/inspector/checks/anyone.go index 6493513..d782627 100644 --- a/pkg/inspector/checks/anyone.go +++ b/pkg/inspector/checks/anyone.go @@ -38,18 +38,22 @@ func checkAnyonePerNode(nd *inspector.NodeData) 
[]inspector.CheckResult { return r } - // --- Relay checks --- + isClientMode := a.Mode == "client" + if a.RelayActive { r = append(r, inspector.Pass("anyone.relay_active", "Anyone relay service active", anyoneSub, node, "debros-anyone-relay is active", inspector.High)) + } - // ORPort listening - if a.ORPortListening { - r = append(r, inspector.Pass("anyone.orport_listening", "ORPort 9001 listening", anyoneSub, node, - "port 9001 bound", inspector.High)) + // --- Client-mode checks --- + if isClientMode { + // SOCKS5 port + if a.SocksListening { + r = append(r, inspector.Pass("anyone.socks_listening", "SOCKS5 port 9050 listening", anyoneSub, node, + "port 9050 bound", inspector.High)) } else { - r = append(r, inspector.Fail("anyone.orport_listening", "ORPort 9001 listening", anyoneSub, node, - "port 9001 NOT bound", inspector.High)) + r = append(r, inspector.Fail("anyone.socks_listening", "SOCKS5 port 9050 listening", anyoneSub, node, + "port 9050 NOT bound (traffic cannot route through anonymity network)", inspector.High)) } // Control port @@ -61,49 +65,82 @@ func checkAnyonePerNode(nd *inspector.NodeData) []inspector.CheckResult { "port 9051 NOT bound (monitoring unavailable)", inspector.Low)) } - // Bootstrap status + // Bootstrap (clients also bootstrap to the network) if a.Bootstrapped { - r = append(r, inspector.Pass("anyone.bootstrapped", "Relay bootstrapped", anyoneSub, node, + r = append(r, inspector.Pass("anyone.client_bootstrapped", "Client bootstrapped", anyoneSub, node, fmt.Sprintf("bootstrap=%d%%", a.BootstrapPct), inspector.High)) } else if a.BootstrapPct > 0 { - r = append(r, inspector.Warn("anyone.bootstrapped", "Relay bootstrapped", anyoneSub, node, + r = append(r, inspector.Warn("anyone.client_bootstrapped", "Client bootstrapped", anyoneSub, node, fmt.Sprintf("bootstrap=%d%% (still connecting)", a.BootstrapPct), inspector.High)) } else { - r = append(r, inspector.Fail("anyone.bootstrapped", "Relay bootstrapped", anyoneSub, node, + r = append(r, 
inspector.Fail("anyone.client_bootstrapped", "Client bootstrapped", anyoneSub, node, "bootstrap=0% (not started or log missing)", inspector.High)) } - // Fingerprint present - if a.Fingerprint != "" { - r = append(r, inspector.Pass("anyone.fingerprint", "Relay has fingerprint", anyoneSub, node, - fmt.Sprintf("fingerprint=%s", a.Fingerprint), inspector.Medium)) - } else { - r = append(r, inspector.Warn("anyone.fingerprint", "Relay has fingerprint", anyoneSub, node, - "no fingerprint found (relay may not have generated keys yet)", inspector.Medium)) - } - - // Nickname configured - if a.Nickname != "" { - r = append(r, inspector.Pass("anyone.nickname", "Relay nickname configured", anyoneSub, node, - fmt.Sprintf("nickname=%s", a.Nickname), inspector.Low)) - } else { - r = append(r, inspector.Warn("anyone.nickname", "Relay nickname configured", anyoneSub, node, - "no nickname in /etc/anon/anonrc", inspector.Low)) - } + return r } - // --- Client checks --- + // --- Relay-mode checks --- + + // ORPort listening + if a.ORPortListening { + r = append(r, inspector.Pass("anyone.orport_listening", "ORPort 9001 listening", anyoneSub, node, + "port 9001 bound", inspector.High)) + } else { + r = append(r, inspector.Fail("anyone.orport_listening", "ORPort 9001 listening", anyoneSub, node, + "port 9001 NOT bound", inspector.High)) + } + + // Control port + if a.ControlListening { + r = append(r, inspector.Pass("anyone.control_listening", "Control port 9051 listening", anyoneSub, node, + "port 9051 bound", inspector.Low)) + } else { + r = append(r, inspector.Warn("anyone.control_listening", "Control port 9051 listening", anyoneSub, node, + "port 9051 NOT bound (monitoring unavailable)", inspector.Low)) + } + + // Bootstrap status + if a.Bootstrapped { + r = append(r, inspector.Pass("anyone.bootstrapped", "Relay bootstrapped", anyoneSub, node, + fmt.Sprintf("bootstrap=%d%%", a.BootstrapPct), inspector.High)) + } else if a.BootstrapPct > 0 { + r = append(r, 
inspector.Warn("anyone.bootstrapped", "Relay bootstrapped", anyoneSub, node, + fmt.Sprintf("bootstrap=%d%% (still connecting)", a.BootstrapPct), inspector.High)) + } else { + r = append(r, inspector.Fail("anyone.bootstrapped", "Relay bootstrapped", anyoneSub, node, + "bootstrap=0% (not started or log missing)", inspector.High)) + } + + // Fingerprint present + if a.Fingerprint != "" { + r = append(r, inspector.Pass("anyone.fingerprint", "Relay has fingerprint", anyoneSub, node, + fmt.Sprintf("fingerprint=%s", a.Fingerprint), inspector.Medium)) + } else { + r = append(r, inspector.Warn("anyone.fingerprint", "Relay has fingerprint", anyoneSub, node, + "no fingerprint found (relay may not have generated keys yet)", inspector.Medium)) + } + + // Nickname configured + if a.Nickname != "" { + r = append(r, inspector.Pass("anyone.nickname", "Relay nickname configured", anyoneSub, node, + fmt.Sprintf("nickname=%s", a.Nickname), inspector.Low)) + } else { + r = append(r, inspector.Warn("anyone.nickname", "Relay nickname configured", anyoneSub, node, + "no nickname in /etc/anon/anonrc", inspector.Low)) + } + + // --- Legacy client checks (if also running client service) --- if a.ClientActive { r = append(r, inspector.Pass("anyone.client_active", "Anyone client service active", anyoneSub, node, "debros-anyone-client is active", inspector.High)) - // SOCKS5 port listening if a.SocksListening { r = append(r, inspector.Pass("anyone.socks_listening", "SOCKS5 port 9050 listening", anyoneSub, node, "port 9050 bound", inspector.High)) } else { r = append(r, inspector.Fail("anyone.socks_listening", "SOCKS5 port 9050 listening", anyoneSub, node, - "port 9050 NOT bound (IPFS traffic cannot route through anonymity network)", inspector.High)) + "port 9050 NOT bound", inspector.High)) } } @@ -113,32 +150,7 @@ func checkAnyonePerNode(nd *inspector.NodeData) []inspector.CheckResult { func checkAnyoneCrossNode(data *inspector.ClusterData) []inspector.CheckResult { var r 
[]inspector.CheckResult - // Count relay and client nodes - relayActive := 0 - relayTotal := 0 - clientActive := 0 - clientTotal := 0 - - for _, nd := range data.Nodes { - if nd.Anyone == nil { - continue - } - if nd.Anyone.RelayActive { - relayActive++ - relayTotal++ - } - if nd.Anyone.ClientActive { - clientActive++ - clientTotal++ - } - } - - // Skip cross-node checks if no Anyone services at all - if relayTotal == 0 && clientTotal == 0 { - return r - } - - // ORPort reachability: check if relays are publicly accessible from other nodes + // ORPort reachability: only check from/to relay-mode nodes orportChecked := 0 orportReachable := 0 orportFailed := 0 diff --git a/pkg/inspector/checks/anyone_test.go b/pkg/inspector/checks/anyone_test.go index 48ca6c9..55aff74 100644 --- a/pkg/inspector/checks/anyone_test.go +++ b/pkg/inspector/checks/anyone_test.go @@ -31,6 +31,7 @@ func TestCheckAnyone_HealthyRelay(t *testing.T) { nd := makeNodeData("1.1.1.1", "node") nd.Anyone = &inspector.AnyoneData{ RelayActive: true, + Mode: "relay", ORPortListening: true, ControlListening: true, Bootstrapped: true, @@ -52,24 +53,79 @@ func TestCheckAnyone_HealthyRelay(t *testing.T) { } func TestCheckAnyone_HealthyClient(t *testing.T) { - nd := makeNodeData("1.1.1.1", "node") + nd := makeNodeData("1.1.1.1", "nameserver") nd.Anyone = &inspector.AnyoneData{ - ClientActive: true, + RelayActive: true, // service is debros-anyone-relay for both modes + Mode: "client", + SocksListening: true, + ControlListening: true, + Bootstrapped: true, + BootstrapPct: 100, + ORPortReachable: make(map[string]bool), + } + + data := makeCluster(map[string]*inspector.NodeData{"1.1.1.1": nd}) + results := CheckAnyone(data) + + expectStatus(t, results, "anyone.relay_active", inspector.StatusPass) + expectStatus(t, results, "anyone.socks_listening", inspector.StatusPass) + expectStatus(t, results, "anyone.control_listening", inspector.StatusPass) + expectStatus(t, results, "anyone.client_bootstrapped", 
inspector.StatusPass) + + // Should NOT have relay-specific checks + if findCheck(results, "anyone.orport_listening") != nil { + t.Error("client-mode node should not have ORPort check") + } + if findCheck(results, "anyone.bootstrapped") != nil { + t.Error("client-mode node should not have relay bootstrap check") + } + if findCheck(results, "anyone.fingerprint") != nil { + t.Error("client-mode node should not have fingerprint check") + } + if findCheck(results, "anyone.nickname") != nil { + t.Error("client-mode node should not have nickname check") + } +} + +func TestCheckAnyone_ClientNotBootstrapped(t *testing.T) { + nd := makeNodeData("1.1.1.1", "nameserver") + nd.Anyone = &inspector.AnyoneData{ + RelayActive: true, + Mode: "client", SocksListening: true, + BootstrapPct: 0, + Bootstrapped: false, ORPortReachable: make(map[string]bool), } data := makeCluster(map[string]*inspector.NodeData{"1.1.1.1": nd}) results := CheckAnyone(data) - expectStatus(t, results, "anyone.client_active", inspector.StatusPass) - expectStatus(t, results, "anyone.socks_listening", inspector.StatusPass) + expectStatus(t, results, "anyone.client_bootstrapped", inspector.StatusFail) +} + +func TestCheckAnyone_ClientPartialBootstrap(t *testing.T) { + nd := makeNodeData("1.1.1.1", "nameserver") + nd.Anyone = &inspector.AnyoneData{ + RelayActive: true, + Mode: "client", + SocksListening: true, + BootstrapPct: 50, + Bootstrapped: false, + ORPortReachable: make(map[string]bool), + } + + data := makeCluster(map[string]*inspector.NodeData{"1.1.1.1": nd}) + results := CheckAnyone(data) + + expectStatus(t, results, "anyone.client_bootstrapped", inspector.StatusWarn) } func TestCheckAnyone_RelayORPortDown(t *testing.T) { nd := makeNodeData("1.1.1.1", "node") nd.Anyone = &inspector.AnyoneData{ RelayActive: true, + Mode: "relay", ORPortListening: false, ControlListening: true, ORPortReachable: make(map[string]bool), @@ -85,6 +141,7 @@ func TestCheckAnyone_RelayNotBootstrapped(t *testing.T) { nd := 
makeNodeData("1.1.1.1", "node") nd.Anyone = &inspector.AnyoneData{ RelayActive: true, + Mode: "relay", ORPortListening: true, BootstrapPct: 0, Bootstrapped: false, @@ -101,6 +158,7 @@ func TestCheckAnyone_RelayPartialBootstrap(t *testing.T) { nd := makeNodeData("1.1.1.1", "node") nd.Anyone = &inspector.AnyoneData{ RelayActive: true, + Mode: "relay", ORPortListening: true, BootstrapPct: 75, Bootstrapped: false, @@ -114,9 +172,10 @@ func TestCheckAnyone_RelayPartialBootstrap(t *testing.T) { } func TestCheckAnyone_ClientSocksDown(t *testing.T) { - nd := makeNodeData("1.1.1.1", "node") + nd := makeNodeData("1.1.1.1", "nameserver") nd.Anyone = &inspector.AnyoneData{ - ClientActive: true, + RelayActive: true, + Mode: "client", SocksListening: false, ORPortReachable: make(map[string]bool), } @@ -131,6 +190,7 @@ func TestCheckAnyone_NoFingerprint(t *testing.T) { nd := makeNodeData("1.1.1.1", "node") nd.Anyone = &inspector.AnyoneData{ RelayActive: true, + Mode: "relay", ORPortListening: true, Fingerprint: "", ORPortReachable: make(map[string]bool), @@ -146,6 +206,7 @@ func TestCheckAnyone_CrossNode_ORPortReachable(t *testing.T) { nd1 := makeNodeData("1.1.1.1", "node") nd1.Anyone = &inspector.AnyoneData{ RelayActive: true, + Mode: "relay", ORPortListening: true, ORPortReachable: map[string]bool{"2.2.2.2": true}, } @@ -153,6 +214,7 @@ func TestCheckAnyone_CrossNode_ORPortReachable(t *testing.T) { nd2 := makeNodeData("2.2.2.2", "node") nd2.Anyone = &inspector.AnyoneData{ RelayActive: true, + Mode: "relay", ORPortListening: true, ORPortReachable: map[string]bool{"1.1.1.1": true}, } @@ -167,6 +229,7 @@ func TestCheckAnyone_CrossNode_ORPortUnreachable(t *testing.T) { nd1 := makeNodeData("1.1.1.1", "node") nd1.Anyone = &inspector.AnyoneData{ RelayActive: true, + Mode: "relay", ORPortListening: true, ORPortReachable: map[string]bool{"2.2.2.2": false}, } @@ -174,6 +237,7 @@ func TestCheckAnyone_CrossNode_ORPortUnreachable(t *testing.T) { nd2 := makeNodeData("2.2.2.2", "node") 
nd2.Anyone = &inspector.AnyoneData{ RelayActive: true, + Mode: "relay", ORPortListening: true, ORPortReachable: map[string]bool{"1.1.1.1": true}, } @@ -198,6 +262,7 @@ func TestCheckAnyone_BothRelayAndClient(t *testing.T) { nd.Anyone = &inspector.AnyoneData{ RelayActive: true, ClientActive: true, + Mode: "relay", // relay mode with legacy client also running ORPortListening: true, SocksListening: true, ControlListening: true, @@ -211,9 +276,110 @@ func TestCheckAnyone_BothRelayAndClient(t *testing.T) { data := makeCluster(map[string]*inspector.NodeData{"1.1.1.1": nd}) results := CheckAnyone(data) - // Should have both relay and client checks + // Should have both relay and legacy client checks expectStatus(t, results, "anyone.relay_active", inspector.StatusPass) expectStatus(t, results, "anyone.client_active", inspector.StatusPass) expectStatus(t, results, "anyone.socks_listening", inspector.StatusPass) expectStatus(t, results, "anyone.orport_listening", inspector.StatusPass) } + +func TestCheckAnyone_ClientModeNoRelayChecks(t *testing.T) { + // A client-mode node should never produce relay-specific check IDs + nd := makeNodeData("1.1.1.1", "nameserver") + nd.Anyone = &inspector.AnyoneData{ + RelayActive: true, + Mode: "client", + SocksListening: true, + Bootstrapped: true, + BootstrapPct: 100, + ORPortReachable: make(map[string]bool), + } + + data := makeCluster(map[string]*inspector.NodeData{"1.1.1.1": nd}) + results := CheckAnyone(data) + + relayOnlyChecks := []string{ + "anyone.orport_listening", + "anyone.bootstrapped", + "anyone.fingerprint", + "anyone.nickname", + "anyone.client_active", + } + for _, id := range relayOnlyChecks { + if findCheck(results, id) != nil { + t.Errorf("client-mode node should not produce check %q", id) + } + } +} + +func TestCheckAnyone_MixedCluster(t *testing.T) { + // Simulate a cluster with both relay and client-mode nodes + relay := makeNodeData("1.1.1.1", "node") + relay.Anyone = &inspector.AnyoneData{ + RelayActive: true, + 
Mode: "relay", + ORPortListening: true, + ControlListening: true, + Bootstrapped: true, + BootstrapPct: 100, + Fingerprint: "ABCDEF", + Nickname: "relay1", + ORPortReachable: make(map[string]bool), + } + + client := makeNodeData("2.2.2.2", "nameserver") + client.Anyone = &inspector.AnyoneData{ + RelayActive: true, + Mode: "client", + SocksListening: true, + ControlListening: true, + Bootstrapped: true, + BootstrapPct: 100, + ORPortReachable: make(map[string]bool), + } + + data := makeCluster(map[string]*inspector.NodeData{ + "1.1.1.1": relay, + "2.2.2.2": client, + }) + results := CheckAnyone(data) + + // Relay node should have relay checks + relayResults := filterByNode(results, "ubuntu@1.1.1.1") + if findCheckIn(relayResults, "anyone.orport_listening") == nil { + t.Error("relay node should have ORPort check") + } + if findCheckIn(relayResults, "anyone.bootstrapped") == nil { + t.Error("relay node should have relay bootstrap check") + } + + // Client node should have client checks + clientResults := filterByNode(results, "ubuntu@2.2.2.2") + if findCheckIn(clientResults, "anyone.client_bootstrapped") == nil { + t.Error("client node should have client bootstrap check") + } + if findCheckIn(clientResults, "anyone.orport_listening") != nil { + t.Error("client node should NOT have ORPort check") + } +} + +// filterByNode returns checks for a specific node. +func filterByNode(results []inspector.CheckResult, node string) []inspector.CheckResult { + var out []inspector.CheckResult + for _, r := range results { + if r.Node == node { + out = append(out, r) + } + } + return out +} + +// findCheckIn returns a pointer to the first check matching the given ID in a slice. 
+func findCheckIn(results []inspector.CheckResult, id string) *inspector.CheckResult { + for i := range results { + if results[i].ID == id { + return &results[i] + } + } + return nil +} diff --git a/pkg/inspector/checks/rqlite.go b/pkg/inspector/checks/rqlite.go index b54691e..39576cd 100644 --- a/pkg/inspector/checks/rqlite.go +++ b/pkg/inspector/checks/rqlite.go @@ -174,14 +174,20 @@ func checkRQLitePerNode(nd *inspector.NodeData, data *inspector.ClusterData) []i } } - // 1.15 db_applied_index == fsm_index + // 1.15 db_applied_index close to fsm_index if s.DBAppliedIndex > 0 && s.FsmIndex > 0 { - if s.DBAppliedIndex == s.FsmIndex { + var dbFsmGap uint64 + if s.FsmIndex > s.DBAppliedIndex { + dbFsmGap = s.FsmIndex - s.DBAppliedIndex + } else { + dbFsmGap = s.DBAppliedIndex - s.FsmIndex + } + if dbFsmGap <= 5 { r = append(r, inspector.Pass("rqlite.db_fsm_sync", "DB applied index matches FSM index", rqliteSub, node, - fmt.Sprintf("db_applied=%d fsm=%d", s.DBAppliedIndex, s.FsmIndex), inspector.Critical)) + fmt.Sprintf("db_applied=%d fsm=%d gap=%d", s.DBAppliedIndex, s.FsmIndex, dbFsmGap), inspector.Critical)) } else { r = append(r, inspector.Fail("rqlite.db_fsm_sync", "DB applied index matches FSM index", rqliteSub, node, - fmt.Sprintf("db_applied=%d fsm=%d (diverged)", s.DBAppliedIndex, s.FsmIndex), inspector.Critical)) + fmt.Sprintf("db_applied=%d fsm=%d gap=%d (diverged)", s.DBAppliedIndex, s.FsmIndex, dbFsmGap), inspector.Critical)) } } diff --git a/pkg/inspector/checks/system.go b/pkg/inspector/checks/system.go index ce53e2a..4f6a5fa 100644 --- a/pkg/inspector/checks/system.go +++ b/pkg/inspector/checks/system.go @@ -94,13 +94,25 @@ func checkSystemPerNode(nd *inspector.NodeData) []inspector.CheckResult { } } - // 6.6 Failed systemd units - if len(sys.FailedUnits) == 0 { - r = append(r, inspector.Pass("system.no_failed_units", "No failed systemd units", systemSub, node, - "no failed units", inspector.High)) + // 6.6 Failed systemd units (only debros-related 
units count as failures) + var debrosUnits, externalUnits []string + for _, u := range sys.FailedUnits { + if strings.HasPrefix(u, "debros-") || u == "wg-quick@wg0.service" || u == "caddy.service" || u == "coredns.service" { + debrosUnits = append(debrosUnits, u) + } else { + externalUnits = append(externalUnits, u) + } + } + if len(debrosUnits) > 0 { + r = append(r, inspector.Fail("system.no_failed_units", "No failed debros systemd units", systemSub, node, + fmt.Sprintf("failed: %s", strings.Join(debrosUnits, ", ")), inspector.High)) } else { - r = append(r, inspector.Fail("system.no_failed_units", "No failed systemd units", systemSub, node, - fmt.Sprintf("failed: %s", strings.Join(sys.FailedUnits, ", ")), inspector.High)) + r = append(r, inspector.Pass("system.no_failed_units", "No failed debros systemd units", systemSub, node, + "no failed debros units", inspector.High)) + } + if len(externalUnits) > 0 { + r = append(r, inspector.Warn("system.external_failed_units", "External systemd units healthy", systemSub, node, + fmt.Sprintf("external: %s", strings.Join(externalUnits, ", ")), inspector.Low)) } // 6.14 Memory usage diff --git a/pkg/inspector/checks/system_test.go b/pkg/inspector/checks/system_test.go index e33e9af..ae06954 100644 --- a/pkg/inspector/checks/system_test.go +++ b/pkg/inspector/checks/system_test.go @@ -112,17 +112,29 @@ func TestCheckSystem_NameserverServicesNotCheckedOnRegularNode(t *testing.T) { } } -func TestCheckSystem_FailedUnits(t *testing.T) { +func TestCheckSystem_FailedUnits_Debros(t *testing.T) { nd := makeNodeData("1.1.1.1", "node") nd.System = &inspector.SystemData{ Services: map[string]string{}, - FailedUnits: []string{"some-service.service"}, + FailedUnits: []string{"debros-node.service"}, } data := makeCluster(map[string]*inspector.NodeData{"1.1.1.1": nd}) results := CheckSystem(data) expectStatus(t, results, "system.no_failed_units", inspector.StatusFail) } +func TestCheckSystem_FailedUnits_External(t *testing.T) { + nd := 
makeNodeData("1.1.1.1", "node") + nd.System = &inspector.SystemData{ + Services: map[string]string{}, + FailedUnits: []string{"cloud-init.service"}, + } + data := makeCluster(map[string]*inspector.NodeData{"1.1.1.1": nd}) + results := CheckSystem(data) + expectStatus(t, results, "system.no_failed_units", inspector.StatusPass) + expectStatus(t, results, "system.external_failed_units", inspector.StatusWarn) +} + func TestCheckSystem_Memory(t *testing.T) { tests := []struct { name string diff --git a/pkg/inspector/collector.go b/pkg/inspector/collector.go index 7d1e61b..faae327 100644 --- a/pkg/inspector/collector.go +++ b/pkg/inspector/collector.go @@ -229,6 +229,7 @@ type NetworkData struct { type AnyoneData struct { RelayActive bool // debros-anyone-relay systemd service active ClientActive bool // debros-anyone-client systemd service active + Mode string // "relay" or "client" (from anonrc ORPort presence) ORPortListening bool // port 9001 bound locally SocksListening bool // port 9050 bound locally (client SOCKS5) ControlListening bool // port 9051 bound locally (control port) @@ -624,7 +625,7 @@ curl -sf -X POST 'http://localhost:4501/api/v0/version' 2>/dev/null | python3 -c echo "$SEP" curl -sf 'http://localhost:9094/id' 2>/dev/null | python3 -c "import sys,json; print(json.load(sys.stdin).get('version',''))" 2>/dev/null || echo unknown echo "$SEP" -test -f /home/debros/.orama/data/ipfs/repo/swarm.key && echo yes || echo no +sudo test -f /home/debros/.orama/data/ipfs/repo/swarm.key && echo yes || echo no echo "$SEP" curl -sf -X POST 'http://localhost:4501/api/v0/bootstrap/list' 2>/dev/null | python3 -c "import sys,json; peers=json.load(sys.stdin).get('Peers',[]); print(len(peers))" 2>/dev/null || echo -1 ` @@ -695,7 +696,7 @@ ps -C coredns -o rss= 2>/dev/null | head -1 || echo 0 echo "$SEP" systemctl show coredns --property=NRestarts 2>/dev/null | cut -d= -f2 echo "$SEP" -journalctl -u coredns --no-pager -n 100 --since "5 minutes ago" 2>/dev/null | grep -ciE 
'(error|ERR)' || echo 0 +journalctl -u coredns --no-pager -n 100 --since "5 minutes ago" 2>/dev/null | grep -iE '(error|ERR)' | grep -cvF 'NOERROR' || echo 0 echo "$SEP" test -f /etc/coredns/Corefile && echo yes || echo no echo "$SEP" @@ -823,9 +824,9 @@ cat /sys/class/net/wg0/mtu 2>/dev/null || echo 0 echo "$SEP" sudo wg show wg0 dump 2>/dev/null echo "$SEP" -test -f /etc/wireguard/wg0.conf && echo yes || echo no +sudo test -f /etc/wireguard/wg0.conf && echo yes || echo no echo "$SEP" -stat -c '%a' /etc/wireguard/wg0.conf 2>/dev/null || echo 000 +sudo stat -c '%a' /etc/wireguard/wg0.conf 2>/dev/null || echo 000 ` res := RunSSH(ctx, node, cmd) if !res.OK() && res.Stdout == "" { @@ -915,7 +916,7 @@ func collectSystem(ctx context.Context, node Node) *SystemData { cmd += ` && echo "$SEP"` cmd += ` && ss -tlnp 2>/dev/null | awk 'NR>1{split($4,a,":"); print a[length(a)]}' | sort -un` cmd += ` && echo "$SEP"` - cmd += ` && ufw status 2>/dev/null | head -1` + cmd += ` && sudo ufw status 2>/dev/null | head -1` cmd += ` && echo "$SEP"` cmd += ` && ps -C debros-node -o user= 2>/dev/null | head -1 || echo unknown` cmd += ` && echo "$SEP"` @@ -1063,7 +1064,7 @@ ip route show default 2>/dev/null | head -1 echo "$SEP" ip route show 10.0.0.0/24 dev wg0 2>/dev/null | head -1 echo "$SEP" -cat /proc/net/snmp 2>/dev/null | awk '/^Tcp:/{getline; print}' +awk '/^Tcp:/{getline; print $12" "$13}' /proc/net/snmp 2>/dev/null; sleep 1; awk '/^Tcp:/{getline; print $12" "$13}' /proc/net/snmp 2>/dev/null echo "$SEP" %s `, pingCmds) @@ -1105,16 +1106,23 @@ echo "$SEP" data.WGRouteExists = strings.TrimSpace(parts[4]) != "" } - // Parse TCP retransmission rate from /proc/net/snmp - // Values line: "Tcp: ..." 
- // Index: 0 1 2 3 4 5 6 7 8 9 10 11 12 + // Parse TCP retransmission rate from /proc/net/snmp (delta over 1 second) + // Two snapshots: "OutSegs RetransSegs\nOutSegs RetransSegs" if len(parts) > 5 { - fields := strings.Fields(strings.TrimSpace(parts[5])) - if len(fields) >= 13 { - outSegs := parseIntDefault(fields[11], 0) - retransSegs := parseIntDefault(fields[12], 0) - if outSegs > 0 { - data.TCPRetransRate = float64(retransSegs) / float64(outSegs) * 100 + lines := strings.Split(strings.TrimSpace(parts[5]), "\n") + if len(lines) >= 2 { + before := strings.Fields(lines[0]) + after := strings.Fields(lines[1]) + if len(before) >= 2 && len(after) >= 2 { + outBefore := parseIntDefault(before[0], 0) + retBefore := parseIntDefault(before[1], 0) + outAfter := parseIntDefault(after[0], 0) + retAfter := parseIntDefault(after[1], 0) + deltaOut := outAfter - outBefore + deltaRet := retAfter - retBefore + if deltaOut > 0 { + data.TCPRetransRate = float64(deltaRet) / float64(deltaOut) * 100 + } } } } @@ -1154,14 +1162,22 @@ ss -tlnp 2>/dev/null | grep -q ':9050 ' && echo yes || echo no echo "$SEP" ss -tlnp 2>/dev/null | grep -q ':9051 ' && echo yes || echo no echo "$SEP" -# Check bootstrap status from log (last 50 lines) -grep -oP 'Bootstrapped \K[0-9]+' /var/log/anon/notices.log 2>/dev/null | tail -1 || echo 0 +# Check bootstrap status from log. Fall back to notices.log.1 if current log +# is empty (logrotate may have rotated the file without signaling the relay). 
+BPCT=$(grep -oP 'Bootstrapped \K[0-9]+' /var/log/anon/notices.log 2>/dev/null | tail -1) +if [ -z "$BPCT" ]; then + BPCT=$(grep -oP 'Bootstrapped \K[0-9]+' /var/log/anon/notices.log.1 2>/dev/null | tail -1) +fi +echo "${BPCT:-0}" echo "$SEP" -# Read fingerprint -cat /var/lib/anon/fingerprint 2>/dev/null || echo "" +# Read fingerprint (sudo needed: file is owned by debian-anon with 0600 perms) +sudo cat /var/lib/anon/fingerprint 2>/dev/null || echo "" echo "$SEP" # Read nickname from config grep -oP '^Nickname \K\S+' /etc/anon/anonrc 2>/dev/null || echo "" +echo "$SEP" +# Detect relay vs client mode: check if ORPort is configured in anonrc +grep -qP '^\s*ORPort\s' /etc/anon/anonrc 2>/dev/null && echo relay || echo client ` res := RunSSH(ctx, node, cmd) @@ -1197,6 +1213,9 @@ grep -oP '^Nickname \K\S+' /etc/anon/anonrc 2>/dev/null || echo "" if len(parts) > 8 { data.Nickname = strings.TrimSpace(parts[8]) } + if len(parts) > 9 { + data.Mode = strings.TrimSpace(parts[9]) + } // If neither relay nor client is active, skip further checks if !data.RelayActive && !data.ClientActive { @@ -1226,8 +1245,8 @@ func collectAnyoneReachability(ctx context.Context, data *ClusterData) { var wg sync.WaitGroup for _, nd := range data.Nodes { - if nd.Anyone == nil { - continue + if nd.Anyone == nil || nd.Anyone.Mode == "client" { + continue // skip nodes without Anyone data or in client mode } wg.Add(1) go func(nd *NodeData) {
diff --git a/scripts/patches/fix-logrotate.sh b/scripts/patches/fix-logrotate.sh
new file mode 100755
index 0000000..0e4561e
--- /dev/null
+++ b/scripts/patches/fix-logrotate.sh
@@ -0,0 +1,135 @@
+#!/usr/bin/env bash
+#
+# Patch: Fix broken logrotate config on all nodes in an environment.
+#
+# The `anon` apt package ships /etc/logrotate.d/anon with:
+#   postrotate: invoke-rc.d anon reload
+# But we use debros-anyone-relay, not the anon service. So the relay
+# never gets SIGHUP after rotation, keeps writing to the old fd, and
+# the new notices.log stays empty (causing false "bootstrap=0%" in inspector).
+#
+# This script replaces the postrotate with: killall -HUP anon
+#
+# Usage:
+#   scripts/patches/fix-logrotate.sh --devnet
+#   scripts/patches/fix-logrotate.sh --testnet
+#
+# Exit status: 0 when every node was patched, 1 when any node failed.
+#
+set -euo pipefail
+
+ENV=""
+for arg in "$@"; do
+    case "$arg" in
+        --devnet) ENV="devnet" ;;
+        --testnet) ENV="testnet" ;;
+        -h|--help)
+            echo "Usage: scripts/patches/fix-logrotate.sh --devnet|--testnet"
+            exit 0
+            ;;
+        *) echo "Unknown flag: $arg" >&2; exit 1 ;;
+    esac
+done
+
+if [[ -z "$ENV" ]]; then
+    echo "ERROR: specify --devnet or --testnet" >&2
+    exit 1
+fi
+
+ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"
+CONF="$ROOT_DIR/scripts/remote-nodes.conf"
+[[ -f "$CONF" ]] || { echo "ERROR: Missing $CONF" >&2; exit 1; }
+
+# The fixed logrotate config (base64-encoded to avoid shell escaping issues).
+# Line wrapping added by base64 is stripped so the payload is a single token.
+CONFIG_B64=$(base64 <<'EOF' | tr -d '\n'
+/var/log/anon/*log {
+    daily
+    rotate 5
+    compress
+    delaycompress
+    missingok
+    notifempty
+    create 0640 debian-anon adm
+    sharedscripts
+    postrotate
+        /usr/bin/killall -HUP anon 2>/dev/null || true
+    endscript
+}
+EOF
+)
+
+SSH_OPTS=(-o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -o ConnectTimeout=10)
+
+# fix_node USER_HOST PASSWORD SSH_KEY CONFIG_B64
+# Installs the decoded config as /etc/logrotate.d/anon on one node, prints an
+# OK/ERR line for it, and returns non-zero when the remote command failed.
+fix_node() {
+    local user_host="$1"
+    local password="$2"
+    local ssh_key="$3"
+    local b64="$4"
+
+    local cmd="echo '$b64' | base64 -d | sudo tee /etc/logrotate.d/anon > /dev/null && echo PATCH_OK"
+
+    # `|| true`: under `set -e` a failing ssh would otherwise abort this
+    # background job before the ERR line below could be printed.
+    local result expanded_key
+    if [[ -n "$ssh_key" ]]; then
+        expanded_key="${ssh_key/#\~/$HOME}"
+        result=$(ssh -n "${SSH_OPTS[@]}" -i "$expanded_key" "$user_host" "$cmd" 2>&1) || true
+    else
+        # Hand the password over via SSHPASS (-e) so it never shows up in `ps`.
+        result=$(SSHPASS="$password" sshpass -e ssh -n "${SSH_OPTS[@]}" "$user_host" "$cmd" 2>&1) || true
+    fi
+
+    if grep -q "PATCH_OK" <<<"$result"; then
+        echo "  OK  $user_host"
+    else
+        echo "  ERR $user_host: $result"
+        return 1
+    fi
+}
+
+# Parse nodes from conf (pipe-separated fields: env|host|pass|role|key)
+HOSTS=()
+PASSES=()
+KEYS=()
+
+# `|| [[ -n "$env" ]]` keeps a last line that has no trailing newline.
+while IFS='|' read -r env host pass role key || [[ -n "$env" ]]; do
+    [[ -z "$env" || "$env" == \#* ]] && continue
+    env="${env%%#*}"
+    env="$(echo "$env" | xargs)"
+    [[ "$env" != "$ENV" ]] && continue
+    HOSTS+=("$host")
+    PASSES+=("$pass")
+    KEYS+=("${key:-}")
+done < "$CONF"
+
+echo "== fix-logrotate ($ENV) — ${#HOSTS[@]} nodes =="
+
+# Nothing to do; also avoids expanding an empty array under `set -u`,
+# which is an error on bash older than 4.4.
+if [[ ${#HOSTS[@]} -eq 0 ]]; then
+    echo "Done."
+    exit 0
+fi
+
+PIDS=()
+for i in "${!HOSTS[@]}"; do
+    fix_node "${HOSTS[$i]}" "${PASSES[$i]}" "${KEYS[$i]}" "$CONFIG_B64" &
+    PIDS+=("$!")
+done
+
+# Wait on each job separately: a bare `wait` returns 0 even if a job failed.
+FAILED=0
+for pid in "${PIDS[@]}"; do
+    wait "$pid" || FAILED=$((FAILED + 1))
+done
+
+if [[ "$FAILED" -gt 0 ]]; then
+    echo "Done: $FAILED node(s) failed." >&2
+    exit 1
+fi
+echo "Done."
diff --git a/scripts/patches/fix-ufw-orport.sh b/scripts/patches/fix-ufw-orport.sh
new file mode 100755
index 0000000..6844b25
--- /dev/null
+++ b/scripts/patches/fix-ufw-orport.sh
@@ -0,0 +1,122 @@
+#!/usr/bin/env bash
+#
+# Patch: Open the Anyone ORPort (default 9001) in UFW on all relay-mode nodes.
+#
+# The upgrade path resets UFW and rebuilds rules, but doesn't include
+# port 9001 because the --anyone-relay flag isn't passed during upgrade.
+# This script adds the missing rule on all relay nodes (not nameservers).
+#
+# Usage:
+#   scripts/patches/fix-ufw-orport.sh --devnet [--port=N]
+#   scripts/patches/fix-ufw-orport.sh --testnet [--port=N]
+#
+# Exit status: 0 when every node was patched, 1 when any node failed.
+#
+set -euo pipefail
+
+ENV=""
+PORT=9001
+for arg in "$@"; do
+    case "$arg" in
+        --devnet) ENV="devnet" ;;
+        --testnet) ENV="testnet" ;;
+        --port=*) PORT="${arg#--port=}" ;;
+        -h|--help)
+            echo "Usage: scripts/patches/fix-ufw-orport.sh --devnet|--testnet [--port=N]"
+            exit 0
+            ;;
+        *) echo "Unknown flag: $arg" >&2; exit 1 ;;
+    esac
+done
+
+if [[ -z "$ENV" ]]; then
+    echo "ERROR: specify --devnet or --testnet" >&2
+    exit 1
+fi
+
+# The port is interpolated into a remote shell command, so accept digits only.
+if [[ ! "$PORT" =~ ^[0-9]+$ ]] || (( 10#$PORT < 1 || 10#$PORT > 65535 )); then
+    echo "ERROR: invalid --port value: $PORT" >&2
+    exit 1
+fi
+
+ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"
+CONF="$ROOT_DIR/scripts/remote-nodes.conf"
+[[ -f "$CONF" ]] || { echo "ERROR: Missing $CONF" >&2; exit 1; }
+
+SSH_OPTS=(-o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -o ConnectTimeout=10)
+
+# fix_node USER_HOST PASSWORD SSH_KEY
+# Adds the "allow $PORT/tcp" UFW rule on one node, prints an OK/ERR line for
+# it, and returns non-zero when the remote command failed.
+fix_node() {
+    local user_host="$1"
+    local password="$2"
+    local ssh_key="$3"
+
+    # stderr is left attached so a ufw/sudo error shows up in the ERR line.
+    local cmd="sudo ufw allow ${PORT}/tcp >/dev/null && echo PATCH_OK"
+
+    # `|| true`: under `set -e` a failing ssh would otherwise abort this
+    # background job before the ERR line below could be printed.
+    local result expanded_key
+    if [[ -n "$ssh_key" ]]; then
+        expanded_key="${ssh_key/#\~/$HOME}"
+        result=$(ssh -n "${SSH_OPTS[@]}" -i "$expanded_key" "$user_host" "$cmd" 2>&1) || true
+    else
+        # Hand the password over via SSHPASS (-e) so it never shows up in `ps`.
+        result=$(SSHPASS="$password" sshpass -e ssh -n "${SSH_OPTS[@]}" "$user_host" "$cmd" 2>&1) || true
+    fi
+
+    if grep -q "PATCH_OK" <<<"$result"; then
+        echo "  OK  $user_host"
+    else
+        echo "  ERR $user_host: $result"
+        return 1
+    fi
+}
+
+# Parse nodes from conf — only relay nodes (role=node), skip nameservers
+HOSTS=()
+PASSES=()
+KEYS=()
+
+# `|| [[ -n "$env" ]]` keeps a last line that has no trailing newline.
+while IFS='|' read -r env host pass role key || [[ -n "$env" ]]; do
+    [[ -z "$env" || "$env" == \#* ]] && continue
+    env="${env%%#*}"
+    env="$(echo "$env" | xargs)"
+    [[ "$env" != "$ENV" ]] && continue
+    role="$(echo "$role" | xargs)"
+    [[ "$role" != "node" ]] && continue  # skip nameservers
+    HOSTS+=("$host")
+    PASSES+=("$pass")
+    KEYS+=("${key:-}")
+done < "$CONF"
+
+echo "== fix-ufw-orport ($ENV) — ${#HOSTS[@]} relay nodes =="
+
+# Nothing to do; also avoids expanding an empty array under `set -u`,
+# which is an error on bash older than 4.4.
+if [[ ${#HOSTS[@]} -eq 0 ]]; then
+    echo "Done."
+    exit 0
+fi
+
+PIDS=()
+for i in "${!HOSTS[@]}"; do
+    fix_node "${HOSTS[$i]}" "${PASSES[$i]}" "${KEYS[$i]}" &
+    PIDS+=("$!")
+done
+
+# Wait on each job separately: a bare `wait` returns 0 even if a job failed.
+FAILED=0
+for pid in "${PIDS[@]}"; do
+    wait "$pid" || FAILED=$((FAILED + 1))
+done
+
+if [[ "$FAILED" -gt 0 ]]; then
+    echo "Done: $FAILED node(s) failed." >&2
+    exit 1
+fi
+echo "Done."