orama/pkg/inspector/checks/namespace.go
2026-02-11 09:53:46 +02:00

156 lines
5.1 KiB
Go

package checks
import (
	"fmt"
	"sort"

	"github.com/DeBrosOfficial/network/pkg/inspector"
)
// init hands CheckNamespace to inspector.RegisterChecker under the key
// "namespace" when the package is loaded.
func init() {
inspector.RegisterChecker("namespace", CheckNamespace)
}
// nsSub is passed as the third argument to every inspector.Pass/Warn/Fail call
// in this file.
// NOTE(review): presumably the subsystem label attached to each result —
// confirm against the inspector package.
const nsSub = "namespace"
// CheckNamespace runs all namespace-level health checks.
//
// Per-node results are collected first, for every node that reports at least
// one namespace; the cross-node results for the whole cluster are appended
// after them.
func CheckNamespace(data *inspector.ClusterData) []inspector.CheckResult {
	var out []inspector.CheckResult
	for _, nodeData := range data.Nodes {
		// Nodes without namespaces contribute no per-node results.
		if len(nodeData.Namespaces) > 0 {
			out = append(out, checkNamespacesPerNode(nodeData)...)
		}
	}
	return append(out, checkNamespacesCrossNode(data)...)
}
// checkNamespacesPerNode evaluates each namespace reported by a single node
// and returns the resulting checks, tagged with that node's name.
//
// For every namespace the checks are emitted in this order, with IDs of the
// form "ns.<namespace>.<check>":
//
// rqlite_up (Critical) passes when RQLiteUp is set.
//
// rqlite_state (Critical) is emitted only while RQLite is up: "Leader" and
// "Follower" pass, "Candidate" warns, and any other state fails.
//
// rqlite_ready (Critical) passes when RQLiteReady is set and fails when RQLite
// is up but not ready; nothing is emitted when it is neither ready nor up.
//
// olric_up (High) and gateway_up (High) pass when OlricUp and GatewayUp are
// set respectively, and fail otherwise.
func checkNamespacesPerNode(nd *inspector.NodeData) []inspector.CheckResult {
	var results []inspector.CheckResult
	nodeName := nd.Node.Name()

	for _, ns := range nd.Namespaces {
		id := "ns." + ns.Name

		// RQLite liveness; the raft state is only inspected once it answers.
		upTitle := fmt.Sprintf("Namespace %s RQLite responding", ns.Name)
		if !ns.RQLiteUp {
			results = append(results, inspector.Fail(id+".rqlite_up", upTitle, nsSub, nodeName,
				fmt.Sprintf("port_base=%d not responding", ns.PortBase), inspector.Critical))
		} else {
			results = append(results, inspector.Pass(id+".rqlite_up", upTitle, nsSub, nodeName,
				fmt.Sprintf("port_base=%d state=%s", ns.PortBase, ns.RQLiteState), inspector.Critical))

			stateID := id + ".rqlite_state"
			stateTitle := fmt.Sprintf("Namespace %s RQLite raft state valid", ns.Name)
			stateDetail := fmt.Sprintf("state=%s", ns.RQLiteState)
			switch ns.RQLiteState {
			case "Candidate":
				// An election is in flight: neither healthy nor broken yet.
				results = append(results, inspector.Warn(stateID, stateTitle, nsSub, nodeName,
					"state=Candidate (election in progress)", inspector.Critical))
			case "Leader", "Follower":
				results = append(results, inspector.Pass(stateID, stateTitle, nsSub, nodeName,
					stateDetail, inspector.Critical))
			default:
				results = append(results, inspector.Fail(stateID, stateTitle, nsSub, nodeName,
					stateDetail, inspector.Critical))
			}
		}

		// RQLite readiness: a failure is only reported while the instance is up.
		readyID := id + ".rqlite_ready"
		readyTitle := fmt.Sprintf("Namespace %s RQLite ready", ns.Name)
		switch {
		case ns.RQLiteReady:
			results = append(results, inspector.Pass(readyID, readyTitle, nsSub, nodeName,
				"/readyz OK", inspector.Critical))
		case ns.RQLiteUp:
			results = append(results, inspector.Fail(readyID, readyTitle, nsSub, nodeName,
				"/readyz failed", inspector.Critical))
		}

		// Olric memberlist port.
		olricID := id + ".olric_up"
		olricTitle := fmt.Sprintf("Namespace %s Olric port listening", ns.Name)
		if !ns.OlricUp {
			results = append(results, inspector.Fail(olricID, olricTitle, nsSub, nodeName,
				"memberlist port not bound", inspector.High))
		} else {
			results = append(results, inspector.Pass(olricID, olricTitle, nsSub, nodeName,
				"memberlist port bound", inspector.High))
		}

		// Gateway HTTP probe; the observed status is reported either way.
		gatewayID := id + ".gateway_up"
		gatewayTitle := fmt.Sprintf("Namespace %s Gateway responding", ns.Name)
		gatewayDetail := fmt.Sprintf("HTTP status=%d", ns.GatewayStatus)
		if !ns.GatewayUp {
			results = append(results, inspector.Fail(gatewayID, gatewayTitle, nsSub, nodeName,
				gatewayDetail, inspector.High))
		} else {
			results = append(results, inspector.Pass(gatewayID, gatewayTitle, nsSub, nodeName,
				gatewayDetail, inspector.High))
		}
	}

	return results
}
// checkNamespacesCrossNode aggregates namespace health across every node in
// data and emits two Critical results per namespace, both with an empty node
// field:
//
// "ns.<name>.all_healthy" passes only when RQLite, Olric and the Gateway are
// all up on every node that reports the namespace.
//
// "ns.<name>.quorum" passes when the number of nodes with RQLite up is at least
// total/2+1, where total is the number of nodes reporting the namespace.
//
// Namespaces are processed in sorted name order so the returned slice is
// deterministic; ranging directly over the tally map would produce a different
// order from run to run. It returns nil when no node reports any namespace.
func checkNamespacesCrossNode(data *inspector.ClusterData) []inspector.CheckResult {
	// Gather all per-namespace tallies in a single pass over the cluster.
	nsNodes := map[string]int{}    // namespace name → count of nodes running it
	nsHealthy := map[string]int{}  // namespace name → count of nodes where all services are up
	nsRQLiteUp := map[string]int{} // namespace name → count of nodes where RQLite is up
	for _, nd := range data.Nodes {
		for _, ns := range nd.Namespaces {
			nsNodes[ns.Name]++
			if !ns.RQLiteUp {
				continue
			}
			nsRQLiteUp[ns.Name]++
			if ns.OlricUp && ns.GatewayUp {
				nsHealthy[ns.Name]++
			}
		}
	}
	if len(nsNodes) == 0 {
		return nil
	}

	// Map iteration order is unspecified, so fix the order explicitly.
	names := make([]string, 0, len(nsNodes))
	for name := range nsNodes {
		names = append(names, name)
	}
	sort.Strings(names)

	r := make([]inspector.CheckResult, 0, 2*len(names))
	for _, name := range names {
		total := nsNodes[name]

		// Every node that runs the namespace must have all three services up.
		healthy := nsHealthy[name]
		healthID := fmt.Sprintf("ns.%s.all_healthy", name)
		healthTitle := fmt.Sprintf("Namespace %s healthy on all nodes", name)
		healthDetail := fmt.Sprintf("%d/%d nodes fully healthy", healthy, total)
		if healthy == total {
			r = append(r, inspector.Pass(healthID, healthTitle, nsSub, "", healthDetail, inspector.Critical))
		} else {
			r = append(r, inspector.Fail(healthID, healthTitle, nsSub, "", healthDetail, inspector.Critical))
		}

		// Check namespace has quorum (>= N/2+1 RQLite instances).
		rqliteUp := nsRQLiteUp[name]
		quorumNeeded := total/2 + 1
		quorumID := fmt.Sprintf("ns.%s.quorum", name)
		quorumTitle := fmt.Sprintf("Namespace %s RQLite quorum", name)
		if rqliteUp >= quorumNeeded {
			r = append(r, inspector.Pass(quorumID, quorumTitle, nsSub, "",
				fmt.Sprintf("rqlite_up=%d/%d quorum_needed=%d", rqliteUp, total, quorumNeeded),
				inspector.Critical))
		} else {
			r = append(r, inspector.Fail(quorumID, quorumTitle, nsSub, "",
				fmt.Sprintf("rqlite_up=%d/%d quorum_needed=%d (QUORUM LOST)", rqliteUp, total, quorumNeeded),
				inspector.Critical))
		}
	}
	return r
}