orama/pkg/deployments/health/checker_test.go
anonpenguin23 c499b2d76e Enhance health checker and deployment handling
- Added support for "degraded" deployment status in types.
- Updated health checker initialization to include process manager and node ID.
- Refactored health checker tests to accommodate new process manager functionality.
- Implemented logic to handle unhealthy deployments, including restart and failure marking.
- Enhanced deployment reconciliation to manage under-replicated scenarios.
- Updated gateway handlers and middleware to consider "degraded" status in deployment queries.
2026-02-20 09:44:07 +02:00

939 lines
26 KiB
Go

package health
import (
"context"
"fmt"
"net/http"
"net/http/httptest"
"reflect"
"strings"
"sync"
"testing"
"time"
"github.com/DeBrosOfficial/network/pkg/deployments"
"go.uber.org/zap"
)
// ---------------------------------------------------------------------------
// Mock database
// ---------------------------------------------------------------------------
// queryCall records the arguments passed to a Query invocation.
type queryCall struct {
query string
args []interface{}
}
// execCall records the arguments passed to an Exec invocation.
type execCall struct {
query string
args []interface{}
}
// mockDB implements database.Database with configurable responses.
type mockDB struct {
mu sync.Mutex
// Query handling ---------------------------------------------------
queryFunc func(dest interface{}, query string, args ...interface{}) error
queryCalls []queryCall
// Exec handling ----------------------------------------------------
execFunc func(query string, args ...interface{}) (interface{}, error)
execCalls []execCall
}
func (m *mockDB) Query(_ context.Context, dest interface{}, query string, args ...interface{}) error {
m.mu.Lock()
m.queryCalls = append(m.queryCalls, queryCall{query: query, args: args})
fn := m.queryFunc
m.mu.Unlock()
if fn != nil {
return fn(dest, query, args...)
}
return nil
}
func (m *mockDB) QueryOne(_ context.Context, dest interface{}, query string, args ...interface{}) error {
m.mu.Lock()
m.queryCalls = append(m.queryCalls, queryCall{query: query, args: args})
m.mu.Unlock()
return nil
}
func (m *mockDB) Exec(_ context.Context, query string, args ...interface{}) (interface{}, error) {
m.mu.Lock()
m.execCalls = append(m.execCalls, execCall{query: query, args: args})
fn := m.execFunc
m.mu.Unlock()
if fn != nil {
return fn(query, args...)
}
return nil, nil
}
// getExecCalls returns a snapshot of the recorded Exec calls.
func (m *mockDB) getExecCalls() []execCall {
m.mu.Lock()
defer m.mu.Unlock()
out := make([]execCall, len(m.execCalls))
copy(out, m.execCalls)
return out
}
// getQueryCalls returns a snapshot of the recorded Query calls.
func (m *mockDB) getQueryCalls() []queryCall {
m.mu.Lock()
defer m.mu.Unlock()
out := make([]queryCall, len(m.queryCalls))
copy(out, m.queryCalls)
return out
}
// ---------------------------------------------------------------------------
// Mock process manager
// ---------------------------------------------------------------------------
type mockProcessManager struct {
mu sync.Mutex
restartCalls []string // deployment IDs
restartErr error
stopCalls []string // deployment IDs
stopErr error
}
func (m *mockProcessManager) Restart(_ context.Context, dep *deployments.Deployment) error {
m.mu.Lock()
m.restartCalls = append(m.restartCalls, dep.ID)
m.mu.Unlock()
return m.restartErr
}
func (m *mockProcessManager) Stop(_ context.Context, dep *deployments.Deployment) error {
m.mu.Lock()
m.stopCalls = append(m.stopCalls, dep.ID)
m.mu.Unlock()
return m.stopErr
}
func (m *mockProcessManager) getRestartCalls() []string {
m.mu.Lock()
defer m.mu.Unlock()
out := make([]string, len(m.restartCalls))
copy(out, m.restartCalls)
return out
}
func (m *mockProcessManager) getStopCalls() []string {
m.mu.Lock()
defer m.mu.Unlock()
out := make([]string, len(m.stopCalls))
copy(out, m.stopCalls)
return out
}
// ---------------------------------------------------------------------------
// Helper: populate a *[]T dest via reflection so the mock can return rows.
// ---------------------------------------------------------------------------
// appendRows appends rows to dest (a *[]SomeStruct) by creating new elements
// of the destination's element type and copying field values by name.
func appendRows(dest interface{}, rows []map[string]interface{}) {
dv := reflect.ValueOf(dest).Elem() // []T
elemType := dv.Type().Elem() // T
for _, row := range rows {
elem := reflect.New(elemType).Elem()
for name, val := range row {
f := elem.FieldByName(name)
if f.IsValid() && f.CanSet() {
f.Set(reflect.ValueOf(val))
}
}
dv = reflect.Append(dv, elem)
}
reflect.ValueOf(dest).Elem().Set(dv)
}
// ---------------------------------------------------------------------------
// Tests
// ---------------------------------------------------------------------------
// ---- a) NewHealthChecker --------------------------------------------------
func TestNewHealthChecker_NonNil(t *testing.T) {
db := &mockDB{}
logger := zap.NewNop()
pm := &mockProcessManager{}
hc := NewHealthChecker(db, logger, "node-1", pm)
if hc == nil {
t.Fatal("expected non-nil HealthChecker")
}
if hc.db != db {
t.Error("expected db to be stored")
}
if hc.logger != logger {
t.Error("expected logger to be stored")
}
if hc.workers != 10 {
t.Errorf("expected default workers=10, got %d", hc.workers)
}
if hc.nodeID != "node-1" {
t.Errorf("expected nodeID='node-1', got %q", hc.nodeID)
}
if hc.processManager != pm {
t.Error("expected processManager to be stored")
}
if hc.states == nil {
t.Error("expected states map to be initialized")
}
}
// ---- b) checkDeployment ---------------------------------------------------
func TestCheckDeployment_StaticDeployment(t *testing.T) {
db := &mockDB{}
hc := NewHealthChecker(db, zap.NewNop(), "node-1", nil)
dep := deploymentRow{
ID: "dep-1",
Name: "static-site",
Port: 0, // static deployment
}
if !hc.checkDeployment(context.Background(), dep) {
t.Error("static deployment (port 0) should always be healthy")
}
}
func TestCheckDeployment_HealthyEndpoint(t *testing.T) {
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
if r.URL.Path == "/healthz" {
w.WriteHeader(http.StatusOK)
return
}
w.WriteHeader(http.StatusNotFound)
}))
defer srv.Close()
port := serverPort(t, srv)
db := &mockDB{}
hc := NewHealthChecker(db, zap.NewNop(), "node-1", nil)
dep := deploymentRow{
ID: "dep-2",
Name: "web-app",
Port: port,
HealthCheckPath: "/healthz",
}
if !hc.checkDeployment(context.Background(), dep) {
t.Error("expected healthy for 200 response")
}
}
func TestCheckDeployment_UnhealthyEndpoint(t *testing.T) {
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
w.WriteHeader(http.StatusInternalServerError)
}))
defer srv.Close()
port := serverPort(t, srv)
db := &mockDB{}
hc := NewHealthChecker(db, zap.NewNop(), "node-1", nil)
dep := deploymentRow{
ID: "dep-3",
Name: "broken-app",
Port: port,
HealthCheckPath: "/healthz",
}
if hc.checkDeployment(context.Background(), dep) {
t.Error("expected unhealthy for 500 response")
}
}
func TestCheckDeployment_UnreachableEndpoint(t *testing.T) {
db := &mockDB{}
hc := NewHealthChecker(db, zap.NewNop(), "node-1", nil)
dep := deploymentRow{
ID: "dep-4",
Name: "ghost-app",
Port: 19999, // nothing listening here
HealthCheckPath: "/healthz",
}
if hc.checkDeployment(context.Background(), dep) {
t.Error("expected unhealthy for unreachable endpoint")
}
}
// ---- c) checkAllDeployments query -----------------------------------------
func TestCheckAllDeployments_QueriesLocalReplicas(t *testing.T) {
db := &mockDB{}
hc := NewHealthChecker(db, zap.NewNop(), "node-abc", nil)
hc.checkAllDeployments(context.Background())
calls := db.getQueryCalls()
if len(calls) == 0 {
t.Fatal("expected at least one query call")
}
q := calls[0].query
if !strings.Contains(q, "deployment_replicas") {
t.Errorf("expected query to join deployment_replicas, got: %s", q)
}
if !strings.Contains(q, "dr.node_id = ?") {
t.Errorf("expected query to filter by dr.node_id, got: %s", q)
}
if !strings.Contains(q, "'degraded'") {
t.Errorf("expected query to include 'degraded' status, got: %s", q)
}
// Verify nodeID was passed as the bind parameter
if len(calls[0].args) == 0 {
t.Fatal("expected query args")
}
if nodeID, ok := calls[0].args[0].(string); !ok || nodeID != "node-abc" {
t.Errorf("expected nodeID arg 'node-abc', got %v", calls[0].args[0])
}
}
// ---- d) handleUnhealthy ---------------------------------------------------
func TestHandleUnhealthy_RestartsBeforeFailure(t *testing.T) {
db := &mockDB{}
pm := &mockProcessManager{}
hc := NewHealthChecker(db, zap.NewNop(), "node-1", pm)
dep := deploymentRow{
ID: "dep-restart",
Namespace: "test",
Name: "my-app",
Type: "nextjs",
Port: 10001,
RestartPolicy: "on-failure",
MaxRestartCount: 3,
ReplicaStatus: "active",
}
ctx := context.Background()
// Drive 3 consecutive unhealthy checks -> should trigger restart
for i := 0; i < consecutiveFailuresThreshold; i++ {
hc.handleUnhealthy(ctx, dep)
}
// Verify restart was called
restarts := pm.getRestartCalls()
if len(restarts) != 1 {
t.Fatalf("expected 1 restart call, got %d", len(restarts))
}
if restarts[0] != "dep-restart" {
t.Errorf("expected restart for 'dep-restart', got %q", restarts[0])
}
// Verify no replica status UPDATE was issued (only event INSERT)
execCalls := db.getExecCalls()
for _, call := range execCalls {
if strings.Contains(call.query, "UPDATE deployment_replicas") {
t.Error("should not update replica status when restart succeeds")
}
}
}
func TestHandleUnhealthy_MarksReplicaFailedAfterRestartLimit(t *testing.T) {
db := &mockDB{
queryFunc: func(dest interface{}, query string, args ...interface{}) error {
// Return count of 1 active replica (so deployment becomes degraded, not failed)
if strings.Contains(query, "COUNT(*)") {
appendRows(dest, []map[string]interface{}{
{"Count": 1},
})
}
return nil
},
}
pm := &mockProcessManager{}
hc := NewHealthChecker(db, zap.NewNop(), "node-1", pm)
dep := deploymentRow{
ID: "dep-limited",
Namespace: "test",
Name: "my-app",
Type: "nextjs",
Port: 10001,
RestartPolicy: "on-failure",
MaxRestartCount: 1, // Only 1 restart allowed
ReplicaStatus: "active",
}
ctx := context.Background()
// First 3 misses -> restart (limit=1, attempt 1)
for i := 0; i < consecutiveFailuresThreshold; i++ {
hc.handleUnhealthy(ctx, dep)
}
// Should have restarted once
if len(pm.getRestartCalls()) != 1 {
t.Fatalf("expected 1 restart call, got %d", len(pm.getRestartCalls()))
}
// Next 3 misses -> restart limit exhausted, mark replica failed
for i := 0; i < consecutiveFailuresThreshold; i++ {
hc.handleUnhealthy(ctx, dep)
}
// Verify replica was marked failed
execCalls := db.getExecCalls()
foundReplicaUpdate := false
foundDeploymentUpdate := false
for _, call := range execCalls {
if strings.Contains(call.query, "UPDATE deployment_replicas") && strings.Contains(call.query, "'failed'") {
foundReplicaUpdate = true
}
if strings.Contains(call.query, "UPDATE deployments") {
foundDeploymentUpdate = true
}
}
if !foundReplicaUpdate {
t.Error("expected UPDATE deployment_replicas SET status = 'failed'")
}
if !foundDeploymentUpdate {
t.Error("expected UPDATE deployments to recalculate status")
}
// Should NOT have restarted again (limit was 1)
if len(pm.getRestartCalls()) != 1 {
t.Errorf("expected still 1 restart call, got %d", len(pm.getRestartCalls()))
}
}
func TestHandleUnhealthy_NeverRestart(t *testing.T) {
db := &mockDB{
queryFunc: func(dest interface{}, query string, args ...interface{}) error {
if strings.Contains(query, "COUNT(*)") {
appendRows(dest, []map[string]interface{}{
{"Count": 0},
})
}
return nil
},
}
pm := &mockProcessManager{}
hc := NewHealthChecker(db, zap.NewNop(), "node-1", pm)
dep := deploymentRow{
ID: "dep-never",
Namespace: "test",
Name: "no-restart-app",
Type: "nextjs",
Port: 10001,
RestartPolicy: "never",
MaxRestartCount: 10,
ReplicaStatus: "active",
}
ctx := context.Background()
// 3 misses should immediately mark failed without restart
for i := 0; i < consecutiveFailuresThreshold; i++ {
hc.handleUnhealthy(ctx, dep)
}
// No restart calls
if len(pm.getRestartCalls()) != 0 {
t.Errorf("expected 0 restart calls with policy=never, got %d", len(pm.getRestartCalls()))
}
// Verify replica was marked failed
execCalls := db.getExecCalls()
foundReplicaUpdate := false
for _, call := range execCalls {
if strings.Contains(call.query, "UPDATE deployment_replicas") && strings.Contains(call.query, "'failed'") {
foundReplicaUpdate = true
}
}
if !foundReplicaUpdate {
t.Error("expected replica to be marked failed immediately")
}
}
// ---- e) handleHealthy -----------------------------------------------------
func TestHandleHealthy_ResetsCounters(t *testing.T) {
db := &mockDB{}
pm := &mockProcessManager{}
hc := NewHealthChecker(db, zap.NewNop(), "node-1", pm)
dep := deploymentRow{
ID: "dep-reset",
Namespace: "test",
Name: "flaky-app",
Type: "nextjs",
Port: 10001,
RestartPolicy: "on-failure",
MaxRestartCount: 3,
ReplicaStatus: "active",
}
ctx := context.Background()
// 2 misses (below threshold)
hc.handleUnhealthy(ctx, dep)
hc.handleUnhealthy(ctx, dep)
// Health recovered
hc.handleHealthy(ctx, dep)
// 2 more misses — should NOT trigger restart (counters were reset)
hc.handleUnhealthy(ctx, dep)
hc.handleUnhealthy(ctx, dep)
if len(pm.getRestartCalls()) != 0 {
t.Errorf("expected 0 restart calls after counter reset, got %d", len(pm.getRestartCalls()))
}
}
func TestHandleHealthy_RecoversFailedReplica(t *testing.T) {
callCount := 0
db := &mockDB{
queryFunc: func(dest interface{}, query string, args ...interface{}) error {
if strings.Contains(query, "COUNT(*)") {
callCount++
if callCount == 1 {
// First COUNT: over-replication check — 1 active (under-replicated, allow recovery)
appendRows(dest, []map[string]interface{}{{"Count": 1}})
} else {
// Second COUNT: recalculateDeploymentStatus — now 2 active after recovery
appendRows(dest, []map[string]interface{}{{"Count": 2}})
}
}
return nil
},
}
hc := NewHealthChecker(db, zap.NewNop(), "node-1", nil)
dep := deploymentRow{
ID: "dep-recover",
Namespace: "test",
Name: "recovered-app",
ReplicaStatus: "failed", // Was failed, now passing health check
}
ctx := context.Background()
hc.handleHealthy(ctx, dep)
// Verify replica was updated back to 'active'
execCalls := db.getExecCalls()
foundReplicaRecovery := false
foundEvent := false
for _, call := range execCalls {
if strings.Contains(call.query, "UPDATE deployment_replicas") && strings.Contains(call.query, "'active'") {
foundReplicaRecovery = true
}
if strings.Contains(call.query, "replica_recovered") {
foundEvent = true
}
}
if !foundReplicaRecovery {
t.Error("expected UPDATE deployment_replicas SET status = 'active'")
}
if !foundEvent {
t.Error("expected replica_recovered event")
}
}
func TestHandleHealthy_StopsZombieReplicaWhenAlreadyReplaced(t *testing.T) {
db := &mockDB{
queryFunc: func(dest interface{}, query string, args ...interface{}) error {
if strings.Contains(query, "COUNT(*)") {
// 2 active replicas already exist — this replica was replaced
appendRows(dest, []map[string]interface{}{{"Count": 2}})
}
return nil
},
}
pm := &mockProcessManager{}
hc := NewHealthChecker(db, zap.NewNop(), "node-zombie", pm)
dep := deploymentRow{
ID: "dep-zombie",
Namespace: "test",
Name: "zombie-app",
Type: "nextjs",
Port: 10001,
ReplicaStatus: "failed", // Was failed, but process is running (systemd Restart=always)
}
ctx := context.Background()
hc.handleHealthy(ctx, dep)
// Verify Stop was called (not Restart)
stopCalls := pm.getStopCalls()
if len(stopCalls) != 1 {
t.Fatalf("expected 1 Stop call, got %d", len(stopCalls))
}
if stopCalls[0] != "dep-zombie" {
t.Errorf("expected Stop for 'dep-zombie', got %q", stopCalls[0])
}
// Verify replica row was DELETED (not updated to active)
execCalls := db.getExecCalls()
foundDelete := false
foundZombieEvent := false
for _, call := range execCalls {
if strings.Contains(call.query, "DELETE FROM deployment_replicas") {
foundDelete = true
// Verify the right deployment and node
if len(call.args) >= 2 {
if call.args[0] != "dep-zombie" || call.args[1] != "node-zombie" {
t.Errorf("DELETE args: got (%v, %v), want (dep-zombie, node-zombie)", call.args[0], call.args[1])
}
}
}
if strings.Contains(call.query, "zombie_replica_stopped") {
foundZombieEvent = true
}
// Should NOT recover to active
if strings.Contains(call.query, "UPDATE deployment_replicas") && strings.Contains(call.query, "'active'") {
t.Error("should NOT update replica to active when it's a zombie")
}
}
if !foundDelete {
t.Error("expected DELETE FROM deployment_replicas for zombie replica")
}
if !foundZombieEvent {
t.Error("expected zombie_replica_stopped event")
}
// Verify no Restart calls
if len(pm.getRestartCalls()) != 0 {
t.Errorf("expected 0 restart calls, got %d", len(pm.getRestartCalls()))
}
}
// ---- f) recordHealthCheck -------------------------------------------------
func TestRecordHealthCheck_IncludesNodeID(t *testing.T) {
db := &mockDB{}
hc := NewHealthChecker(db, zap.NewNop(), "node-xyz", nil)
hc.recordHealthCheck(context.Background(), "dep-1", true)
execCalls := db.getExecCalls()
if len(execCalls) != 1 {
t.Fatalf("expected 1 exec call, got %d", len(execCalls))
}
q := execCalls[0].query
if !strings.Contains(q, "node_id") {
t.Errorf("expected INSERT to include node_id column, got: %s", q)
}
// Verify node_id is the second arg (after deployment_id)
if len(execCalls[0].args) < 2 {
t.Fatal("expected at least 2 args")
}
if nodeID, ok := execCalls[0].args[1].(string); !ok || nodeID != "node-xyz" {
t.Errorf("expected node_id arg 'node-xyz', got %v", execCalls[0].args[1])
}
}
// ---- g) GetHealthStatus ---------------------------------------------------
func TestGetHealthStatus_ReturnsChecks(t *testing.T) {
now := time.Now().Truncate(time.Second)
db := &mockDB{
queryFunc: func(dest interface{}, query string, args ...interface{}) error {
appendRows(dest, []map[string]interface{}{
{"Status": "healthy", "CheckedAt": now, "ResponseTimeMs": 42},
{"Status": "unhealthy", "CheckedAt": now.Add(-30 * time.Second), "ResponseTimeMs": 5001},
})
return nil
},
}
hc := NewHealthChecker(db, zap.NewNop(), "node-1", nil)
checks, err := hc.GetHealthStatus(context.Background(), "dep-1", 10)
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
if len(checks) != 2 {
t.Fatalf("expected 2 health checks, got %d", len(checks))
}
if checks[0].Status != "healthy" {
t.Errorf("checks[0].Status = %q, want %q", checks[0].Status, "healthy")
}
if checks[0].ResponseTimeMs != 42 {
t.Errorf("checks[0].ResponseTimeMs = %d, want 42", checks[0].ResponseTimeMs)
}
if !checks[0].CheckedAt.Equal(now) {
t.Errorf("checks[0].CheckedAt = %v, want %v", checks[0].CheckedAt, now)
}
if checks[1].Status != "unhealthy" {
t.Errorf("checks[1].Status = %q, want %q", checks[1].Status, "unhealthy")
}
}
func TestGetHealthStatus_EmptyList(t *testing.T) {
db := &mockDB{
queryFunc: func(dest interface{}, query string, args ...interface{}) error {
return nil
},
}
hc := NewHealthChecker(db, zap.NewNop(), "node-1", nil)
checks, err := hc.GetHealthStatus(context.Background(), "dep-empty", 10)
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
if len(checks) != 0 {
t.Errorf("expected 0 health checks, got %d", len(checks))
}
}
func TestGetHealthStatus_DatabaseError(t *testing.T) {
db := &mockDB{
queryFunc: func(dest interface{}, query string, args ...interface{}) error {
return fmt.Errorf("connection refused")
},
}
hc := NewHealthChecker(db, zap.NewNop(), "node-1", nil)
_, err := hc.GetHealthStatus(context.Background(), "dep-err", 10)
if err == nil {
t.Fatal("expected error from GetHealthStatus")
}
if !strings.Contains(err.Error(), "connection refused") {
t.Errorf("expected 'connection refused' in error, got: %v", err)
}
}
// ---- h) reconcileDeployments ----------------------------------------------
type mockReconciler struct {
mu sync.Mutex
selectCalls []string // primaryNodeIDs
selectResult []string
selectErr error
updateStatusCalls []struct {
deploymentID string
nodeID string
status deployments.ReplicaStatus
}
}
func (m *mockReconciler) SelectReplicaNodes(_ context.Context, primaryNodeID string, _ int) ([]string, error) {
m.mu.Lock()
m.selectCalls = append(m.selectCalls, primaryNodeID)
m.mu.Unlock()
return m.selectResult, m.selectErr
}
func (m *mockReconciler) UpdateReplicaStatus(_ context.Context, deploymentID, nodeID string, status deployments.ReplicaStatus) error {
m.mu.Lock()
m.updateStatusCalls = append(m.updateStatusCalls, struct {
deploymentID string
nodeID string
status deployments.ReplicaStatus
}{deploymentID, nodeID, status})
m.mu.Unlock()
return nil
}
type mockProvisioner struct {
mu sync.Mutex
setupCalls []struct {
deploymentID string
nodeID string
}
}
func (m *mockProvisioner) SetupDynamicReplica(_ context.Context, dep *deployments.Deployment, nodeID string) {
m.mu.Lock()
m.setupCalls = append(m.setupCalls, struct {
deploymentID string
nodeID string
}{dep.ID, nodeID})
m.mu.Unlock()
}
func TestReconcileDeployments_UnderReplicated(t *testing.T) {
// Start a mock RQLite status endpoint that reports Leader
leaderSrv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
w.Write([]byte(`{"store":{"raft":{"state":"Leader"}}}`))
}))
defer leaderSrv.Close()
db := &mockDB{
queryFunc: func(dest interface{}, query string, args ...interface{}) error {
if strings.Contains(query, "active_replicas") {
appendRows(dest, []map[string]interface{}{
{
"ID": "dep-under",
"Namespace": "test",
"Name": "under-app",
"Type": "nextjs",
"HomeNodeID": "node-home",
"ContentCID": "cid-123",
"BuildCID": "",
"Environment": "",
"Port": 10001,
"HealthCheckPath": "/health",
"MemoryLimitMB": 256,
"CPULimitPercent": 50,
"RestartPolicy": "on-failure",
"MaxRestartCount": 10,
"ActiveReplicas": 1, // Under-replicated (desired=2)
},
})
}
return nil
},
}
rc := &mockReconciler{selectResult: []string{"node-new"}}
rp := &mockProvisioner{}
hc := NewHealthChecker(db, zap.NewNop(), "node-1", nil)
hc.SetReconciler(leaderSrv.URL, rc, rp)
hc.reconcileDeployments(context.Background())
// Wait briefly for the goroutine to fire
time.Sleep(50 * time.Millisecond)
// Verify SelectReplicaNodes was called
rc.mu.Lock()
selectCount := len(rc.selectCalls)
rc.mu.Unlock()
if selectCount != 1 {
t.Fatalf("expected 1 SelectReplicaNodes call, got %d", selectCount)
}
// Verify SetupDynamicReplica was called
rp.mu.Lock()
setupCount := len(rp.setupCalls)
rp.mu.Unlock()
if setupCount != 1 {
t.Fatalf("expected 1 SetupDynamicReplica call, got %d", setupCount)
}
rp.mu.Lock()
if rp.setupCalls[0].deploymentID != "dep-under" {
t.Errorf("expected deployment 'dep-under', got %q", rp.setupCalls[0].deploymentID)
}
if rp.setupCalls[0].nodeID != "node-new" {
t.Errorf("expected node 'node-new', got %q", rp.setupCalls[0].nodeID)
}
rp.mu.Unlock()
}
func TestReconcileDeployments_FullyReplicated(t *testing.T) {
leaderSrv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
w.Write([]byte(`{"store":{"raft":{"state":"Leader"}}}`))
}))
defer leaderSrv.Close()
db := &mockDB{
queryFunc: func(dest interface{}, query string, args ...interface{}) error {
if strings.Contains(query, "active_replicas") {
appendRows(dest, []map[string]interface{}{
{
"ID": "dep-full",
"Namespace": "test",
"Name": "full-app",
"Type": "nextjs",
"HomeNodeID": "node-home",
"ContentCID": "cid-456",
"BuildCID": "",
"Environment": "",
"Port": 10002,
"HealthCheckPath": "/health",
"MemoryLimitMB": 256,
"CPULimitPercent": 50,
"RestartPolicy": "on-failure",
"MaxRestartCount": 10,
"ActiveReplicas": 2, // Fully replicated
},
})
}
return nil
},
}
rc := &mockReconciler{selectResult: []string{"node-new"}}
rp := &mockProvisioner{}
hc := NewHealthChecker(db, zap.NewNop(), "node-1", nil)
hc.SetReconciler(leaderSrv.URL, rc, rp)
hc.reconcileDeployments(context.Background())
time.Sleep(50 * time.Millisecond)
// Should NOT trigger re-replication
rc.mu.Lock()
if len(rc.selectCalls) != 0 {
t.Errorf("expected 0 SelectReplicaNodes calls for fully replicated deployment, got %d", len(rc.selectCalls))
}
rc.mu.Unlock()
}
func TestReconcileDeployments_NotLeader(t *testing.T) {
// Not-leader RQLite status
followerSrv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
w.Write([]byte(`{"store":{"raft":{"state":"Follower"}}}`))
}))
defer followerSrv.Close()
db := &mockDB{}
rc := &mockReconciler{}
rp := &mockProvisioner{}
hc := NewHealthChecker(db, zap.NewNop(), "node-1", nil)
hc.SetReconciler(followerSrv.URL, rc, rp)
hc.reconcileDeployments(context.Background())
// Should not query deployments at all
calls := db.getQueryCalls()
if len(calls) != 0 {
t.Errorf("expected 0 query calls on follower, got %d", len(calls))
}
}
// ---------------------------------------------------------------------------
// Helpers
// ---------------------------------------------------------------------------
// serverPort extracts the port number from an httptest.Server.
func serverPort(t *testing.T, srv *httptest.Server) int {
t.Helper()
addr := srv.Listener.Addr().String()
var port int
_, err := fmt.Sscanf(addr[strings.LastIndex(addr, ":")+1:], "%d", &port)
if err != nil {
t.Fatalf("failed to parse port from %q: %v", addr, err)
}
return port
}