mirror of
https://github.com/DeBrosOfficial/orama.git
synced 2026-03-17 11:06:57 +00:00
751 lines
23 KiB
Go
751 lines
23 KiB
Go
package inspector
|
|
|
|
import (
|
|
"bytes"
|
|
"encoding/json"
|
|
"fmt"
|
|
"io"
|
|
"net/http"
|
|
"os"
|
|
"sort"
|
|
"strings"
|
|
"sync"
|
|
"time"
|
|
)
|
|
|
|
// System prompt with architecture context and remediation knowledge.
// Sent as the "system" message on every OpenRouter call so the model knows the
// cluster's topology, common failure modes, and the required response layout.
const systemPrompt = `You are a distributed systems expert analyzing health check results for an Orama Network cluster.

## Architecture
- **RQLite**: Raft consensus SQLite database. Requires N/2+1 quorum for writes. Each node runs one instance.
- **Olric**: Distributed in-memory cache using memberlist protocol. Coordinates via elected coordinator node.
- **IPFS**: Decentralized storage with private swarm (swarm key). Runs Kubo daemon + IPFS Cluster for pinning.
- **CoreDNS + Caddy**: DNS resolution (port 53) and TLS termination (ports 80/443). Only on nameserver nodes.
- **WireGuard**: Mesh VPN connecting all nodes via 10.0.0.0/8 on port 51820. All inter-node traffic goes over WG.
- **Namespaces**: Isolated tenant environments. Each namespace runs its own RQLite + Olric + Gateway on a 5-port block (base+0=RQLite HTTP, +1=Raft, +2=Olric HTTP, +3=Memberlist, +4=Gateway).

## Common Failure Patterns
- If WireGuard is down on a node, ALL services on that node will appear unreachable from other nodes.
- RQLite losing quorum (< N/2+1 voters) means the cluster cannot accept writes. Reads may still work.
- Olric suspects/flapping in logs usually means unstable network between nodes (check WireGuard first).
- IPFS swarm peers dropping to 0 means the node is isolated from the private swarm.
- High TCP retransmission (>2%) indicates packet loss, often due to WireGuard MTU issues.

## Service Management
- ALWAYS use the CLI for service operations: ` + "`sudo orama node restart`" + `, ` + "`sudo orama node stop`" + `, ` + "`sudo orama node start`" + `
- NEVER use raw systemctl commands (they skip important lifecycle hooks).
- For rolling restarts: upgrade followers first, leader LAST, one node at a time.
- Check RQLite leader: ` + "`curl -s localhost:4001/status | python3 -c \"import sys,json; print(json.load(sys.stdin)['store']['raft']['state'])\"`" + `

## Response Format
Respond in this exact structure:

### Root Cause
What is causing these failures? If multiple issues, explain each briefly.

### Impact
What is broken for users right now? Can they still deploy apps, access services?

### Fix
Step-by-step commands to resolve. Include actual node IPs/names from the data when possible.

### Prevention
What could prevent this in the future? (omit if not applicable)`
|
|
|
|
// SubsystemAnalysis holds the AI analysis for a single subsystem or failure group.
type SubsystemAnalysis struct {
	Subsystem string        // subsystem the analysis belongs to, e.g. "rqlite"
	GroupID   string        // e.g. "anyone.bootstrapped" — empty when analyzing whole subsystem
	Analysis  string        // model response text; empty when Error is set
	Duration  time.Duration // wall-clock time of the single API call that produced this entry
	Error     error         // non-nil when the OpenRouter call for this entry failed
}
|
|
|
|
// AnalysisResult holds the AI's analysis of check failures.
type AnalysisResult struct {
	Model    string              // model identifier used for all calls
	Analyses []SubsystemAnalysis // one entry per subsystem (Analyze) or per failure group (AnalyzeGroups)
	Duration time.Duration       // total wall-clock time across the parallel calls
}
|
|
|
|
// Analyze sends failures and cluster context to OpenRouter for AI analysis.
|
|
// Each subsystem with issues gets its own API call, run in parallel.
|
|
func Analyze(results *Results, data *ClusterData, model, apiKey string) (*AnalysisResult, error) {
|
|
if apiKey == "" {
|
|
apiKey = os.Getenv("OPENROUTER_API_KEY")
|
|
}
|
|
if apiKey == "" {
|
|
return nil, fmt.Errorf("no API key: set --api-key or OPENROUTER_API_KEY env")
|
|
}
|
|
|
|
// Group failures and warnings by subsystem
|
|
issues := results.FailuresAndWarnings()
|
|
bySubsystem := map[string][]CheckResult{}
|
|
for _, c := range issues {
|
|
bySubsystem[c.Subsystem] = append(bySubsystem[c.Subsystem], c)
|
|
}
|
|
|
|
if len(bySubsystem) == 0 {
|
|
return &AnalysisResult{Model: model}, nil
|
|
}
|
|
|
|
// Build healthy summary (subsystems with zero failures/warnings)
|
|
healthySummary := buildHealthySummary(results, bySubsystem)
|
|
|
|
// Build collection errors summary
|
|
collectionErrors := buildCollectionErrors(data)
|
|
|
|
// Build cluster overview (shared across all calls)
|
|
clusterOverview := buildClusterOverview(data, results)
|
|
|
|
// Launch one AI call per subsystem in parallel
|
|
start := time.Now()
|
|
var mu sync.Mutex
|
|
var wg sync.WaitGroup
|
|
var analyses []SubsystemAnalysis
|
|
|
|
// Sort subsystems for deterministic ordering
|
|
subsystems := make([]string, 0, len(bySubsystem))
|
|
for sub := range bySubsystem {
|
|
subsystems = append(subsystems, sub)
|
|
}
|
|
sort.Strings(subsystems)
|
|
|
|
for _, sub := range subsystems {
|
|
checks := bySubsystem[sub]
|
|
wg.Add(1)
|
|
go func(subsystem string, checks []CheckResult) {
|
|
defer wg.Done()
|
|
|
|
prompt := buildSubsystemPrompt(subsystem, checks, data, clusterOverview, healthySummary, collectionErrors)
|
|
subStart := time.Now()
|
|
response, err := callOpenRouter(model, apiKey, prompt)
|
|
|
|
sa := SubsystemAnalysis{
|
|
Subsystem: subsystem,
|
|
Duration: time.Since(subStart),
|
|
}
|
|
if err != nil {
|
|
sa.Error = err
|
|
} else {
|
|
sa.Analysis = response
|
|
}
|
|
|
|
mu.Lock()
|
|
analyses = append(analyses, sa)
|
|
mu.Unlock()
|
|
}(sub, checks)
|
|
}
|
|
wg.Wait()
|
|
|
|
// Sort by subsystem name for consistent output
|
|
sort.Slice(analyses, func(i, j int) bool {
|
|
return analyses[i].Subsystem < analyses[j].Subsystem
|
|
})
|
|
|
|
return &AnalysisResult{
|
|
Model: model,
|
|
Analyses: analyses,
|
|
Duration: time.Since(start),
|
|
}, nil
|
|
}
|
|
|
|
// AnalyzeGroups sends each failure group to OpenRouter for focused AI analysis.
|
|
// Unlike Analyze which sends one call per subsystem, this sends one call per unique
|
|
// failure pattern, producing more focused and actionable results.
|
|
func AnalyzeGroups(groups []FailureGroup, results *Results, data *ClusterData, model, apiKey string) (*AnalysisResult, error) {
|
|
if apiKey == "" {
|
|
apiKey = os.Getenv("OPENROUTER_API_KEY")
|
|
}
|
|
if apiKey == "" {
|
|
return nil, fmt.Errorf("no API key: set --api-key or OPENROUTER_API_KEY env")
|
|
}
|
|
|
|
if len(groups) == 0 {
|
|
return &AnalysisResult{Model: model}, nil
|
|
}
|
|
|
|
// Build shared context
|
|
issuesBySubsystem := map[string][]CheckResult{}
|
|
for _, c := range results.FailuresAndWarnings() {
|
|
issuesBySubsystem[c.Subsystem] = append(issuesBySubsystem[c.Subsystem], c)
|
|
}
|
|
healthySummary := buildHealthySummary(results, issuesBySubsystem)
|
|
collectionErrors := buildCollectionErrors(data)
|
|
|
|
start := time.Now()
|
|
var mu sync.Mutex
|
|
var wg sync.WaitGroup
|
|
var analyses []SubsystemAnalysis
|
|
|
|
for _, g := range groups {
|
|
wg.Add(1)
|
|
go func(group FailureGroup) {
|
|
defer wg.Done()
|
|
|
|
prompt := buildGroupPrompt(group, data, healthySummary, collectionErrors)
|
|
subStart := time.Now()
|
|
response, err := callOpenRouter(model, apiKey, prompt)
|
|
|
|
sa := SubsystemAnalysis{
|
|
Subsystem: group.Subsystem,
|
|
GroupID: group.ID,
|
|
Duration: time.Since(subStart),
|
|
}
|
|
if err != nil {
|
|
sa.Error = err
|
|
} else {
|
|
sa.Analysis = response
|
|
}
|
|
|
|
mu.Lock()
|
|
analyses = append(analyses, sa)
|
|
mu.Unlock()
|
|
}(g)
|
|
}
|
|
wg.Wait()
|
|
|
|
// Sort by subsystem then group ID for consistent output
|
|
sort.Slice(analyses, func(i, j int) bool {
|
|
if analyses[i].Subsystem != analyses[j].Subsystem {
|
|
return analyses[i].Subsystem < analyses[j].Subsystem
|
|
}
|
|
return analyses[i].GroupID < analyses[j].GroupID
|
|
})
|
|
|
|
return &AnalysisResult{
|
|
Model: model,
|
|
Analyses: analyses,
|
|
Duration: time.Since(start),
|
|
}, nil
|
|
}
|
|
|
|
func buildGroupPrompt(group FailureGroup, data *ClusterData, healthySummary, collectionErrors string) string {
|
|
var b strings.Builder
|
|
|
|
icon := "FAILURE"
|
|
if group.Status == StatusWarn {
|
|
icon = "WARNING"
|
|
}
|
|
|
|
b.WriteString(fmt.Sprintf("## %s: %s\n\n", icon, group.Name))
|
|
b.WriteString(fmt.Sprintf("**Check ID:** %s \n", group.ID))
|
|
b.WriteString(fmt.Sprintf("**Severity:** %s \n", group.Severity))
|
|
b.WriteString(fmt.Sprintf("**Nodes affected:** %d \n\n", len(group.Nodes)))
|
|
|
|
b.WriteString("**Affected nodes:**\n")
|
|
for _, n := range group.Nodes {
|
|
b.WriteString(fmt.Sprintf("- %s\n", n))
|
|
}
|
|
b.WriteString("\n")
|
|
|
|
b.WriteString("**Error messages:**\n")
|
|
for _, m := range group.Messages {
|
|
b.WriteString(fmt.Sprintf("- %s\n", m))
|
|
}
|
|
b.WriteString("\n")
|
|
|
|
// Subsystem raw data
|
|
contextData := buildSubsystemContext(group.Subsystem, data)
|
|
if contextData != "" {
|
|
b.WriteString(fmt.Sprintf("## %s Raw Data (all nodes)\n", strings.ToUpper(group.Subsystem)))
|
|
b.WriteString(contextData)
|
|
b.WriteString("\n")
|
|
}
|
|
|
|
if healthySummary != "" {
|
|
b.WriteString("## Healthy Subsystems\n")
|
|
b.WriteString(healthySummary)
|
|
b.WriteString("\n")
|
|
}
|
|
|
|
if collectionErrors != "" {
|
|
b.WriteString("## Collection Errors\n")
|
|
b.WriteString(collectionErrors)
|
|
b.WriteString("\n")
|
|
}
|
|
|
|
b.WriteString(fmt.Sprintf("\nAnalyze this specific %s issue. Be concise — focus on this one problem.\n", group.Subsystem))
|
|
return b.String()
|
|
}
|
|
|
|
func buildClusterOverview(data *ClusterData, results *Results) string {
|
|
var b strings.Builder
|
|
b.WriteString(fmt.Sprintf("Nodes: %d\n", len(data.Nodes)))
|
|
for host, nd := range data.Nodes {
|
|
b.WriteString(fmt.Sprintf("- %s (role: %s)\n", host, nd.Node.Role))
|
|
}
|
|
passed, failed, warned, skipped := results.Summary()
|
|
b.WriteString(fmt.Sprintf("\nCheck totals: %d passed, %d failed, %d warnings, %d skipped\n", passed, failed, warned, skipped))
|
|
return b.String()
|
|
}
|
|
|
|
func buildHealthySummary(results *Results, issueSubsystems map[string][]CheckResult) string {
|
|
// Count passes per subsystem
|
|
passBySubsystem := map[string]int{}
|
|
totalBySubsystem := map[string]int{}
|
|
for _, c := range results.Checks {
|
|
totalBySubsystem[c.Subsystem]++
|
|
if c.Status == StatusPass {
|
|
passBySubsystem[c.Subsystem]++
|
|
}
|
|
}
|
|
|
|
var b strings.Builder
|
|
for sub, total := range totalBySubsystem {
|
|
if _, hasIssues := issueSubsystems[sub]; hasIssues {
|
|
continue
|
|
}
|
|
passed := passBySubsystem[sub]
|
|
if passed == total && total > 0 {
|
|
b.WriteString(fmt.Sprintf("- %s: all %d checks pass\n", sub, total))
|
|
}
|
|
}
|
|
|
|
if b.Len() == 0 {
|
|
return ""
|
|
}
|
|
return b.String()
|
|
}
|
|
|
|
func buildCollectionErrors(data *ClusterData) string {
|
|
var b strings.Builder
|
|
for _, nd := range data.Nodes {
|
|
if len(nd.Errors) > 0 {
|
|
for _, e := range nd.Errors {
|
|
b.WriteString(fmt.Sprintf("- %s: %s\n", nd.Node.Name(), e))
|
|
}
|
|
}
|
|
}
|
|
return b.String()
|
|
}
|
|
|
|
func buildSubsystemPrompt(subsystem string, checks []CheckResult, data *ClusterData, clusterOverview, healthySummary, collectionErrors string) string {
|
|
var b strings.Builder
|
|
|
|
b.WriteString("## Cluster Overview\n")
|
|
b.WriteString(clusterOverview)
|
|
b.WriteString("\n")
|
|
|
|
// Failures
|
|
var failures, warnings []CheckResult
|
|
for _, c := range checks {
|
|
if c.Status == StatusFail {
|
|
failures = append(failures, c)
|
|
} else if c.Status == StatusWarn {
|
|
warnings = append(warnings, c)
|
|
}
|
|
}
|
|
|
|
if len(failures) > 0 {
|
|
b.WriteString(fmt.Sprintf("## %s Failures\n", strings.ToUpper(subsystem)))
|
|
for _, f := range failures {
|
|
node := f.Node
|
|
if node == "" {
|
|
node = "cluster-wide"
|
|
}
|
|
b.WriteString(fmt.Sprintf("- [%s] %s (%s): %s\n", f.Severity, f.Name, node, f.Message))
|
|
}
|
|
b.WriteString("\n")
|
|
}
|
|
|
|
if len(warnings) > 0 {
|
|
b.WriteString(fmt.Sprintf("## %s Warnings\n", strings.ToUpper(subsystem)))
|
|
for _, w := range warnings {
|
|
node := w.Node
|
|
if node == "" {
|
|
node = "cluster-wide"
|
|
}
|
|
b.WriteString(fmt.Sprintf("- [%s] %s (%s): %s\n", w.Severity, w.Name, node, w.Message))
|
|
}
|
|
b.WriteString("\n")
|
|
}
|
|
|
|
// Subsystem-specific raw data
|
|
contextData := buildSubsystemContext(subsystem, data)
|
|
if contextData != "" {
|
|
b.WriteString(fmt.Sprintf("## %s Raw Data\n", strings.ToUpper(subsystem)))
|
|
b.WriteString(contextData)
|
|
b.WriteString("\n")
|
|
}
|
|
|
|
// Healthy subsystems for cross-reference
|
|
if healthySummary != "" {
|
|
b.WriteString("## Healthy Subsystems (for context)\n")
|
|
b.WriteString(healthySummary)
|
|
b.WriteString("\n")
|
|
}
|
|
|
|
// Collection errors
|
|
if collectionErrors != "" {
|
|
b.WriteString("## Collection Errors\n")
|
|
b.WriteString(collectionErrors)
|
|
b.WriteString("\n")
|
|
}
|
|
|
|
b.WriteString(fmt.Sprintf("\nAnalyze the %s issues above.\n", subsystem))
|
|
return b.String()
|
|
}
|
|
|
|
// buildSubsystemContext dispatches to the right context builder.
|
|
func buildSubsystemContext(subsystem string, data *ClusterData) string {
|
|
switch subsystem {
|
|
case "rqlite":
|
|
return buildRQLiteContext(data)
|
|
case "olric":
|
|
return buildOlricContext(data)
|
|
case "ipfs":
|
|
return buildIPFSContext(data)
|
|
case "dns":
|
|
return buildDNSContext(data)
|
|
case "wireguard":
|
|
return buildWireGuardContext(data)
|
|
case "system":
|
|
return buildSystemContext(data)
|
|
case "network":
|
|
return buildNetworkContext(data)
|
|
case "namespace":
|
|
return buildNamespaceContext(data)
|
|
case "anyone":
|
|
return buildAnyoneContext(data)
|
|
default:
|
|
return ""
|
|
}
|
|
}
|
|
|
|
func buildRQLiteContext(data *ClusterData) string {
|
|
var b strings.Builder
|
|
for host, nd := range data.Nodes {
|
|
if nd.RQLite == nil {
|
|
continue
|
|
}
|
|
b.WriteString(fmt.Sprintf("### %s\n", host))
|
|
if !nd.RQLite.Responsive {
|
|
b.WriteString(" NOT RESPONDING\n")
|
|
continue
|
|
}
|
|
if s := nd.RQLite.Status; s != nil {
|
|
b.WriteString(fmt.Sprintf(" raft_state=%s term=%d applied=%d commit=%d leader=%s peers=%d voter=%v\n",
|
|
s.RaftState, s.Term, s.AppliedIndex, s.CommitIndex, s.LeaderNodeID, s.NumPeers, s.Voter))
|
|
b.WriteString(fmt.Sprintf(" fsm_pending=%d db_size=%s version=%s goroutines=%d uptime=%s\n",
|
|
s.FsmPending, s.DBSizeFriendly, s.Version, s.Goroutines, s.Uptime))
|
|
}
|
|
if r := nd.RQLite.Readyz; r != nil {
|
|
b.WriteString(fmt.Sprintf(" readyz=%v store=%s leader=%s\n", r.Ready, r.Store, r.Leader))
|
|
}
|
|
if d := nd.RQLite.DebugVars; d != nil {
|
|
b.WriteString(fmt.Sprintf(" query_errors=%d execute_errors=%d leader_not_found=%d snapshot_errors=%d\n",
|
|
d.QueryErrors, d.ExecuteErrors, d.LeaderNotFound, d.SnapshotErrors))
|
|
}
|
|
b.WriteString(fmt.Sprintf(" strong_read=%v\n", nd.RQLite.StrongRead))
|
|
if nd.RQLite.Nodes != nil {
|
|
b.WriteString(fmt.Sprintf(" /nodes (%d members):", len(nd.RQLite.Nodes)))
|
|
for addr, n := range nd.RQLite.Nodes {
|
|
reachable := "ok"
|
|
if !n.Reachable {
|
|
reachable = "UNREACHABLE"
|
|
}
|
|
leader := ""
|
|
if n.Leader {
|
|
leader = " LEADER"
|
|
}
|
|
b.WriteString(fmt.Sprintf(" %s(%s%s)", addr, reachable, leader))
|
|
}
|
|
b.WriteString("\n")
|
|
}
|
|
}
|
|
return b.String()
|
|
}
|
|
|
|
func buildOlricContext(data *ClusterData) string {
|
|
var b strings.Builder
|
|
for host, nd := range data.Nodes {
|
|
if nd.Olric == nil {
|
|
continue
|
|
}
|
|
o := nd.Olric
|
|
b.WriteString(fmt.Sprintf("### %s\n", host))
|
|
b.WriteString(fmt.Sprintf(" active=%v memberlist=%v members=%d coordinator=%s\n",
|
|
o.ServiceActive, o.MemberlistUp, o.MemberCount, o.Coordinator))
|
|
b.WriteString(fmt.Sprintf(" memory=%dMB restarts=%d log_errors=%d suspects=%d flapping=%d\n",
|
|
o.ProcessMemMB, o.RestartCount, o.LogErrors, o.LogSuspects, o.LogFlapping))
|
|
}
|
|
return b.String()
|
|
}
|
|
|
|
func buildIPFSContext(data *ClusterData) string {
|
|
var b strings.Builder
|
|
for host, nd := range data.Nodes {
|
|
if nd.IPFS == nil {
|
|
continue
|
|
}
|
|
ip := nd.IPFS
|
|
repoPct := 0.0
|
|
if ip.RepoMaxBytes > 0 {
|
|
repoPct = float64(ip.RepoSizeBytes) / float64(ip.RepoMaxBytes) * 100
|
|
}
|
|
b.WriteString(fmt.Sprintf("### %s\n", host))
|
|
b.WriteString(fmt.Sprintf(" daemon=%v cluster=%v swarm_peers=%d cluster_peers=%d cluster_errors=%d\n",
|
|
ip.DaemonActive, ip.ClusterActive, ip.SwarmPeerCount, ip.ClusterPeerCount, ip.ClusterErrors))
|
|
b.WriteString(fmt.Sprintf(" repo=%.0f%% (%d/%d bytes) kubo=%s cluster=%s\n",
|
|
repoPct, ip.RepoSizeBytes, ip.RepoMaxBytes, ip.KuboVersion, ip.ClusterVersion))
|
|
b.WriteString(fmt.Sprintf(" swarm_key=%v bootstrap_empty=%v\n", ip.HasSwarmKey, ip.BootstrapEmpty))
|
|
}
|
|
return b.String()
|
|
}
|
|
|
|
func buildDNSContext(data *ClusterData) string {
|
|
var b strings.Builder
|
|
for host, nd := range data.Nodes {
|
|
if nd.DNS == nil {
|
|
continue
|
|
}
|
|
d := nd.DNS
|
|
b.WriteString(fmt.Sprintf("### %s\n", host))
|
|
b.WriteString(fmt.Sprintf(" coredns=%v caddy=%v ports(53=%v,80=%v,443=%v) corefile=%v\n",
|
|
d.CoreDNSActive, d.CaddyActive, d.Port53Bound, d.Port80Bound, d.Port443Bound, d.CorefileExists))
|
|
b.WriteString(fmt.Sprintf(" memory=%dMB restarts=%d log_errors=%d\n",
|
|
d.CoreDNSMemMB, d.CoreDNSRestarts, d.LogErrors))
|
|
b.WriteString(fmt.Sprintf(" resolve: SOA=%v NS=%v(count=%d) wildcard=%v base_A=%v\n",
|
|
d.SOAResolves, d.NSResolves, d.NSRecordCount, d.WildcardResolves, d.BaseAResolves))
|
|
b.WriteString(fmt.Sprintf(" tls: base=%d days, wildcard=%d days\n",
|
|
d.BaseTLSDaysLeft, d.WildTLSDaysLeft))
|
|
}
|
|
return b.String()
|
|
}
|
|
|
|
func buildWireGuardContext(data *ClusterData) string {
|
|
var b strings.Builder
|
|
for host, nd := range data.Nodes {
|
|
if nd.WireGuard == nil {
|
|
continue
|
|
}
|
|
wg := nd.WireGuard
|
|
b.WriteString(fmt.Sprintf("### %s\n", host))
|
|
b.WriteString(fmt.Sprintf(" interface=%v service=%v ip=%s port=%d peers=%d mtu=%d\n",
|
|
wg.InterfaceUp, wg.ServiceActive, wg.WgIP, wg.ListenPort, wg.PeerCount, wg.MTU))
|
|
b.WriteString(fmt.Sprintf(" config=%v perms=%s\n", wg.ConfigExists, wg.ConfigPerms))
|
|
for _, p := range wg.Peers {
|
|
age := "never"
|
|
if p.LatestHandshake > 0 {
|
|
age = fmt.Sprintf("%ds ago", time.Now().Unix()-p.LatestHandshake)
|
|
}
|
|
keyPrefix := p.PublicKey
|
|
if len(keyPrefix) > 8 {
|
|
keyPrefix = keyPrefix[:8] + "..."
|
|
}
|
|
b.WriteString(fmt.Sprintf(" peer %s: allowed=%s handshake=%s rx=%d tx=%d\n",
|
|
keyPrefix, p.AllowedIPs, age, p.TransferRx, p.TransferTx))
|
|
}
|
|
}
|
|
return b.String()
|
|
}
|
|
|
|
func buildSystemContext(data *ClusterData) string {
|
|
var b strings.Builder
|
|
for host, nd := range data.Nodes {
|
|
if nd.System == nil {
|
|
continue
|
|
}
|
|
s := nd.System
|
|
memPct := 0
|
|
if s.MemTotalMB > 0 {
|
|
memPct = s.MemUsedMB * 100 / s.MemTotalMB
|
|
}
|
|
b.WriteString(fmt.Sprintf("### %s\n", host))
|
|
b.WriteString(fmt.Sprintf(" mem=%d%% (%d/%dMB) disk=%d%% load=%s cpus=%d\n",
|
|
memPct, s.MemUsedMB, s.MemTotalMB, s.DiskUsePct, s.LoadAvg, s.CPUCount))
|
|
b.WriteString(fmt.Sprintf(" oom=%d swap=%d/%dMB inodes=%d%% ufw=%v user=%s panics=%d\n",
|
|
s.OOMKills, s.SwapUsedMB, s.SwapTotalMB, s.InodePct, s.UFWActive, s.ProcessUser, s.PanicCount))
|
|
if len(s.FailedUnits) > 0 {
|
|
b.WriteString(fmt.Sprintf(" failed_units: %s\n", strings.Join(s.FailedUnits, ", ")))
|
|
}
|
|
}
|
|
return b.String()
|
|
}
|
|
|
|
func buildNetworkContext(data *ClusterData) string {
|
|
var b strings.Builder
|
|
for host, nd := range data.Nodes {
|
|
if nd.Network == nil {
|
|
continue
|
|
}
|
|
n := nd.Network
|
|
b.WriteString(fmt.Sprintf("### %s\n", host))
|
|
b.WriteString(fmt.Sprintf(" internet=%v default_route=%v wg_route=%v\n",
|
|
n.InternetReachable, n.DefaultRoute, n.WGRouteExists))
|
|
b.WriteString(fmt.Sprintf(" tcp: established=%d time_wait=%d retransmit=%.2f%%\n",
|
|
n.TCPEstablished, n.TCPTimeWait, n.TCPRetransRate))
|
|
if len(n.PingResults) > 0 {
|
|
var failed []string
|
|
for ip, ok := range n.PingResults {
|
|
if !ok {
|
|
failed = append(failed, ip)
|
|
}
|
|
}
|
|
if len(failed) > 0 {
|
|
b.WriteString(fmt.Sprintf(" mesh_ping_failed: %s\n", strings.Join(failed, ", ")))
|
|
} else {
|
|
b.WriteString(fmt.Sprintf(" mesh_ping: all %d peers OK\n", len(n.PingResults)))
|
|
}
|
|
}
|
|
}
|
|
return b.String()
|
|
}
|
|
|
|
func buildNamespaceContext(data *ClusterData) string {
|
|
var b strings.Builder
|
|
for host, nd := range data.Nodes {
|
|
if len(nd.Namespaces) == 0 {
|
|
continue
|
|
}
|
|
b.WriteString(fmt.Sprintf("### %s (%d namespaces)\n", host, len(nd.Namespaces)))
|
|
for _, ns := range nd.Namespaces {
|
|
b.WriteString(fmt.Sprintf(" ns=%s port_base=%d rqlite=%v(state=%s,ready=%v) olric=%v gateway=%v(status=%d)\n",
|
|
ns.Name, ns.PortBase, ns.RQLiteUp, ns.RQLiteState, ns.RQLiteReady, ns.OlricUp, ns.GatewayUp, ns.GatewayStatus))
|
|
}
|
|
}
|
|
return b.String()
|
|
}
|
|
|
|
func buildAnyoneContext(data *ClusterData) string {
|
|
var b strings.Builder
|
|
for host, nd := range data.Nodes {
|
|
if nd.Anyone == nil {
|
|
continue
|
|
}
|
|
a := nd.Anyone
|
|
if !a.RelayActive && !a.ClientActive {
|
|
continue
|
|
}
|
|
b.WriteString(fmt.Sprintf("### %s\n", host))
|
|
b.WriteString(fmt.Sprintf(" relay=%v client=%v orport=%v socks=%v control=%v\n",
|
|
a.RelayActive, a.ClientActive, a.ORPortListening, a.SocksListening, a.ControlListening))
|
|
if a.RelayActive {
|
|
b.WriteString(fmt.Sprintf(" bootstrap=%d%% fingerprint=%s nickname=%s\n",
|
|
a.BootstrapPct, a.Fingerprint, a.Nickname))
|
|
}
|
|
if len(a.ORPortReachable) > 0 {
|
|
var unreachable []string
|
|
for h, ok := range a.ORPortReachable {
|
|
if !ok {
|
|
unreachable = append(unreachable, h)
|
|
}
|
|
}
|
|
if len(unreachable) > 0 {
|
|
b.WriteString(fmt.Sprintf(" orport_unreachable: %s\n", strings.Join(unreachable, ", ")))
|
|
} else {
|
|
b.WriteString(fmt.Sprintf(" orport: all %d peers reachable\n", len(a.ORPortReachable)))
|
|
}
|
|
}
|
|
}
|
|
return b.String()
|
|
}
|
|
|
|
// OpenRouter API types (OpenAI-compatible)

// openRouterRequest is the JSON body sent to POST /chat/completions.
type openRouterRequest struct {
	Model    string              `json:"model"`
	Messages []openRouterMessage `json:"messages"`
}

// openRouterMessage is one chat turn; this package sends a "system" and a
// "user" message per request.
type openRouterMessage struct {
	Role    string `json:"role"`
	Content string `json:"content"`
}

// openRouterResponse mirrors only the response fields this package reads.
// Error is non-nil when the API reports a failure inside a 200 response.
type openRouterResponse struct {
	Choices []struct {
		Message struct {
			Content string `json:"content"`
		} `json:"message"`
	} `json:"choices"`
	Error *struct {
		Message string `json:"message"`
		Code    int    `json:"code"`
	} `json:"error"`
}
|
|
|
|
func callOpenRouter(model, apiKey, prompt string) (string, error) {
|
|
reqBody := openRouterRequest{
|
|
Model: model,
|
|
Messages: []openRouterMessage{
|
|
{Role: "system", Content: systemPrompt},
|
|
{Role: "user", Content: prompt},
|
|
},
|
|
}
|
|
|
|
jsonBody, err := json.Marshal(reqBody)
|
|
if err != nil {
|
|
return "", fmt.Errorf("marshal request: %w", err)
|
|
}
|
|
|
|
req, err := http.NewRequest("POST", "https://openrouter.ai/api/v1/chat/completions", bytes.NewReader(jsonBody))
|
|
if err != nil {
|
|
return "", fmt.Errorf("create request: %w", err)
|
|
}
|
|
req.Header.Set("Content-Type", "application/json")
|
|
req.Header.Set("Authorization", "Bearer "+apiKey)
|
|
|
|
client := &http.Client{Timeout: 180 * time.Second}
|
|
resp, err := client.Do(req)
|
|
if err != nil {
|
|
return "", fmt.Errorf("HTTP request: %w", err)
|
|
}
|
|
defer resp.Body.Close()
|
|
|
|
body, err := io.ReadAll(resp.Body)
|
|
if err != nil {
|
|
return "", fmt.Errorf("read response: %w", err)
|
|
}
|
|
|
|
if resp.StatusCode != http.StatusOK {
|
|
return "", fmt.Errorf("API returned %d: %s", resp.StatusCode, string(body))
|
|
}
|
|
|
|
var orResp openRouterResponse
|
|
if err := json.Unmarshal(body, &orResp); err != nil {
|
|
return "", fmt.Errorf("unmarshal response: %w", err)
|
|
}
|
|
|
|
if orResp.Error != nil {
|
|
return "", fmt.Errorf("API error: %s", orResp.Error.Message)
|
|
}
|
|
|
|
if len(orResp.Choices) == 0 {
|
|
return "", fmt.Errorf("no choices in response (raw: %s)", truncate(string(body), 500))
|
|
}
|
|
|
|
content := orResp.Choices[0].Message.Content
|
|
if strings.TrimSpace(content) == "" {
|
|
return "", fmt.Errorf("model returned empty response (raw: %s)", truncate(string(body), 500))
|
|
}
|
|
|
|
return content, nil
|
|
}
|
|
|
|
// truncate caps s at max bytes, appending "..." when anything was cut off.
// Strings at or under the limit are returned unchanged.
func truncate(s string, max int) string {
	if len(s) > max {
		return s[:max] + "..."
	}
	return s
}
|
|
|
|
// PrintAnalysis writes the AI analysis to the output, one section per subsystem.
|
|
func PrintAnalysis(result *AnalysisResult, w io.Writer) {
|
|
fmt.Fprintf(w, "\n## AI Analysis (%s)\n", result.Model)
|
|
fmt.Fprintf(w, "%s\n", strings.Repeat("-", 70))
|
|
|
|
for _, sa := range result.Analyses {
|
|
fmt.Fprintf(w, "\n### %s\n\n", strings.ToUpper(sa.Subsystem))
|
|
if sa.Error != nil {
|
|
fmt.Fprintf(w, "Analysis failed: %v\n", sa.Error)
|
|
} else {
|
|
fmt.Fprintf(w, "%s\n", sa.Analysis)
|
|
}
|
|
}
|
|
|
|
fmt.Fprintf(w, "\n%s\n", strings.Repeat("-", 70))
|
|
fmt.Fprintf(w, "(Analysis took %.1fs — %d subsystems analyzed)\n", result.Duration.Seconds(), len(result.Analyses))
|
|
}
|