anonpenguin23 6898f47e2e Replace sshpass password auth with RootWallet SSH keys
Replaces plaintext password-based SSH authentication (sshpass) across
the entire Go CLI with wallet-derived ed25519 keys via RootWallet.

- Add `rw vault ssh agent-load` command to RootWallet CLI for SSH
  agent forwarding in push fanout
- Create wallet.go bridge: PrepareNodeKeys resolves keys from `rw
  vault ssh get --priv`, writes temp PEMs (0600), zero-overwrites
  on cleanup
- Remove Password field from Node struct, update config parser to
  new 3-field format (env|user@host|role)
- Remove all sshpass branches from inspector/ssh.go and
  remotessh/ssh.go, require SSHKey on all SSH paths
- Add WithAgentForward() option to RunSSHStreaming for hub fanout
- Add PrepareNodeKeys + defer cleanup to all 7 entry points:
  inspect, monitor, push, upgrade, clean, recover, install
- Update push fanout to use SSH agent forwarding instead of sshpass
  on hub
- Delete install/ssh.go duplicate, replace with remotessh calls
- Create nodes.conf from remote-nodes.conf (topology only, no
  secrets)
- Update all config defaults and help text from remote-nodes.conf
  to nodes.conf
- Use StrictHostKeyChecking=accept-new consistently everywhere
2026-02-24 17:24:16 +02:00

157 lines
3.6 KiB
Go

package inspector
import (
"bytes"
"context"
"fmt"
"os/exec"
"strings"
"syscall"
"time"
)
const (
	// sshMaxRetries is the number of retries RunSSH may make after the first
	// attempt, so a command is tried at most sshMaxRetries+1 times.
	sshMaxRetries = 3
	// sshRetryDelay is how long RunSSH waits between two consecutive attempts.
	sshRetryDelay = 2 * time.Second
)
// SSHResult describes the outcome of running one command over SSH.
type SSHResult struct {
	// Stdout is the captured standard output; runSSHOnce stores it with
	// surrounding whitespace trimmed.
	Stdout string
	// Stderr is the captured standard error; runSSHOnce stores it with
	// surrounding whitespace trimmed.
	Stderr string
	// ExitCode is the exit status of the ssh process. It stays 0 when no exit
	// status could be extracted from the error, so check Err as well.
	ExitCode int
	// Duration is how long the attempt took.
	Duration time.Duration
	// Err is the error reported while running the command, or nil.
	Err error
	// Retries is how many retries were needed.
	Retries int
}

// OK reports whether the command succeeded: no error was recorded and the
// exit code is 0.
func (r SSHResult) OK() bool {
	if r.Err != nil {
		return false
	}
	return r.ExitCode == 0
}
// RunSSH executes a command on a remote node via SSH with retry on connection failure.
//
// Requires node.SSHKey to be set (via PrepareNodeKeys).
// The -n flag is used to prevent SSH from reading stdin.
//
// Only connection-level failures are retried: at most sshMaxRetries retries
// follow the first attempt, sshRetryDelay apart, and the loop stops as soon as
// ctx is done. The returned value is the result of the last attempt, with
// Retries set to the number of retries that preceded it.
func RunSSH(ctx context.Context, node Node, command string) SSHResult {
	var result SSHResult
	for attempt := 0; attempt <= sshMaxRetries; attempt++ {
		result = runSSHOnce(ctx, node, command)
		result.Retries = attempt

		// Success — return immediately.
		if result.OK() {
			return result
		}

		// ssh reserves exit status 255 for its own errors; any other positive
		// status is the remote command's exit status, i.e. the connection
		// worked and the command itself failed, so retrying cannot help.
		// This has to be decided on ExitCode alone: runSSHOnce records the
		// error returned by cmd.Run() in Err for every non-zero exit, so
		// "Err == nil && ExitCode != 0" never holds.
		if result.ExitCode > 0 && result.ExitCode != 255 {
			return result
		}

		// Anything else is retried only if it looks like a connection problem.
		if !isSSHConnectionError(result) {
			return result
		}

		// Don't retry if the context is done.
		if ctx.Err() != nil {
			return result
		}

		// Wait before the next attempt (there is none after the last one).
		if attempt < sshMaxRetries {
			timer := time.NewTimer(sshRetryDelay)
			select {
			case <-timer.C:
			case <-ctx.Done():
				// Release the timer instead of leaving it pending.
				timer.Stop()
				return result
			}
		}
	}
	return result
}
// runSSHOnce performs exactly one ssh invocation of command against node and
// captures its output, exit status and elapsed time.
//
// When node.SSHKey is empty nothing is executed and the result carries only
// an error. Otherwise ssh runs non-interactively (-n, BatchMode=yes) with a
// 10 second connect timeout, authenticating with the key file at node.SSHKey.
// The error from running the process is returned unchanged in Err.
func runSSHOnce(ctx context.Context, node Node, command string) SSHResult {
	began := time.Now()

	if node.SSHKey == "" {
		return SSHResult{
			Err: fmt.Errorf("no SSH key for %s (call PrepareNodeKeys first)", node.Name()),
		}
	}

	target := fmt.Sprintf("%s@%s", node.User, node.Host)
	cmd := exec.CommandContext(ctx, "ssh",
		"-n",
		"-o", "StrictHostKeyChecking=accept-new",
		"-o", "ConnectTimeout=10",
		"-o", "BatchMode=yes",
		"-i", node.SSHKey,
		target,
		command,
	)

	var outBuf, errBuf bytes.Buffer
	cmd.Stdout = &outBuf
	cmd.Stderr = &errBuf

	runErr := cmd.Run()
	elapsed := time.Since(began)

	// The exit status is only available when the process ran and exited
	// unsuccessfully; in every other case the code stays 0. The comma-ok
	// assertion is false for a nil error, so no separate nil check is needed.
	code := 0
	if exitErr, isExit := runErr.(*exec.ExitError); isExit {
		if ws, hasStatus := exitErr.Sys().(syscall.WaitStatus); hasStatus {
			code = ws.ExitStatus()
		}
	}

	return SSHResult{
		Stdout:   strings.TrimSpace(outBuf.String()),
		Stderr:   strings.TrimSpace(errBuf.String()),
		ExitCode: code,
		Duration: elapsed,
		Err:      runErr,
	}
}
// isSSHConnectionError reports whether a failed result looks like an SSH
// transport problem (timeout, refused, unreachable network, ...) rather than
// a failure of the remote command itself.
//
// It is true when the exit code is 255, or when stderr contains one of the
// known connection-failure phrases, compared case-insensitively.
func isSSHConnectionError(r SSHResult) bool {
	// SSH exit code 255 = SSH connection error (retriable).
	if r.ExitCode == 255 {
		return true
	}

	lowered := strings.ToLower(r.Stderr)
	for _, marker := range [...]string{
		"connection refused",
		"connection timed out",
		"connection reset",
		"no route to host",
		"network is unreachable",
		"could not resolve hostname",
		"ssh_exchange_identification",
		"broken pipe",
		"connection closed by remote host",
	} {
		if strings.Contains(lowered, marker) {
			return true
		}
	}
	return false
}
// RunSSHMulti runs several commands on a remote node in a single RunSSH call.
// The commands are chained with " && ", so the first one that fails stops the
// rest from running.
func RunSSHMulti(ctx context.Context, node Node, commands []string) SSHResult {
	return RunSSH(ctx, node, strings.Join(commands, " && "))
}