diff --git a/CHANGELOG.md b/CHANGELOG.md index 33d119d..b41efd9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,6 +14,7 @@ The format is based on [Keep a Changelog][keepachangelog] and adheres to [Semant ### Changed - Updated readme +- Where we read .yaml files from and where data is saved to ~/.debros ### Deprecated diff --git a/Makefile b/Makefile index 4e611b4..2c93cdd 100644 --- a/Makefile +++ b/Makefile @@ -21,7 +21,7 @@ test-e2e: .PHONY: build clean test run-node run-node2 run-node3 run-example deps tidy fmt vet lint clear-ports -VERSION := 0.51.4-beta +VERSION := 0.51.5-beta COMMIT ?= $(shell git rev-parse --short HEAD 2>/dev/null || echo unknown) DATE ?= $(shell date -u +%Y-%m-%dT%H:%M:%SZ) LDFLAGS := -X 'main.version=$(VERSION)' -X 'main.commit=$(COMMIT)' -X 'main.date=$(DATE)' @@ -46,35 +46,35 @@ clean: # Run bootstrap node (auto-selects identity and data dir) run-node: - @echo "Starting bootstrap node with config..." - go run ./cmd/node --config configs/bootstrap.yaml + @echo "Starting bootstrap node..." + @echo "Config: ~/.debros/bootstrap.yaml" + @echo "Generate it with: network-cli config init --type bootstrap" + go run ./cmd/node --config bootstrap.yaml # Run second node (regular) - requires join address of bootstrap node # Usage: make run-node2 JOINADDR=/ip4/127.0.0.1/tcp/5001 HTTP=5002 RAFT=7002 P2P=4002 run-node2: - @echo "Starting regular node2 with config..." - go run ./cmd/node --config configs/node.yaml + @echo "Starting regular node (node.yaml)..." + @echo "Config: ~/.debros/node.yaml" + @echo "Generate it with: network-cli config init --type node --join localhost:5001 --bootstrap-peers ''" + go run ./cmd/node --config node.yaml # Run third node (regular) - requires join address of bootstrap node # Usage: make run-node3 JOINADDR=/ip4/127.0.0.1/tcp/5001 HTTP=5003 RAFT=7003 P2P=4003 run-node3: - @echo "Starting regular node3 with config..." - go run ./cmd/node --config configs/node3.yaml + @echo "Starting regular node (node2.yaml)..." 
+ @echo "Config: ~/.debros/node2.yaml" + @echo "Generate it with: network-cli config init --type node --name node2.yaml --join localhost:5001 --bootstrap-peers ''" + go run ./cmd/node --config node2.yaml # Run gateway HTTP server # Usage examples: -# make run-gateway # uses defaults (:8080, namespace=default) -# GATEWAY_ADDR=":8081" make run-gateway # override listen addr via env -# GATEWAY_NAMESPACE=myapp make run-gateway # set namespace -# GATEWAY_BOOTSTRAP_PEERS="/ip4/127.0.0.1/tcp/4001/p2p/" make run-gateway -# GATEWAY_REQUIRE_AUTH=1 GATEWAY_API_KEYS="key1:ns1,key2:ns2" make run-gateway +# make run-gateway # uses ~/.debros/gateway.yaml +# Config generated with: network-cli config init --type gateway run-gateway: @echo "Starting gateway HTTP server..." - GATEWAY_ADDR=$(or $(ADDR),$(GATEWAY_ADDR)) \ - GATEWAY_NAMESPACE=$(or $(NAMESPACE),$(GATEWAY_NAMESPACE)) \ - GATEWAY_BOOTSTRAP_PEERS=$(GATEWAY_BOOTSTRAP_PEERS) \ - GATEWAY_REQUIRE_AUTH=$(GATEWAY_REQUIRE_AUTH) \ - GATEWAY_API_KEYS=$(GATEWAY_API_KEYS) \ + @echo "Note: Config must be in ~/.debros/gateway.yaml" + @echo "Generate it with: network-cli config init --type gateway" go run ./cmd/gateway # Run basic usage example @@ -155,15 +155,28 @@ dev-setup: deps # Start development cluster (requires multiple terminals) dev-cluster: - @echo "To start a development cluster, run these commands in separate terminals:" - @echo "1. make run-node # Start bootstrap node (uses configs/bootstrap.yaml)" - @echo "2. make run-node2 # Start second node (uses configs/node.yaml)" - @echo "3. make run-node3 # Start third node (uses configs/node.yaml)" - @echo "4. make run-example # Test basic functionality" - @echo "5. make cli-health # Check network health" - @echo "6. make cli-peers # List peers" - @echo "7. make cli-storage-test # Test storage" - @echo "8. make cli-pubsub-test # Test messaging" + @echo "To start a development cluster with 3 nodes:" + @echo "" + @echo "1. 
Generate config files in ~/.debros:" + @echo " make build" + @echo " ./bin/network-cli config init --type bootstrap" + @echo " ./bin/network-cli config init --type node --name node.yaml --bootstrap-peers ''" + @echo " ./bin/network-cli config init --type node --name node2.yaml --bootstrap-peers ''" + @echo "" + @echo "2. Run in separate terminals:" + @echo " Terminal 1: make run-node # Start bootstrap node (bootstrap.yaml)" + @echo " Terminal 2: make run-node2 # Start node 1 (node.yaml)" + @echo " Terminal 3: make run-node3 # Start node 2 (node2.yaml)" + @echo " Terminal 4: make run-gateway # Start gateway" + @echo "" + @echo "3. Or run custom node with any config file:" + @echo " go run ./cmd/node --config custom-node.yaml" + @echo "" + @echo "4. Test:" + @echo " make cli-health # Check network health" + @echo " make cli-peers # List peers" + @echo " make cli-storage-test # Test storage" + @echo " make cli-pubsub-test # Test messaging" # Full development workflow dev: clean build test @@ -175,22 +188,43 @@ help: @echo " build - Build all executables" @echo " clean - Clean build artifacts" @echo " test - Run tests" + @echo "" + @echo "Configuration (NEW):" + @echo " First, generate config files in ~/.debros with:" + @echo " make build # Build CLI first" + @echo " ./bin/network-cli config init --type bootstrap # Generate bootstrap config" + @echo " ./bin/network-cli config init --type node --bootstrap-peers ''" + @echo " ./bin/network-cli config init --type gateway" + @echo "" + @echo "Network Targets (requires config files in ~/.debros):" @echo " run-node - Start bootstrap node" - @echo " run-node2 - Start second node (requires JOINADDR, optional HTTP/RAFT/P2P)" - @echo " run-node3 - Start third node (requires JOINADDR, optional HTTP/RAFT/P2P)" - @echo " run-gateway - Start HTTP gateway (flags via env: GATEWAY_ADDR, GATEWAY_NAMESPACE, GATEWAY_BOOTSTRAP_PEERS, GATEWAY_REQUIRE_AUTH, GATEWAY_API_KEYS)" + @echo " run-node2 - Start second node" + @echo " run-node3 - 
Start third node" + @echo " run-gateway - Start HTTP gateway" @echo " run-example - Run usage example" + @echo "" + @echo "Running Multiple Nodes:" + @echo " Nodes use --config flag to select which YAML file in ~/.debros to load:" + @echo " go run ./cmd/node --config bootstrap.yaml" + @echo " go run ./cmd/node --config node.yaml" + @echo " go run ./cmd/node --config node2.yaml" + @echo " Generate configs with: ./bin/network-cli config init --name " + @echo "" + @echo "CLI Commands:" @echo " run-cli - Run network CLI help" - @echo " show-bootstrap - Show example bootstrap usage with flags" @echo " cli-health - Check network health" @echo " cli-peers - List network peers" @echo " cli-status - Get network status" @echo " cli-storage-test - Test storage operations" @echo " cli-pubsub-test - Test pub/sub operations" + @echo "" + @echo "Development:" @echo " test-multinode - Full multi-node test with 1 bootstrap + 2 nodes" @echo " test-peer-discovery - Test peer discovery (requires running nodes)" @echo " test-replication - Test data replication (requires running nodes)" @echo " test-consensus - Test database consensus (requires running nodes)" + @echo "" + @echo "Maintenance:" @echo " deps - Download dependencies" @echo " tidy - Tidy dependencies" @echo " fmt - Format code" diff --git a/README.md b/README.md index c30e21e..af7c92f 100644 --- a/README.md +++ b/README.md @@ -88,6 +88,75 @@ A robust, decentralized peer-to-peer network built in Go, providing distributed - **5001:** RQLite HTTP API - **7001:** RQLite Raft consensus +### Filesystem Permissions + +DeBros Network stores all configuration and data in `~/.debros/` directory. 
Ensure you have: + +- **Read/Write access** to your home directory (`~`) +- **Available disk space**: At least 10GB for database and logs +- **No restrictive mount options**: The home directory must not be mounted read-only +- **Unix permissions**: Standard user permissions are sufficient (no root/sudo required) + +#### Directory Structure + +DeBros automatically creates the following directory structure: + +``` +~/.debros/ +├── bootstrap.yaml # Bootstrap node config +├── node.yaml # Node config +├── gateway.yaml # Gateway config +├── bootstrap/ # Bootstrap node data (auto-created) +│ ├── rqlite/ # RQLite database files +│ │ ├── db.sqlite # Main database +│ │ ├── raft/ # Raft consensus data +│ │ └── rsnapshots/ # Raft snapshots +│ ├── peer.info # Node multiaddr (created at startup) +│ └── identity.key # Node private key (created at startup) +├── node/ # Node data (auto-created) +│ ├── rqlite/ # RQLite database files +│ ├── raft/ # Raft data +│ ├── peer.info # Node multiaddr (created at startup) +│ └── identity.key # Node private key (created at startup) +└── node2/ # Additional node configs (if running multiple) + └── rqlite/ # RQLite database files +``` + +**Files Created at Startup:** +- `identity.key` - LibP2P private key for the node (generated once, reused) +- `peer.info` - The node's multiaddr (e.g., `/ip4/0.0.0.0/tcp/4001/p2p/12D3KooW...`) + +**Automatic Creation**: The node automatically creates all necessary data directories when started. You only need to ensure: +1. `~/.debros/` is writable +2. Sufficient disk space available +3. Correct config files exist + +**Permission Check:** + +```bash +# Verify home directory is writable +touch ~/test-write && rm ~/test-write && echo "✓ Home directory is writable" + +# Check available disk space +df -h ~ +``` + +**If you get permission errors:** + +``` +Error: Failed to create/access config directory +Please ensure: + 1. Home directory is accessible + 2. You have write permissions to home directory + 3. 
Disk space is available +``` + +**Solution:** + +- Ensure you're not running with overly restrictive umask: `umask` should show `0022` or similar +- Check home directory permissions: `ls -ld ~` should show your user as owner +- For sandboxed/containerized environments: Ensure `/home/` is writable + --- ## Quick Start @@ -110,7 +179,7 @@ make build ```bash make run-node # Or manually: -go run ./cmd/node --config configs/bootstrap.yaml +go run ./cmd/node --config bootstrap.yaml ``` ### 4. Start Additional Nodes @@ -178,212 +247,141 @@ sudo journalctl -u debros-node.service -f ## Configuration -### Example Configuration Files +### Configuration Files Location -#### `configs/bootstrap.yaml` +All configuration files are stored in `~/.debros/` for both local development and production deployments: -```yaml -node: - id: "" - type: "bootstrap" - listen_addresses: - - "/ip4/0.0.0.0/tcp/4001" - data_dir: "./data/bootstrap" - max_connections: 100 +- `~/.debros/node.yaml` - Node configuration +- `~/.debros/bootstrap.yaml` - Bootstrap node configuration +- `~/.debros/gateway.yaml` - Gateway configuration -database: - data_dir: "./data/bootstrap/rqlite" - replication_factor: 3 - shard_count: 16 - max_database_size: 1073741824 - backup_interval: 24h - rqlite_port: 5001 - rqlite_raft_port: 7001 - rqlite_join_address: "" +The system will **only** load config from `~/.debros/` and will error if required config files are missing. 
-discovery: - bootstrap_peers: [] - discovery_interval: 15s - bootstrap_port: 4001 - http_adv_address: "127.0.0.1" - raft_adv_address: "" +### Generating Configuration Files -security: - enable_tls: false - private_key_file: "" - certificate_file: "" +Use the `network-cli config init` command to generate configuration files: -logging: - level: "info" - format: "console" - output_file: "" +#### Generate a Node Config + +```bash +# Generate basic node config with bootstrap peers +network-cli config init --type node --bootstrap-peers "/ip4/127.0.0.1/tcp/4001/p2p/QmXxx,/ip4/127.0.0.1/tcp/4002/p2p/QmYyy" + +# With custom ports +network-cli config init --type node --name node2.yaml --listen-port 4002 --rqlite-http-port 5002 --rqlite-raft-port 7002 --join localhost:5001 --bootstrap-peers "/ip4/127.0.0.1/tcp/4001/p2p/QmXxx" + +# Force overwrite existing config +network-cli config init --type node --force ``` -#### `configs/node.yaml` +#### Generate a Bootstrap Node Config -```yaml -node: - id: "node2" - type: "node" - listen_addresses: - - "/ip4/0.0.0.0/tcp/4002" - data_dir: "./data/node2" - max_connections: 50 +```bash +# Generate bootstrap node (no join address required) +network-cli config init --type bootstrap -database: - data_dir: "./data/node2/rqlite" - replication_factor: 3 - shard_count: 16 - max_database_size: 1073741824 - backup_interval: 24h - rqlite_port: 5002 - rqlite_raft_port: 7002 - rqlite_join_address: "127.0.0.1:7001" - -discovery: - bootstrap_peers: - - "/ip4/127.0.0.1/tcp/4001/p2p/" - discovery_interval: 15s - bootstrap_port: 4002 - http_adv_address: "127.0.0.1" - raft_adv_address: "" - -security: - enable_tls: false - private_key_file: "" - certificate_file: "" - -logging: - level: "info" - format: "console" - output_file: "" +# With custom ports +network-cli config init --type bootstrap --listen-port 4001 --rqlite-http-port 5001 --rqlite-raft-port 7001 ``` -### YAML Reference +#### Generate a Gateway Config -#### Node YAML (configs/node.yaml or 
configs/bootstrap.yaml) +```bash +# Generate gateway config +network-cli config init --type gateway -The .yaml files are required in order for the nodes and the gateway to run correctly. - -node: - -- id (string) Optional node ID. Auto-generated if empty. -- type (string) "bootstrap" or "node". Default: "node". -- listen_addresses (string[]) LibP2P listen multiaddrs. Default: ["/ip4/0.0.0.0/tcp/4001"]. -- data_dir (string) Data directory. Default: "./data". -- max_connections (int) Max peer connections. Default: 50. - -database: - -- data_dir (string) Directory for database files. Default: "./data/db". -- replication_factor (int) Number of replicas. Default: 3. -- shard_count (int) Shards for data distribution. Default: 16. -- max_database_size (int64 bytes) Max DB size. Default: 1073741824 (1GB). -- backup_interval (duration) e.g., "24h". Default: 24h. -- rqlite_port (int) RQLite HTTP API port. Default: 5001. -- rqlite_raft_port (int) RQLite Raft port. Default: 7001. -- rqlite_join_address (string) Raft address of an existing RQLite node to join (host:port format). Empty for bootstrap. - -discovery: - -- bootstrap_peers (string[]) List of LibP2P multiaddrs of bootstrap peers. -- discovery_interval (duration) How often to announce/discover peers. Default: 15s. -- bootstrap_port (int) Default port for bootstrap nodes. Default: 4001. -- http_adv_address (string) Advertised HTTP address for RQLite (host:port). -- raft_adv_address (string) Advertised Raft address (host:port). -- node_namespace (string) Namespace for node identifiers. Default: "default". - -security: - -- enable_tls (bool) Enable TLS for externally exposed services. Default: false. -- private_key_file (string) Path to TLS private key (if TLS enabled). -- certificate_file (string) Path to TLS certificate (if TLS enabled). - -logging: - -- level (string) one of "debug", "info", "warn", "error". Default: "info". -- format (string) "json" or "console". Default: "console". 
-- output_file (string) Empty for stdout; otherwise path to log file. - -Precedence (node): Flags > YAML > Defaults. - -Example node.yaml - -```yaml -node: - id: "node2" - type: "node" - listen_addresses: - - "/ip4/0.0.0.0/tcp/4002" - data_dir: "./data/node2" - max_connections: 50 - -database: - data_dir: "./data/node2/rqlite" - replication_factor: 3 - shard_count: 16 - max_database_size: 1073741824 - backup_interval: 24h - rqlite_port: 5002 - rqlite_raft_port: 7002 - rqlite_join_address: "127.0.0.1:7001" - -discovery: - bootstrap_peers: - - "/ip4/127.0.0.1/tcp/4001/p2p/" - discovery_interval: 15s - bootstrap_port: 4001 - http_adv_address: "127.0.0.1" - raft_adv_address: "" - node_namespace: "default" - -security: - enable_tls: false - private_key_file: "" - certificate_file: "" - -logging: - level: "info" - format: "console" - output_file: "" +# With bootstrap peers +network-cli config init --type gateway --bootstrap-peers "/ip4/127.0.0.1/tcp/4001/p2p/QmXxx" ``` -#### Gateway YAML (configs/gateway.yaml) +### Running Multiple Nodes on the Same Machine -- listen_addr (string) HTTP listen address, e.g., ":6001". Default: ":6001". -- client_namespace (string) Namespace used by the gateway client. Default: "default". -- bootstrap_peers (string[]) List of bootstrap peer multiaddrs. Default: empty. +You can run multiple nodes on a single machine by creating separate configuration files and using the `--config` flag: -Precedence (gateway): Flags > Environment Variables > YAML > Defaults. 
-Environment variables: +#### Create Multiple Node Configs -- GATEWAY_ADDR -- GATEWAY_NAMESPACE -- GATEWAY_BOOTSTRAP_PEERS (comma-separated) +```bash +# Node 1 +./bin/network-cli config init --type node --name node1.yaml \ + --listen-port 4001 --rqlite-http-port 5001 --rqlite-raft-port 7001 \ + --bootstrap-peers "/ip4/127.0.0.1/tcp/4001/p2p/" -Example gateway.yaml +# Node 2 +./bin/network-cli config init --type node --name node2.yaml \ + --listen-port 4002 --rqlite-http-port 5002 --rqlite-raft-port 7002 \ + --join localhost:5001 \ + --bootstrap-peers "/ip4/127.0.0.1/tcp/4001/p2p/" -```yaml -listen_addr: ":6001" -client_namespace: "default" -bootstrap_peers: - - "" +# Node 3 +./bin/network-cli config init --type node --name node3.yaml \ + --listen-port 4003 --rqlite-http-port 5003 --rqlite-raft-port 7003 \ + --join localhost:5001 \ + --bootstrap-peers "/ip4/127.0.0.1/tcp/4001/p2p/" ``` -### Flags & Environment Variables +#### Run Multiple Nodes in Separate Terminals -- **Flags**: Override config at startup (`--data`, `--p2p-port`, `--rqlite-http-port`, etc.) -- **Env Vars**: Override config and flags (`NODE_ID`, `RQLITE_PORT`, `BOOTSTRAP_PEERS`, etc.) -- **Precedence (gateway)**: Flags > Env Vars > YAML > Defaults -- **Precedence (node)**: Flags > YAML > Defaults +```bash +# Terminal 1 - Bootstrap node +go run ./cmd/node --config bootstrap.yaml -### Bootstrap & Database Endpoints +# Terminal 2 - Node 1 +go run ./cmd/node --config node1.yaml -- **Bootstrap peers**: Set in config or via `BOOTSTRAP_PEERS` env var. -- **Database endpoints**: Set in config or via `RQLITE_NODES` env var. -- **Development mode**: Use `NETWORK_DEV_LOCAL=1` for localhost defaults. 
+# Terminal 3 - Node 2 +go run ./cmd/node --config node2.yaml -### Configuration Validation +# Terminal 4 - Node 3 +go run ./cmd/node --config node3.yaml +``` + +#### Or Use Makefile Targets + +```bash +# Terminal 1 +make run-node # Runs: go run ./cmd/node --config bootstrap.yaml + +# Terminal 2 +make run-node2 # Runs: go run ./cmd/node --config node.yaml + +# Terminal 3 +make run-node3 # Runs: go run ./cmd/node --config node2.yaml +``` + +#### Key Points for Multiple Nodes + +- **Each node needs unique ports**: P2P port, RQLite HTTP port, and RQLite Raft port must all be different +- **Join address**: Non-bootstrap nodes need `rqlite_join_address` pointing to the bootstrap or an existing node +- **Bootstrap peers**: All nodes need the bootstrap node's multiaddr in `discovery.bootstrap_peers` +- **Config files**: Store all configs in `~/.debros/` with different filenames +- **--config flag**: Specify which config file to load (defaults to `node.yaml`) + +⚠️ **Common Mistake - Same Ports:** +If all nodes use the same ports (e.g., 5001, 7001), they will try to bind to the same addresses and fail to communicate. Verify each node has unique ports: + +```bash +# Bootstrap +grep "rqlite_port\|rqlite_raft_port" ~/.debros/bootstrap.yaml +# Should show: rqlite_port: 5001, rqlite_raft_port: 7001 + +# Node 2 +grep "rqlite_port\|rqlite_raft_port" ~/.debros/node.yaml +# Should show: rqlite_port: 5002, rqlite_raft_port: 7002 + +# Node 3 +grep "rqlite_port\|rqlite_raft_port" ~/.debros/node2.yaml +# Should show: rqlite_port: 5003, rqlite_raft_port: 7003 +``` + +If ports are wrong, regenerate the config with `--force`: + +```bash +./bin/network-cli config init --type node --name node.yaml \ + --listen-port 4002 --rqlite-http-port 5002 --rqlite-raft-port 7002 \ + --join localhost:5001 --bootstrap-peers '' --force +``` + +### Validating Configuration DeBros Network performs strict validation of all configuration files at startup. 
This ensures invalid configurations are caught immediately rather than causing silent failures later. @@ -757,4 +755,196 @@ GET /v1/pubsub/topics # List active topics - **PubSub** - WS Subscribe: `GET /v1/pubsub/ws?topic=` - Publish: `POST /v1/pubsub/publish` `{topic, data_base64}` → `{status:"ok"}` - - Topics: `GET /v1/pubsub/topics` → ` \ No newline at end of file + - Topics: `GET /v1/pubsub/topics` → `{topics:[...]}` + +--- + +## Troubleshooting + +### Configuration & Permissions + +**Error: "Failed to create/access config directory"** + +This happens when DeBros cannot access or create `~/.debros/` directory. + +**Causes:** +1. Home directory is not writable +2. Home directory doesn't exist +3. Filesystem is read-only (sandboxed/containerized environment) +4. Permission denied (running with wrong user/umask) + +**Solutions:** + +```bash +# Check home directory exists and is writable +ls -ld ~ +touch ~/test-write && rm ~/test-write + +# Check umask (should be 0022 or 0002) +umask + +# If umask is too restrictive, change it +umask 0022 + +# Check disk space +df -h ~ + +# For containerized environments, ensure /home/ is mounted with write permissions +docker run -v /home:/home --user $(id -u):$(id -g) debros-network +``` + +**Error: "Config file not found at ~/.debros/node.yaml"** + +The node requires a config file to exist before starting. + +**Solution:** + +Generate config files first: + +```bash +# Build CLI +make build + +# Generate configs +./bin/network-cli config init --type bootstrap +./bin/network-cli config init --type node --bootstrap-peers '' +./bin/network-cli config init --type gateway +``` + +### Node Startup Issues + +**Error: "node.data_dir: parent directory not writable"** + +The data directory parent is not accessible. 
+ +**Solution:** + +Ensure `~/.debros` is writable and has at least 10GB free space: + +```bash +# Check permissions +ls -ld ~/.debros + +# Check available space +df -h ~/.debros + +# Recreate if corrupted (WARNING: deletes all configs, identity keys, and node data) +rm -rf ~/.debros +./bin/network-cli config init --type bootstrap +``` + +**Error: "failed to create data directory"** + +The node cannot create its data directory in `~/.debros`. + +**Causes:** +1. `~/.debros` is not writable +2. Parent directory path in config uses `~` which isn't expanded properly +3. Disk is full + +**Solutions:** + +```bash +# Check ~/.debros exists and is writable +mkdir -p ~/.debros +ls -ld ~/.debros + +# Verify data_dir in config uses ~ (e.g., ~/.debros/node) +cat ~/.debros/node.yaml | grep data_dir + +# Check disk space +df -h ~ + +# Ensure user owns ~/.debros +chown -R $(whoami) ~/.debros + +# Retry node startup +make run-node +``` + +**Error: "stat ~/.debros: no such file or directory"** + +**Port Already in Use** + +If you get "address already in use" errors: + +```bash +# Find processes using ports +lsof -i :4001 # P2P port +lsof -i :5001 # RQLite HTTP +lsof -i :7001 # RQLite Raft + +# Kill if needed (use the PID reported by lsof) +kill -9 <PID> + +# Or use different ports in config +./bin/network-cli config init --type node --listen-port 4002 --rqlite-http-port 5002 --rqlite-raft-port 7002 +``` + +### Common Configuration Errors + +**Error: "discovery.bootstrap_peers: required for node type"** + +Nodes (non-bootstrap) must specify bootstrap peers to discover the network. + +**Solution:** + +Generate node config with bootstrap peers: + +```bash +./bin/network-cli config init --type node --bootstrap-peers '/ip4/127.0.0.1/tcp/4001/p2p/12D3KooW...' +``` + +**Error: "database.rqlite_join_address: required for node type"** + +Non-bootstrap nodes must specify which node to join in the Raft cluster. 
+ +**Solution:** + +Generate config with join address: + +```bash +./bin/network-cli config init --type node --join localhost:5001 +``` + +**Error: "database.rqlite_raft_port: must differ from database.rqlite_port"** + +HTTP and Raft ports cannot be the same. + +**Solution:** + +Use different ports (RQLite HTTP and Raft must be on different ports): + +```bash +./bin/network-cli config init --type node \ + --rqlite-http-port 5001 \ + --rqlite-raft-port 7001 +``` + +### Peer Discovery Issues + +If nodes can't find each other: + +1. **Verify bootstrap node is running:** + ```bash + ./bin/network-cli health + ./bin/network-cli peers + ``` + +2. **Check bootstrap peer multiaddr is correct:** + ```bash + cat ~/.debros/bootstrap/peer.info # On bootstrap node + # Should match value in other nodes' discovery.bootstrap_peers + ``` + +3. **Ensure all nodes have same bootstrap peers in config** + +4. **Check firewall/network:** + ```bash + # Verify P2P port is open + nc -zv 127.0.0.1 4001 + ``` + +--- + +## License \ No newline at end of file diff --git a/cmd/cli/main.go b/cmd/cli/main.go index f9acc47..0399ec0 100644 --- a/cmd/cli/main.go +++ b/cmd/cli/main.go @@ -7,12 +7,14 @@ import ( "log" "os" "os/exec" + "path/filepath" "strconv" "strings" "time" "github.com/DeBrosOfficial/network/pkg/auth" "github.com/DeBrosOfficial/network/pkg/client" + "github.com/DeBrosOfficial/network/pkg/config" "github.com/libp2p/go-libp2p/core/crypto" "github.com/libp2p/go-libp2p/core/peer" ) @@ -78,6 +80,8 @@ func main() { handlePeerID() case "auth": handleAuth(args) + case "config": + handleConfig(args) case "help", "--help", "-h": showHelp() @@ -576,6 +580,338 @@ func isPrintableText(s string) bool { return len(s) > 0 && float64(printableCount)/float64(len(s)) > 0.8 } +func handleConfig(args []string) { + if len(args) == 0 { + showConfigHelp() + return + } + + subcommand := args[0] + subargs := args[1:] + + switch subcommand { + case "init": + handleConfigInit(subargs) + case "validate": + 
handleConfigValidate(subargs) + case "help": + showConfigHelp() + default: + fmt.Fprintf(os.Stderr, "Unknown config subcommand: %s\n", subcommand) + showConfigHelp() + os.Exit(1) + } +} + +func showConfigHelp() { + fmt.Printf("Config Management Commands\n\n") + fmt.Printf("Usage: network-cli config [options]\n\n") + fmt.Printf("Subcommands:\n") + fmt.Printf(" init - Generate configuration files in ~/.debros\n") + fmt.Printf(" validate --name - Validate a config file\n\n") + fmt.Printf("Init Options:\n") + fmt.Printf(" --type - Config type: node, bootstrap, gateway (default: node)\n") + fmt.Printf(" --name - Output filename (default: <type>.yaml, e.g. node.yaml)\n") + fmt.Printf(" --id - Node ID for bootstrap peers\n") + fmt.Printf(" --listen-port - LibP2P listen port (default: 4001)\n") + fmt.Printf(" --rqlite-http-port - RQLite HTTP port (default: 5001)\n") + fmt.Printf(" --rqlite-raft-port - RQLite Raft port (default: 7001)\n") + fmt.Printf(" --join - RQLite address to join (required for non-bootstrap)\n") + fmt.Printf(" --bootstrap-peers - Comma-separated bootstrap peer multiaddrs\n") + fmt.Printf(" --force - Overwrite existing config\n\n") + fmt.Printf("Examples:\n") + fmt.Printf(" network-cli config init\n") + fmt.Printf(" network-cli config init --type node --bootstrap-peers /ip4/127.0.0.1/tcp/4001/p2p/QmXxx,/ip4/127.0.0.1/tcp/4002/p2p/QmYyy\n") + fmt.Printf(" network-cli config init --type bootstrap\n") + fmt.Printf(" network-cli config init --type gateway\n") + fmt.Printf(" network-cli config validate --name node.yaml\n") +} + +func handleConfigInit(args []string) { + // Parse flags + var ( + cfgType = "node" + name string + id string + listenPort = 4001 + rqliteHTTPPort = 5001 + rqliteRaftPort = 7001 + joinAddr string + bootstrapPeers string + force bool + ) + + for i := 0; i < len(args); i++ { + switch args[i] { + case "--type": + if i+1 < len(args) { + cfgType = args[i+1] + i++ + } + case "--name": + if i+1 < len(args) { + name = args[i+1] + i++ + } + case "--id": 
+ if i+1 < len(args) { + id = args[i+1] + i++ + } + case "--listen-port": + if i+1 < len(args) { + if p, err := strconv.Atoi(args[i+1]); err == nil { + listenPort = p + } + i++ + } + case "--rqlite-http-port": + if i+1 < len(args) { + if p, err := strconv.Atoi(args[i+1]); err == nil { + rqliteHTTPPort = p + } + i++ + } + case "--rqlite-raft-port": + if i+1 < len(args) { + if p, err := strconv.Atoi(args[i+1]); err == nil { + rqliteRaftPort = p + } + i++ + } + case "--join": + if i+1 < len(args) { + joinAddr = args[i+1] + i++ + } + case "--bootstrap-peers": + if i+1 < len(args) { + bootstrapPeers = args[i+1] + i++ + } + case "--force": + force = true + } + } + + // Validate type + if cfgType != "node" && cfgType != "bootstrap" && cfgType != "gateway" { + fmt.Fprintf(os.Stderr, "Invalid --type: %s (expected: node, bootstrap, or gateway)\n", cfgType) + os.Exit(1) + } + + // Default the output filename to the config type (node.yaml, bootstrap.yaml, gateway.yaml) when --name is not given + if name == "" { + name = cfgType + ".yaml" + } + + // Ensure config directory exists + configDir, err := config.EnsureConfigDir() + if err != nil { + fmt.Fprintf(os.Stderr, "Failed to ensure config directory: %v\n", err) + os.Exit(1) + } + + configPath := filepath.Join(configDir, name) + + // Check if file exists + if !force { + if _, err := os.Stat(configPath); err == nil { + fmt.Fprintf(os.Stderr, "Config file already exists at %s (use --force to overwrite)\n", configPath) + os.Exit(1) + } + } + + // Generate config based on type + var configContent string + switch cfgType { + case "node": + configContent = generateNodeConfig(name, id, listenPort, rqliteHTTPPort, rqliteRaftPort, joinAddr, bootstrapPeers) + case "bootstrap": + configContent = generateBootstrapConfig(name, id, listenPort, rqliteHTTPPort, rqliteRaftPort) + case "gateway": + configContent = generateGatewayConfig() + } + + // Write config file + if err := os.WriteFile(configPath, []byte(configContent), 0644); err != nil { + 
fmt.Fprintf(os.Stderr, "Failed to write config file: %v\n", err) + os.Exit(1) + } + + fmt.Printf("✅ Configuration file created: %s\n", configPath) + fmt.Printf(" Type: 
%s\n", cfgType) + fmt.Printf("\nYou can now start the %s using the generated config.\n", cfgType) +} + +func handleConfigValidate(args []string) { + var name string + for i := 0; i < len(args); i++ { + if args[i] == "--name" && i+1 < len(args) { + name = args[i+1] + i++ + } + } + + if name == "" { + fmt.Fprintf(os.Stderr, "Missing --name flag\n") + showConfigHelp() + os.Exit(1) + } + + configDir, err := config.ConfigDir() + if err != nil { + fmt.Fprintf(os.Stderr, "Failed to get config directory: %v\n", err) + os.Exit(1) + } + + configPath := filepath.Join(configDir, name) + file, err := os.Open(configPath) + if err != nil { + fmt.Fprintf(os.Stderr, "Failed to open config file: %v\n", err) + os.Exit(1) + } + defer file.Close() + + var cfg config.Config + if err := config.DecodeStrict(file, &cfg); err != nil { + fmt.Fprintf(os.Stderr, "Failed to parse config: %v\n", err) + os.Exit(1) + } + + // Run validation + errs := cfg.Validate() + if len(errs) > 0 { + fmt.Fprintf(os.Stderr, "\n❌ Configuration errors (%d):\n", len(errs)) + for _, err := range errs { + fmt.Fprintf(os.Stderr, " - %s\n", err) + } + os.Exit(1) + } + + fmt.Printf("✅ Config is valid: %s\n", configPath) +} + +func generateNodeConfig(name, id string, listenPort, rqliteHTTPPort, rqliteRaftPort int, joinAddr, bootstrapPeers string) string { + nodeID := id + if nodeID == "" { + nodeID = fmt.Sprintf("node-%d", time.Now().Unix()) + } + + // Parse bootstrap peers + var peers []string + if bootstrapPeers != "" { + for _, p := range strings.Split(bootstrapPeers, ",") { + if p = strings.TrimSpace(p); p != "" { + peers = append(peers, p) + } + } + } + + // Construct data_dir from name stem (remove .yaml); the "~" prefix is emitted literally into the YAML + dataDir := strings.TrimSuffix(name, ".yaml") + dataDir = filepath.Join("~", ".debros", dataDir) + + var peersYAML strings.Builder + if len(peers) == 0 { + peersYAML.WriteString(" bootstrap_peers: []") + } else { + peersYAML.WriteString(" bootstrap_peers:\n") + for _, p := 
range peers { + 
fmt.Fprintf(&peersYAML, " - \"%s\"\n", p) + } + } + + if joinAddr == "" { + joinAddr = "localhost:5001" + } + + return fmt.Sprintf(`node: + id: "%s" + type: "node" + listen_addresses: + - "/ip4/0.0.0.0/tcp/%d" + data_dir: "%s" + max_connections: 50 + +database: + data_dir: "%s/rqlite" + replication_factor: 3 + shard_count: 16 + max_database_size: 1073741824 + backup_interval: "24h" + rqlite_port: %d + rqlite_raft_port: %d + rqlite_join_address: "%s" + +discovery: +%s + discovery_interval: "15s" + bootstrap_port: %d + http_adv_address: "127.0.0.1:%d" + raft_adv_address: "127.0.0.1:%d" + node_namespace: "default" + +security: + enable_tls: false + +logging: + level: "info" + format: "console" +`, nodeID, listenPort, dataDir, dataDir, rqliteHTTPPort, rqliteRaftPort, joinAddr, peersYAML.String(), 4001, rqliteHTTPPort, rqliteRaftPort) +} + +func generateBootstrapConfig(name, id string, listenPort, rqliteHTTPPort, rqliteRaftPort int) string { + nodeID := id + if nodeID == "" { + nodeID = "bootstrap" + } + + dataDir := filepath.Join("~", ".debros", "bootstrap") + + return fmt.Sprintf(`node: + id: "%s" + type: "bootstrap" + listen_addresses: + - "/ip4/0.0.0.0/tcp/%d" + data_dir: "%s" + max_connections: 50 + +database: + data_dir: "%s/rqlite" + replication_factor: 3 + shard_count: 16 + max_database_size: 1073741824 + backup_interval: "24h" + rqlite_port: %d + rqlite_raft_port: %d + rqlite_join_address: "" + +discovery: + bootstrap_peers: [] + discovery_interval: "15s" + bootstrap_port: %d + http_adv_address: "127.0.0.1:%d" + raft_adv_address: "127.0.0.1:%d" + node_namespace: "default" + +security: + enable_tls: false + +logging: + level: "info" + format: "console" +`, nodeID, listenPort, dataDir, dataDir, rqliteHTTPPort, rqliteRaftPort, 4001, rqliteHTTPPort, rqliteRaftPort) +} + +func generateGatewayConfig() string { + return `listen_addr: ":6001" +client_namespace: "default" +rqlite_dsn: "" +bootstrap_peers: [] +` +} + func showHelp() { fmt.Printf("Network 
CLI - Distributed P2P Network Management Tool\n\n") fmt.Printf("Usage: network-cli [args...]\n\n") @@ -591,6 +927,7 @@ func showHelp() { fmt.Printf(" pubsub subscribe [duration] 🔐 Subscribe to topic\n") fmt.Printf(" pubsub topics 🔐 List topics\n") fmt.Printf(" connect - Connect to peer\n") + fmt.Printf(" config - Manage config files in ~/.debros (init, validate)\n") fmt.Printf(" help - Show this help\n\n") fmt.Printf("Global Flags:\n") diff --git a/cmd/gateway/config.go b/cmd/gateway/config.go index a32619e..76548da 100644 --- a/cmd/gateway/config.go +++ b/cmd/gateway/config.go @@ -1,7 +1,6 @@ package main import ( - "flag" "fmt" "os" "strings" @@ -38,10 +37,43 @@ func getEnvBoolDefault(key string, def bool) bool { } } -// parseGatewayConfig loads optional configs/gateway.yaml then applies env and flags. -// Priority: flags > env > yaml > defaults. +// parseGatewayConfig loads gateway.yaml from ~/.debros exclusively. func parseGatewayConfig(logger *logging.ColoredLogger) *gateway.Config { - // Base defaults + // Determine config path + configPath, err := config.DefaultPath("gateway.yaml") + if err != nil { + logger.ComponentError(logging.ComponentGeneral, "Failed to determine config path", zap.Error(err)) + fmt.Fprintf(os.Stderr, "Configuration error: %v\n", err) + os.Exit(1) + } + + // Load YAML + type yamlCfg struct { + ListenAddr string `yaml:"listen_addr"` + ClientNamespace string `yaml:"client_namespace"` + RQLiteDSN string `yaml:"rqlite_dsn"` + BootstrapPeers []string `yaml:"bootstrap_peers"` + } + + data, err := os.ReadFile(configPath) + if err != nil { + logger.ComponentError(logging.ComponentGeneral, "Config file not found", + zap.String("path", configPath), + zap.Error(err)) + fmt.Fprintf(os.Stderr, "\nConfig file not found at %s\n", configPath) + fmt.Fprintf(os.Stderr, "Generate it using: network-cli config init --type gateway\n") + os.Exit(1) + } + + var y yamlCfg + // Use strict YAML decoding to reject unknown fields + if err := 
config.DecodeStrict(strings.NewReader(string(data)), &y); err != nil { + logger.ComponentError(logging.ComponentGeneral, "Failed to parse gateway config", zap.Error(err)) + fmt.Fprintf(os.Stderr, "Configuration parse error: %v\n", err) + os.Exit(1) + } + + // Build config from YAML cfg := &gateway.Config{ ListenAddr: ":6001", ClientNamespace: "default", @@ -49,93 +81,26 @@ func parseGatewayConfig(logger *logging.ColoredLogger) *gateway.Config { RQLiteDSN: "", } - // 1) YAML (optional) - { - type yamlCfg struct { - ListenAddr string `yaml:"listen_addr"` - ClientNamespace string `yaml:"client_namespace"` - RQLiteDSN string `yaml:"rqlite_dsn"` - BootstrapPeers []string `yaml:"bootstrap_peers"` - } - const path = "configs/gateway.yaml" - if data, err := os.ReadFile(path); err == nil { - var y yamlCfg - // Use strict YAML decoding to reject unknown fields - if err := config.DecodeStrict(strings.NewReader(string(data)), &y); err != nil { - logger.ComponentError(logging.ComponentGeneral, "failed to parse configs/gateway.yaml", zap.Error(err)) - fmt.Fprintf(os.Stderr, "Configuration load error: %v\n", err) - os.Exit(1) - } - if v := strings.TrimSpace(y.ListenAddr); v != "" { - cfg.ListenAddr = v - } - if v := strings.TrimSpace(y.ClientNamespace); v != "" { - cfg.ClientNamespace = v - } - if v := strings.TrimSpace(y.RQLiteDSN); v != "" { - cfg.RQLiteDSN = v - } - if len(y.BootstrapPeers) > 0 { - var bp []string - for _, p := range y.BootstrapPeers { - p = strings.TrimSpace(p) - if p != "" { - bp = append(bp, p) - } - } - if len(bp) > 0 { - cfg.BootstrapPeers = bp - } - } - } - } - - // 2) Env overrides - if v := strings.TrimSpace(os.Getenv("GATEWAY_ADDR")); v != "" { + if v := strings.TrimSpace(y.ListenAddr); v != "" { cfg.ListenAddr = v } - if v := strings.TrimSpace(os.Getenv("GATEWAY_NAMESPACE")); v != "" { + if v := strings.TrimSpace(y.ClientNamespace); v != "" { cfg.ClientNamespace = v } - if v := strings.TrimSpace(os.Getenv("GATEWAY_RQLITE_DSN")); v != "" { + if v := 
strings.TrimSpace(y.RQLiteDSN); v != "" { cfg.RQLiteDSN = v } - if v := strings.TrimSpace(os.Getenv("GATEWAY_BOOTSTRAP_PEERS")); v != "" { - parts := strings.Split(v, ",") + if len(y.BootstrapPeers) > 0 { var bp []string - for _, part := range parts { - s := strings.TrimSpace(part) - if s != "" { - bp = append(bp, s) + for _, p := range y.BootstrapPeers { + p = strings.TrimSpace(p) + if p != "" { + bp = append(bp, p) } } - cfg.BootstrapPeers = bp - } - - // 3) Flags (override env) - addr := flag.String("addr", "", "HTTP listen address (e.g., :6001)") - ns := flag.String("namespace", "", "Client namespace for scoping resources") - peers := flag.String("bootstrap-peers", "", "Comma-separated bootstrap peers for network client") - - // Do not call flag.Parse() elsewhere to avoid double-parsing - flag.Parse() - - if a := strings.TrimSpace(*addr); a != "" { - cfg.ListenAddr = a - } - if n := strings.TrimSpace(*ns); n != "" { - cfg.ClientNamespace = n - } - if p := strings.TrimSpace(*peers); p != "" { - parts := strings.Split(p, ",") - var bp []string - for _, part := range parts { - s := strings.TrimSpace(part) - if s != "" { - bp = append(bp, s) - } + if len(bp) > 0 { + cfg.BootstrapPeers = bp } - cfg.BootstrapPeers = bp } // Validate configuration @@ -148,7 +113,8 @@ func parseGatewayConfig(logger *logging.ColoredLogger) *gateway.Config { os.Exit(1) } - logger.ComponentInfo(logging.ComponentGeneral, "Loaded gateway configuration", + logger.ComponentInfo(logging.ComponentGeneral, "Loaded gateway configuration from YAML", + zap.String("path", configPath), zap.String("addr", cfg.ListenAddr), zap.String("namespace", cfg.ClientNamespace), zap.Int("bootstrap_peer_count", len(cfg.BootstrapPeers)), diff --git a/cmd/node/main.go b/cmd/node/main.go index 70646ef..5d469b1 100644 --- a/cmd/node/main.go +++ b/cmd/node/main.go @@ -7,6 +7,8 @@ import ( "os" "os/signal" "path/filepath" + "strconv" + "strings" "syscall" "github.com/DeBrosOfficial/network/pkg/config" @@ -29,15 +31,8 @@ 
func setup_logger(component logging.Component) (logger *logging.ColoredLogger) { } // parse_flags parses command-line flags and returns them. -func parse_flags() (configPath, dataDir, nodeID *string, p2pPort, rqlHTTP, rqlRaft *int, rqlJoinAddr, advAddr *string, help *bool) { - configPath = flag.String("config", "", "Path to config YAML file (overrides defaults)") - dataDir = flag.String("data", "", "Data directory (auto-detected if not provided)") - nodeID = flag.String("id", "", "Node identifier (for running multiple local nodes)") - p2pPort = flag.Int("p2p-port", 4001, "LibP2P listen port") - rqlHTTP = flag.Int("rqlite-http-port", 5001, "RQLite HTTP API port") - rqlRaft = flag.Int("rqlite-raft-port", 7001, "RQLite Raft port") - rqlJoinAddr = flag.String("rqlite-join-address", "", "RQLite address to join (e.g., /ip4/)") - advAddr = flag.String("adv-addr", "127.0.0.1", "Default Advertise address for rqlite and rafts") +func parse_flags() (configName *string, help *bool) { + configName = flag.String("config", "node.yaml", "Config filename in ~/.debros (default: node.yaml)") help = flag.Bool("help", false, "Show help") flag.Parse() @@ -67,18 +62,39 @@ func check_if_should_open_help(help *bool) { } } -// select_data_dir selects the data directory for the node -// If none of (hasConfigFile, nodeID, dataDir) are present, throw an error and do not start -func select_data_dir(dataDir *string, nodeID *string, hasConfigFile bool) { +// select_data_dir validates that we can load the config from ~/.debros +func select_data_dir_check(configName *string) { logger := setup_logger(logging.ComponentNode) - if !hasConfigFile && (*nodeID == "" || nodeID == nil) && (*dataDir == "" || dataDir == nil) { - logger.Error("No config file, node ID, or data directory specified. Please provide at least one. 
Refusing to start.") + // Ensure config directory exists and is writable + _, err := config.EnsureConfigDir() + if err != nil { + logger.Error("Failed to ensure config directory", zap.Error(err)) + fmt.Fprintf(os.Stderr, "\n❌ Configuration Error:\n") + fmt.Fprintf(os.Stderr, "Failed to create/access config directory: %v\n", err) + fmt.Fprintf(os.Stderr, "\nPlease ensure:\n") + fmt.Fprintf(os.Stderr, " 1. Home directory is accessible: %s\n", os.ExpandEnv("~")) + fmt.Fprintf(os.Stderr, " 2. You have write permissions to home directory\n") + fmt.Fprintf(os.Stderr, " 3. Disk space is available\n") os.Exit(1) } - if *dataDir != "" { - logger.Info("Data directory selected: %s", zap.String("dataDir", *dataDir)) + configPath, err := config.DefaultPath(*configName) + if err != nil { + logger.Error("Failed to determine config path", zap.Error(err)) + os.Exit(1) + } + + if _, err := os.Stat(configPath); err != nil { + logger.Error("Config file not found", + zap.String("path", configPath), + zap.Error(err)) + fmt.Fprintf(os.Stderr, "\n❌ Configuration Error:\n") + fmt.Fprintf(os.Stderr, "Config file not found at %s\n", configPath) + fmt.Fprintf(os.Stderr, "\nGenerate it with one of:\n") + fmt.Fprintf(os.Stderr, " network-cli config init --type bootstrap\n") + fmt.Fprintf(os.Stderr, " network-cli config init --type node --bootstrap-peers ''\n") + os.Exit(1) } } @@ -97,9 +113,21 @@ func startNode(ctx context.Context, cfg *config.Config, port int) error { return err } + // Expand data directory path for peer.info file + dataDir := os.ExpandEnv(cfg.Node.DataDir) + if strings.HasPrefix(dataDir, "~") { + home, err := os.UserHomeDir() + if err != nil { + logger.Error("failed to determine home directory: %v", zap.Error(err)) + dataDir = cfg.Node.DataDir + } else { + dataDir = filepath.Join(home, dataDir[1:]) + } + } + // Save the peer ID to a file for CLI access (especially useful for bootstrap) peerID := n.GetPeerID() - peerInfoFile := filepath.Join(cfg.Node.DataDir, "peer.info") + 
peerInfoFile := filepath.Join(dataDir, "peer.info") peerMultiaddr := fmt.Sprintf("/ip4/0.0.0.0/tcp/%d/p2p/%s", port, peerID) if err := os.WriteFile(peerInfoFile, []byte(peerMultiaddr), 0644); err != nil { @@ -168,54 +196,107 @@ func printValidationErrors(errs []error) { os.Exit(1) } +// ensureDataDirectories ensures that all necessary data directories exist and have correct permissions. +func ensureDataDirectories(cfg *config.Config, logger *logging.ColoredLogger) error { + // Expand ~ in data_dir path + dataDir := os.ExpandEnv(cfg.Node.DataDir) + if strings.HasPrefix(dataDir, "~") { + home, err := os.UserHomeDir() + if err != nil { + return fmt.Errorf("failed to determine home directory: %w", err) + } + dataDir = filepath.Join(home, dataDir[1:]) + } + + // Ensure Node.DataDir exists and is writable + if err := os.MkdirAll(dataDir, 0755); err != nil { + return fmt.Errorf("failed to create data directory %s: %w", dataDir, err) + } + logger.ComponentInfo(logging.ComponentNode, "Data directory created/verified", zap.String("path", dataDir)) + + // Ensure RQLite data directory exists + rqliteDir := filepath.Join(dataDir, "rqlite") + if err := os.MkdirAll(rqliteDir, 0755); err != nil { + return fmt.Errorf("failed to create rqlite data directory: %w", err) + } + logger.ComponentInfo(logging.ComponentNode, "RQLite data directory created/verified", zap.String("path", rqliteDir)) + + return nil +} + func main() { logger := setup_logger(logging.ComponentNode) // Parse command-line flags - configPath, dataDir, nodeID, p2pPort, rqlHTTP, rqlRaft, rqlJoinAddr, advAddr, help := parse_flags() + configName, help := parse_flags() check_if_should_open_help(help) - select_data_dir(dataDir, nodeID, *configPath != "") - // Load configuration - var cfg *config.Config - if *configPath != "" { - // Load from YAML with strict decoding - var err error - cfg, err = LoadConfigFromYAML(*configPath) - if err != nil { - logger.Error("Failed to load config from YAML", zap.Error(err)) - 
fmt.Fprintf(os.Stderr, "Configuration load error: %v\n", err) - os.Exit(1) - } - logger.ComponentInfo(logging.ComponentNode, "Configuration loaded from YAML file", zap.String("path", *configPath)) - } else { - // Use default configuration - cfg = config.DefaultConfig() - logger.ComponentInfo(logging.ComponentNode, "Default configuration loaded successfully") + // Check if config file exists + select_data_dir_check(configName) + + // Load configuration from ~/.debros/node.yaml + configPath, err := config.DefaultPath(*configName) + if err != nil { + logger.Error("Failed to determine config path", zap.Error(err)) + fmt.Fprintf(os.Stderr, "Configuration error: %v\n", err) + os.Exit(1) } - // Apply command-line flag overrides - apply_flag_overrides(cfg, p2pPort, rqlHTTP, rqlRaft, rqlJoinAddr, advAddr, dataDir) - logger.ComponentInfo(logging.ComponentNode, "Command line arguments applied to configuration") + var cfg *config.Config + var cfgErr error + cfg, cfgErr = LoadConfigFromYAML(configPath) + if cfgErr != nil { + logger.Error("Failed to load config from YAML", zap.Error(cfgErr)) + fmt.Fprintf(os.Stderr, "Configuration load error: %v\n", cfgErr) + os.Exit(1) + } + logger.ComponentInfo(logging.ComponentNode, "Configuration loaded from YAML file", zap.String("path", configPath)) + + // Set default advertised addresses if empty + if cfg.Discovery.HttpAdvAddress == "" { + cfg.Discovery.HttpAdvAddress = fmt.Sprintf("127.0.0.1:%d", cfg.Database.RQLitePort) + } + if cfg.Discovery.RaftAdvAddress == "" { + cfg.Discovery.RaftAdvAddress = fmt.Sprintf("127.0.0.1:%d", cfg.Database.RQLiteRaftPort) + } // Validate configuration if errs := cfg.Validate(); len(errs) > 0 { printValidationErrors(errs) } - // LibP2P uses configurable port (default 4001); RQLite uses 5001 (HTTP) and 7001 (Raft) - port := *p2pPort + // Expand and create data directories + if err := ensureDataDirectories(cfg, logger); err != nil { + logger.Error("Failed to create data directories", zap.Error(err)) + 
fmt.Fprintf(os.Stderr, "\n❌ Data Directory Error:\n") + fmt.Fprintf(os.Stderr, "%v\n", err) + os.Exit(1) + } logger.ComponentInfo(logging.ComponentNode, "Node configuration summary", zap.Strings("listen_addresses", cfg.Node.ListenAddresses), zap.Int("rqlite_http_port", cfg.Database.RQLitePort), zap.Int("rqlite_raft_port", cfg.Database.RQLiteRaftPort), - zap.Int("p2p_port", port), zap.Strings("bootstrap_peers", cfg.Discovery.BootstrapPeers), zap.String("rqlite_join_address", cfg.Database.RQLiteJoinAddress), zap.String("data_directory", cfg.Node.DataDir)) + // Extract P2P port from listen addresses + p2pPort := 4001 // default + if len(cfg.Node.ListenAddresses) > 0 { + // Parse port from multiaddr like "/ip4/0.0.0.0/tcp/4001" + parts := strings.Split(cfg.Node.ListenAddresses[0], "/") + for i, part := range parts { + if part == "tcp" && i+1 < len(parts) { + if port, err := strconv.Atoi(parts[i+1]); err == nil { + p2pPort = port + break + } + } + } + } + // Create context for graceful shutdown ctx, cancel := context.WithCancel(context.Background()) defer cancel() @@ -224,7 +305,7 @@ func main() { errChan := make(chan error, 1) doneChan := make(chan struct{}) go func() { - if err := startNode(ctx, cfg, port); err != nil { + if err := startNode(ctx, cfg, p2pPort); err != nil { errChan <- err } close(doneChan) diff --git a/pkg/config/config.go b/pkg/config/config.go index 85e595d..9ca73d9 100644 --- a/pkg/config/config.go +++ b/pkg/config/config.go @@ -108,13 +108,7 @@ func DefaultConfig() *Config { RQLiteJoinAddress: "", // Empty for bootstrap node }, Discovery: DiscoveryConfig{ - BootstrapPeers: []string{ - "/ip4/127.0.0.1/tcp/4001/p2p/12D3KooWHbcFcrGPXKUrHcxvd8MXEeUzRYyvY8fQcpEBxncSUwhj", - // "/ip4/217.76.54.178/tcp/4001/p2p/12D3KooWKZnirPwNT4URtNSWK45f6vLkEs4xyUZ792F8Uj1oYnm1", - // "/ip4/51.83.128.181/tcp/4001/p2p/12D3KooWBn2Zf1R8v9pEfmz7hDZ5b3oADxfejA3zJBYzKRCzgvhR", - // "/ip4/155.133.27.199/tcp/4001/p2p/12D3KooWC69SBzM5QUgrLrfLWUykE8au32X5LwT7zwv9bixrQPm1", - // 
"/ip4/217.76.56.2/tcp/4001/p2p/12D3KooWEiqJHvznxqJ5p2y8mUs6Ky6dfU1xTYFQbyKRCABfcZz4", - }, + BootstrapPeers: []string{}, BootstrapPort: 4001, // Default LibP2P port DiscoveryInterval: time.Second * 15, // Back to 15 seconds for testing HttpAdvAddress: "", diff --git a/pkg/config/paths.go b/pkg/config/paths.go new file mode 100644 index 0000000..4e8ecec --- /dev/null +++ b/pkg/config/paths.go @@ -0,0 +1,38 @@ +package config + +import ( + "fmt" + "os" + "path/filepath" +) + +// ConfigDir returns the path to the DeBros config directory (~/.debros). +func ConfigDir() (string, error) { + home, err := os.UserHomeDir() + if err != nil { + return "", fmt.Errorf("failed to determine home directory: %w", err) + } + return filepath.Join(home, ".debros"), nil +} + +// EnsureConfigDir creates the config directory if it does not exist. +func EnsureConfigDir() (string, error) { + dir, err := ConfigDir() + if err != nil { + return "", err + } + if err := os.MkdirAll(dir, 0700); err != nil { + return "", fmt.Errorf("failed to create config directory %s: %w", dir, err) + } + return dir, nil +} + +// DefaultPath returns the path to the config file for the given component name. 
+// component should be e.g., "node.yaml", "bootstrap.yaml", "gateway.yaml" +func DefaultPath(component string) (string, error) { + dir, err := ConfigDir() + if err != nil { + return "", err + } + return filepath.Join(dir, component), nil +} diff --git a/pkg/config/validate.go b/pkg/config/validate.go index 6921ab4..045d784 100644 --- a/pkg/config/validate.go +++ b/pkg/config/validate.go @@ -456,25 +456,46 @@ func validateDataDir(path string) error { return fmt.Errorf("must not be empty") } - if info, err := os.Stat(path); err == nil { + // Expand ~ to home directory + expandedPath := os.ExpandEnv(path) + if strings.HasPrefix(expandedPath, "~") { + home, err := os.UserHomeDir() + if err != nil { + return fmt.Errorf("cannot determine home directory: %v", err) + } + expandedPath = filepath.Join(home, expandedPath[1:]) + } + + if info, err := os.Stat(expandedPath); err == nil { // Directory exists; check if it's a directory and writable if !info.IsDir() { return fmt.Errorf("path exists but is not a directory") } // Try to write a test file to check permissions - testFile := filepath.Join(path, ".write_test") + testFile := filepath.Join(expandedPath, ".write_test") if err := os.WriteFile(testFile, []byte(""), 0644); err != nil { return fmt.Errorf("directory not writable: %v", err) } os.Remove(testFile) } else if os.IsNotExist(err) { // Directory doesn't exist; check if parent is writable - parent := filepath.Dir(path) + parent := filepath.Dir(expandedPath) if parent == "" || parent == "." { parent = "." 
} - if err := validateDirWritable(parent); err != nil { - return fmt.Errorf("parent directory not writable: %v", err) + // Allow parent not existing - it will be created at runtime + if info, err := os.Stat(parent); err != nil { + if !os.IsNotExist(err) { + return fmt.Errorf("parent directory not accessible: %v", err) + } + // Parent doesn't exist either - that's ok, will be created + } else if !info.IsDir() { + return fmt.Errorf("parent path is not a directory") + } else { + // Parent exists, check if writable + if err := validateDirWritable(parent); err != nil { + return fmt.Errorf("parent directory not writable: %v", err) + } } } else { return fmt.Errorf("cannot access path: %v", err) diff --git a/pkg/rqlite/rqlite.go b/pkg/rqlite/rqlite.go index eca678d..d673662 100644 --- a/pkg/rqlite/rqlite.go +++ b/pkg/rqlite/rqlite.go @@ -69,8 +69,18 @@ func NewRQLiteManager(cfg *config.DatabaseConfig, discoveryCfg *config.Discovery // Start starts the RQLite node func (r *RQLiteManager) Start(ctx context.Context) error { + // Expand ~ in data directory path + dataDir := os.ExpandEnv(r.dataDir) + if strings.HasPrefix(dataDir, "~") { + home, err := os.UserHomeDir() + if err != nil { + return fmt.Errorf("failed to determine home directory: %w", err) + } + dataDir = filepath.Join(home, dataDir[1:]) + } + // Create data directory - rqliteDataDir := filepath.Join(r.dataDir, "rqlite") + rqliteDataDir := filepath.Join(dataDir, "rqlite") if err := os.MkdirAll(rqliteDataDir, 0755); err != nil { return fmt.Errorf("failed to create RQLite data directory: %w", err) } @@ -100,7 +110,7 @@ func (r *RQLiteManager) Start(ctx context.Context) error { } // Wait for join target to become reachable to avoid forming a separate cluster (wait indefinitely) - if err := r.waitForJoinTarget(ctx, joinArg, 0); err != nil { + if err := r.waitForJoinTarget(ctx, r.config.RQLiteJoinAddress, 0); err != nil { r.logger.Warn("Join target did not become reachable within timeout; will still attempt to join", 
zap.String("join_address", r.config.RQLiteJoinAddress), zap.Error(err)) @@ -126,7 +136,7 @@ func (r *RQLiteManager) Start(ctx context.Context) error { // Start RQLite process (not bound to ctx for graceful Stop handling) r.cmd = exec.Command("rqlited", args...) - // Uncomment if you want to see the stdout/stderr of the RQLite process + // Enable debug logging of RQLite process to help diagnose issues // r.cmd.Stdout = os.Stdout // r.cmd.Stderr = os.Stderr @@ -166,7 +176,15 @@ func (r *RQLiteManager) Start(ctx context.Context) error { } } else { r.logger.Info("Waiting for RQLite SQL availability (leader discovery)") - if err := r.waitForSQLAvailable(ctx); err != nil { + // For joining nodes, wait longer for SQL availability + sqlCtx := ctx + if _, hasDeadline := ctx.Deadline(); !hasDeadline { + // If no deadline in context, create one for SQL availability check + var cancel context.CancelFunc + sqlCtx, cancel = context.WithTimeout(context.Background(), 2*time.Minute) + defer cancel() + } + if err := r.waitForSQLAvailable(sqlCtx); err != nil { if r.cmd != nil && r.cmd.Process != nil { _ = r.cmd.Process.Kill() } @@ -207,7 +225,9 @@ func (r *RQLiteManager) waitForReady(ctx context.Context) error { url := fmt.Sprintf("http://localhost:%d/status", r.config.RQLitePort) client := &http.Client{Timeout: 2 * time.Second} - for i := 0; i < 30; i++ { + // Give joining nodes more time (120 seconds vs 30) + maxAttempts := 30 + for i := 0; i < maxAttempts; i++ { select { case <-ctx.Done(): return ctx.Err()