homelab/scripts/bootstrap.sh
nathan e16f98a183 feat(bootstrap)!: introduce unified bootstrap system with modular libraries
BREAKING CHANGE: day0bootstrap.sh deprecated in favor of bootstrap.sh

- Add scripts/bootstrap.sh (488 lines): Unified entrypoint supporting multiple hardware types (Proxmox/Docker VMs/Pi)
- Create scripts/lib/ modular library system:
  - detection.sh: OS/hardware/container detection (362 lines)
  - fingerprint.sh: System fingerprinting and inventory (494 lines)
  - network.sh: IP configuration and VLAN placement (356 lines)
  - proxmox.sh: PVE post-install automation (453 lines)
  - validation.sh: Comprehensive pre-flight checks (510 lines)
- Add validation tools: validate-node.sh, onboarding.sh, pi_init.sh
- Deprecate scripts/day0bootstrap.sh with graceful redirect wrapper
- Document architecture in scripts/README.md (495 lines) and PROXMOX-COMPARISON.md
- Update SOP-002 with new bootstrap workflow
- Add nodes/watchtower/compose.yaml (Raspberry Pi 5 stack)

Migration: Existing day0bootstrap.sh users automatically redirected to new system after 5-second warning. No manual intervention required.

Ref: Infrastructure automation modernization per active-tasks.md
2026-04-12 22:48:19 -04:00

489 lines
16 KiB
Bash

#!/bin/bash
# ==============================================================================
# UNIFIED HOMELAB BOOTSTRAP SCRIPT
# ==============================================================================
# Intelligent day0 onboarding for all homelab hardware types
# Replaces: day0bootstrap.sh, pi_init.sh, onboarding.sh
#
# Usage:
# ./bootstrap.sh [OPTIONS]
#
# Options:
# --help, -h Show this help message
# --dry-run Show what would be done without making changes
# --hardware-type TYPE Override auto-detection (proxmox|docker-vm|pi|physical-docker|ai-workstation)
# --skip-network Skip network configuration
# --skip-validation Skip post-bootstrap validation
# --output-json Generate JSON output instead of YAML
# --target-ip IP Target static IP address (default: auto-assigned)
# --gateway IP Gateway IP address (default: 10.0.0.1)
# --dns IP DNS server address (default: 10.0.0.2)
#
# Examples:
# ./bootstrap.sh # Auto-detect and configure
# ./bootstrap.sh --dry-run # Preview actions
# ./bootstrap.sh --hardware-type proxmox # Force Proxmox mode
# ./bootstrap.sh --target-ip 10.0.0.205 # Custom IP address
#
# ==============================================================================
# Strict mode: abort on errors, on unset variables and on failures anywhere
# in a pipeline.
set -o errexit -o nounset -o pipefail

# --- SCRIPT METADATA ---
# Absolute directory of this script; the libraries below are resolved from it.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
# File name of this script without its directory part.
SCRIPT_NAME="${BASH_SOURCE[0]##*/}"
VERSION='4.0.0'
# Local-time stamp taken once at start-up (used in generated file names).
TIMESTAMP="$(date +%Y%m%dT%H%M%S)"

# --- LOAD LIBRARIES ---
# The libraries are sourced in a fixed order; keep it unchanged.
# shellcheck source=./lib/detection.sh
source "${SCRIPT_DIR}/lib/detection.sh"
# shellcheck source=./lib/network.sh
source "${SCRIPT_DIR}/lib/network.sh"
# shellcheck source=./lib/validation.sh
source "${SCRIPT_DIR}/lib/validation.sh"
# shellcheck source=./lib/fingerprint.sh
source "${SCRIPT_DIR}/lib/fingerprint.sh"
# shellcheck source=./lib/proxmox.sh
source "${SCRIPT_DIR}/lib/proxmox.sh"

# --- COMMAND LINE ARGUMENTS ---
# Defaults for every option; parse_arguments overrides them from the CLI.
# Boolean switches
SHOW_HELP=false
DRY_RUN=false
SKIP_NETWORK=false
SKIP_VALIDATION=false
OUTPUT_JSON=false
# Valued options
HARDWARE_TYPE='auto'   # "auto" triggers hardware auto-detection in main
TARGET_IP=''           # empty means main asks get_desired_vlan_ip for one
GATEWAY='10.0.0.1'
DNS_SERVER='10.0.0.2'
#######################################
# Parse command-line options into the global option variables.
# Globals (written): SHOW_HELP, DRY_RUN, HARDWARE_TYPE, SKIP_NETWORK,
#   SKIP_VALIDATION, OUTPUT_JSON, TARGET_IP, GATEWAY, DNS_SERVER
# Arguments: the script's command-line arguments ("$@")
# Outputs:   an error message on stderr for an unknown option or for a
#            value-taking option that has no value
# Returns:   0 on success; exits 1 on a usage error
#######################################
parse_arguments() {
    local opt
    while [[ $# -gt 0 ]]; do
        opt="$1"
        case "$opt" in
            --help|-h)
                SHOW_HELP=true
                shift
                ;;
            --dry-run)
                DRY_RUN=true
                shift
                ;;
            --skip-network)
                SKIP_NETWORK=true
                shift
                ;;
            --skip-validation)
                SKIP_VALIDATION=true
                shift
                ;;
            --output-json)
                OUTPUT_JSON=true
                shift
                ;;
            --hardware-type|--target-ip|--gateway|--dns)
                # Validate before touching $2: with nounset active a missing
                # value would otherwise abort with "unbound variable", and a
                # following flag (e.g. "--target-ip --dry-run") would be
                # swallowed as the value.
                if [[ $# -lt 2 || "${2-}" == -* ]]; then
                    echo "ERROR: Option $opt requires a value" >&2
                    echo "Use --help for usage information" >&2
                    exit 1
                fi
                case "$opt" in
                    --hardware-type) HARDWARE_TYPE="$2" ;;
                    --target-ip)     TARGET_IP="$2" ;;
                    --gateway)       GATEWAY="$2" ;;
                    --dns)           DNS_SERVER="$2" ;;
                esac
                shift 2
                ;;
            *)
                echo "ERROR: Unknown option: $opt" >&2
                echo "Use --help for usage information" >&2
                exit 1
                ;;
        esac
    done
}
#######################################
# Print the usage/help text.
# Arguments: none
# Outputs:   the help text on stdout
#######################################
show_help() {
    local help_text
    # Slurp the literal here-document (quoted delimiter: no expansion) into a
    # variable. read returns non-zero when it hits end-of-input without
    # finding the NUL delimiter, which is the expected outcome here.
    IFS= read -r -d '' help_text <<'EOF' || true
=============================================================================
UNIFIED HOMELAB BOOTSTRAP SCRIPT v4.0.0
=============================================================================
Intelligent day0 onboarding for all homelab hardware types.
Auto-detects OS, hardware type, and applies appropriate configuration.
USAGE:
./bootstrap.sh [OPTIONS]
OPTIONS:
--help Show this help message
--dry-run Preview actions without making changes
--hardware-type TYPE Override auto-detection
Types: proxmox, docker-vm, pi, physical-docker, ai-workstation
--skip-network Skip network configuration (use if already configured)
--skip-validation Skip post-bootstrap validation checks
--output-json Generate JSON output instead of YAML
--target-ip IP Set static IP address (default: auto-assigned)
--gateway IP Set gateway IP (default: 10.0.0.1)
--dns IP Set DNS server (default: 10.0.0.2)
EXAMPLES:
./bootstrap.sh
Auto-detect hardware and configure with defaults
./bootstrap.sh --dry-run
Preview what would be done without making changes
./bootstrap.sh --hardware-type proxmox --target-ip 10.0.0.201
Force Proxmox mode and set specific IP
./bootstrap.sh --skip-network
Skip network configuration (already configured manually)
WORKFLOW:
1. System Detection → Identify OS, hardware type, CPU, GPU
2. Network Config → Apply static IP via netplan (skippable)
3. Package Install → Docker, Ansible, proxmoxer (as needed)
4. SSH Keys → Generate/verify ED25519 keys
5. Validation → Comprehensive health checks
6. Fingerprinting → Generate hardware inventory YAML
OUTPUT FILES:
ansible/archive/outputs/bootstrap-validation-{hostname}-{timestamp}.log
ansible/archive/outputs/hardware-facts-{hostname}-{timestamp}.yml
ansible/archive/inventory/discovered-hosts.yml (auto-discovery)
NOTES:
- Network configuration will disconnect SSH (reconnect to new IP)
- Run from console or plan for reconnection
- Safe to re-run (idempotent where possible)
- Logs saved even if script interrupted
=============================================================================
EOF
    printf '%s' "$help_text"
}
# --- MAIN WORKFLOW ---
#######################################
# Run the complete bootstrap workflow: detection, network configuration,
# package installation, SSH key management, validation and fingerprinting.
# Globals (read):    SCRIPT_DIR, VERSION, TIMESTAMP, SHOW_HELP, DRY_RUN,
#                    SKIP_NETWORK, SKIP_VALIDATION, OUTPUT_JSON, GATEWAY,
#                    DNS_SERVER, HOME, USER
# Globals (written): HARDWARE_TYPE (when "auto"), TARGET_IP (when empty)
# Arguments: the script's command-line arguments, passed to parse_arguments
# Outputs:   progress report on stdout
# Returns:   exits 0 after --help or --dry-run; otherwise returns after the
#            completion summary. The script is expected to run with errexit,
#            so an unguarded failing command aborts the run.
#######################################
main() {
    # Parse CLI arguments
    parse_arguments "$@"

    if [[ "$SHOW_HELP" == "true" ]]; then
        show_help
        exit 0
    fi

    # Validation log and hardware facts share this directory.
    local outputs_dir="${SCRIPT_DIR}/../ansible/archive/outputs"

    # === PHASE 1: DETECTION ===
    echo "======================================="
    echo "HOMELAB BOOTSTRAP v${VERSION}"
    echo "Timestamp: $(date -u +"%Y-%m-%d %H:%M:%S UTC")"
    echo "======================================="
    echo ""
    echo "[PHASE 1] System Detection"
    echo "---"

    # Detect hardware type
    if [[ "$HARDWARE_TYPE" == "auto" ]]; then
        HARDWARE_TYPE=$(detect_hardware_type)
        echo " Auto-detected hardware type: $HARDWARE_TYPE"
    else
        echo " Hardware type (forced): $HARDWARE_TYPE"
    fi

    # Print detection summary
    print_detection_summary
    echo ""

    # Determine target IP if not specified
    if [[ -z "$TARGET_IP" ]]; then
        TARGET_IP=$(get_desired_vlan_ip)
        echo " Auto-assigned target IP: $TARGET_IP"
        echo " (Based on hardware type and environment-constraints.md)"
    else
        echo " Target IP (user-specified): $TARGET_IP"
    fi
    echo ""

    # Dry-run check: describe the plan and stop before changing anything.
    if [[ "$DRY_RUN" == "true" ]]; then
        echo "[DRY-RUN MODE] Would perform the following actions:"
        echo ""
        echo " 1. Configure network: $TARGET_IP (gateway: $GATEWAY, DNS: $DNS_SERVER)"
        echo " 2. Install Docker (with Debian Trixie workaround if needed)"
        echo " 3. Install Ansible"
        if [[ "$HARDWARE_TYPE" == "proxmox" ]]; then
            echo " 4. Apply Proxmox repository fixes"
            echo " 5. Install proxmoxer Python library"
        fi
        echo " 6. Generate/verify SSH keys (ED25519)"
        echo " 7. Run validation suite"
        echo " 8. Generate hardware fingerprint"
        echo ""
        echo "No changes made. Re-run without --dry-run to execute."
        exit 0
    fi

    # === PHASE 2: NETWORK CONFIGURATION ===
    if [[ "$SKIP_NETWORK" == "false" ]]; then
        echo "[PHASE 2] Network Configuration"
        echo "---"
        configure_network_safe "$TARGET_IP" "$GATEWAY" "$DNS_SERVER"
        # Wait for network to stabilize
        sleep 3
        wait_for_network 15
        echo ""
    else
        echo "[PHASE 2] Network Configuration (SKIPPED)"
        echo ""
    fi

    # === PHASE 3: PACKAGE INSTALLATION ===
    echo "[PHASE 3] Package Installation"
    echo "---"

    # Update package lists
    echo " [⚙] Updating package lists..."
    sudo apt-get update -qq

    # Install prerequisites
    echo " [⚙] Installing prerequisites (ca-certificates, curl, gnupg)..."
    sudo apt-get install -y -qq ca-certificates curl gnupg lsb-release

    # --- DOCKER INSTALLATION ---
    if ! command -v docker &>/dev/null; then
        echo " [⚙] Installing Docker..."

        # Remove existing Docker repo configs (the glob also covers docker.list)
        sudo rm -f /etc/apt/sources.list.d/docker*.list

        # Add Docker GPG key
        sudo mkdir -p /etc/apt/keyrings
        curl -fsSL https://download.docker.com/linux/debian/gpg | \
            sudo gpg --dearmor -o /etc/apt/keyrings/docker.gpg --yes

        # Determine repository codename. Declaration and assignment are split
        # so a failing detect_os_version is not masked by `local`.
        local os_version repo_codename
        os_version=$(detect_os_version)
        repo_codename="$os_version"

        # Debian Trixie workaround (use Bookworm repos)
        if is_debian_trixie; then
            echo " [!] Debian Trixie detected - using Bookworm repos for Docker"
            repo_codename="bookworm"
        fi

        # Add Docker repository
        echo "deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/docker.gpg] https://download.docker.com/linux/debian $repo_codename stable" | \
            sudo tee /etc/apt/sources.list.d/docker.list > /dev/null

        # Install Docker
        sudo apt-get update -qq
        sudo apt-get install -y docker-ce docker-ce-cli containerd.io docker-compose-plugin

        # Add current user to docker group. USER can be unset (e.g. minimal
        # or non-login environments), which would trip nounset.
        sudo usermod -aG docker "${USER:-$(id -un)}"
        echo " [✓] Docker installed: $(docker --version)"
    else
        echo " [✓] Docker already installed: $(docker --version)"
    fi

    # --- ANSIBLE INSTALLATION ---
    if ! command -v ansible &>/dev/null; then
        echo " [⚙] Installing Ansible..."
        sudo apt-get install -y ansible
        echo " [✓] Ansible installed: $(ansible --version | head -n1)"
    else
        echo " [✓] Ansible already installed: $(ansible --version | head -n1)"
    fi

    # --- PROXMOX-SPECIFIC PACKAGES ---
    if [[ "$HARDWARE_TYPE" == "proxmox" ]]; then
        echo " [⚙] Installing Proxmox-specific packages..."

        # Install Python pip if needed
        if ! command -v pip3 &>/dev/null; then
            sudo apt-get install -y python3-pip
        fi

        # Install proxmoxer
        if ! python3 -c "import proxmoxer" 2>/dev/null; then
            echo " [⚙] Installing proxmoxer Python library..."
            # Try with --break-system-packages first; fall back to a plain
            # install when pip rejects that flag.
            pip3 install proxmoxer --break-system-packages 2>/dev/null || pip3 install proxmoxer
            echo " [✓] proxmoxer installed"
        else
            echo " [✓] proxmoxer already installed"
        fi

        # Install jq for Proxmox post-install tasks
        if ! command -v jq &>/dev/null; then
            sudo apt-get install -y jq
        fi

        echo ""
        echo "=== PROXMOX POST-INSTALL CONFIGURATION ==="
        # Run comprehensive Proxmox post-install routine
        # This includes: repository fixes, subscription nag removal, HA management
        # No APT sources file is written here; repository setup is left to
        # the routine above.
        # NOTE(review): confirm lib/proxmox.sh configures the
        # pve-no-subscription repository.
        run_proxmox_post_install "auto"
        echo "=========================================="
        echo ""
    fi

    # --- UTILITY PACKAGES ---
    echo " [⚙] Installing utility packages..."
    sudo apt-get install -y -qq git vim htop curl wget nfs-common net-tools dnsutils
    echo " [✓] Package installation complete"
    echo ""

    # === PHASE 4: SSH KEY MANAGEMENT ===
    echo "[PHASE 4] SSH Key Management"
    echo "---"
    local ssh_key_path=""

    # Check for existing keys (prefer ED25519)
    if [[ -f "$HOME/.ssh/id_ed25519" ]]; then
        ssh_key_path="$HOME/.ssh/id_ed25519"
        echo " [✓] Found existing ED25519 key: $ssh_key_path"
    elif [[ -f "$HOME/.ssh/id_rsa" ]]; then
        ssh_key_path="$HOME/.ssh/id_rsa"
        echo " [✓] Found existing RSA key: $ssh_key_path"
    else
        # Generate new ED25519 key (empty passphrase)
        ssh_key_path="$HOME/.ssh/id_ed25519"
        echo " [⚙] Generating new ED25519 key pair..."
        ssh-keygen -t ed25519 -f "$ssh_key_path" -N "" -C "$(whoami)@$(hostname)-$(date +%Y%m%d)"
        echo " [✓] SSH key generated: $ssh_key_path"
    fi

    # Display public key
    echo ""
    echo " Public key (add to authorized_keys on managed nodes):"
    echo " ---"
    cat "${ssh_key_path}.pub"
    echo " ---"
    echo ""

    # === PHASE 5: VALIDATION ===
    local log_file=""
    if [[ "$SKIP_VALIDATION" == "false" ]]; then
        echo "[PHASE 5] System Validation"
        echo "---"

        mkdir -p "$outputs_dir"
        log_file="${outputs_dir}/bootstrap-validation-$(hostname)-${TIMESTAMP}.log"

        # Run the suite exactly once, in the current shell, capturing stdout
        # and stderr into the log. The `||` keeps a failing suite from
        # aborting the script: validation errors are reported, not fatal.
        # The captured output is replayed to the terminal afterwards.
        local validation_rc=0
        run_validation_suite >"$log_file" 2>&1 || validation_rc=$?
        cat "$log_file"

        if (( validation_rc == 0 )); then
            echo ""
            echo " [✓] All validation checks passed"
        else
            echo ""
            echo " [!] Validation completed with errors - review above"
            echo " [!] You may proceed, but manual intervention may be required"
        fi
        echo ""
        echo " [✓] Validation log saved: $log_file"
        echo ""
    else
        echo "[PHASE 5] System Validation (SKIPPED)"
        echo ""
    fi

    # === PHASE 6: HARDWARE FINGERPRINTING ===
    echo "[PHASE 6] Hardware Fingerprinting"
    echo "---"

    # Print summary
    print_hardware_summary
    echo ""

    # Validate against standards (non-fatal by design)
    echo " Checking against environment-constraints.md standards..."
    validate_against_standards || true
    echo ""

    # Save hardware facts. The directory is created here as well because the
    # validation phase, which also creates it, may have been skipped.
    mkdir -p "$outputs_dir"
    local facts_file
    facts_file=$(save_hardware_facts "$outputs_dir")
    echo " [✓] Hardware facts saved: $facts_file"

    # Save JSON if requested
    local json_file=""
    if [[ "$OUTPUT_JSON" == "true" ]]; then
        json_file="${outputs_dir}/hardware-facts-$(hostname)-${TIMESTAMP}.json"
        generate_json_output > "$json_file"
        echo " [✓] JSON output saved: $json_file"
    fi

    # Generate inventory snippet
    local inventory_dir="${SCRIPT_DIR}/../ansible/archive/inventory"
    mkdir -p "$inventory_dir"
    local inventory_file
    inventory_file=$(append_to_discovered_inventory "${inventory_dir}/discovered-hosts.yml")
    echo " [✓] Inventory snippet appended: $inventory_file"
    echo ""

    # === COMPLETION ===
    echo "======================================="
    echo "BOOTSTRAP COMPLETE"
    echo "======================================="
    echo ""
    echo "Summary:"
    echo " Hostname: $(hostname)"
    echo " IP Address: $(get_current_ip)"
    echo " Hardware Type: $HARDWARE_TYPE"
    echo " OS: $(detect_os_family) $(detect_os_version)"
    echo ""
    echo "Next Steps:"
    echo " 1. Reconnect SSH if network was reconfigured: ssh $(whoami)@$(get_current_ip)"
    echo " 2. Verify Docker: docker ps"
    echo " 3. Verify Ansible: ansible --version"
    echo " 4. Review hardware facts: cat $facts_file"
    echo " 5. Run Ansible playbook: ansible-playbook playbooks/onboarding/generic_host.yml"
    echo ""
    echo "Files Generated:"
    echo " - $facts_file"
    if [[ "$OUTPUT_JSON" == "true" ]]; then
        echo " - $json_file"
    fi
    if [[ "$SKIP_VALIDATION" == "false" ]]; then
        echo " - $log_file"
    fi
    echo " - $inventory_file"
    echo ""
    echo "Documentation:"
    echo " - SOP: documentation/SOPs/SOP-002-Initial-Infrastructure-Deployment.md"
    echo " - Technical Runbook: documentation/TECHNICAL_RUNBOOK.md"
    echo ""
    echo "Have a great day! 🚀"
    echo ""
}
# --- ENTRY POINT ---
# Trap errors
# errtrace makes shell functions, command substitutions and subshells inherit
# the ERR trap. Without it the trap is not active inside main(), where all of
# the script's work happens, so the message below would never be printed.
set -o errtrace
trap 'echo "ERROR: Bootstrap failed at line $LINENO. Check logs for details." >&2' ERR
# Run main workflow
main "$@"