homelab/scripts/validate-node.sh
nathan e16f98a183 feat(bootstrap)!: introduce unified bootstrap system with modular libraries
BREAKING CHANGE: day0bootstrap.sh deprecated in favor of bootstrap.sh

- Add scripts/bootstrap.sh (488 lines): Unified entrypoint supporting multiple hardware types (Proxmox/Docker VMs/Pi)
- Create scripts/lib/ modular library system:
  - detection.sh: OS/hardware/container detection (362 lines)
  - fingerprint.sh: System fingerprinting and inventory (494 lines)
  - network.sh: IP configuration and VLAN placement (356 lines)
  - proxmox.sh: PVE post-install automation (453 lines)
  - validation.sh: Comprehensive pre-flight checks (510 lines)
- Add validation tools: validate-node.sh, onboarding.sh, pi_init.sh
- Deprecate scripts/day0bootstrap.sh with graceful redirect wrapper
- Document architecture in scripts/README.md (495 lines) and PROXMOX-COMPARISON.md
- Update SOP-002 with new bootstrap workflow
- Add nodes/watchtower/compose.yaml (Raspberry Pi 5 stack)

Migration: Existing day0bootstrap.sh users automatically redirected to new system after 5-second warning. No manual intervention required.

Ref: Infrastructure automation modernization per active-tasks.md
2026-04-12 22:48:19 -04:00

216 lines
5.7 KiB
Bash
Raw Permalink Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/bin/bash
# ==============================================================================
# STANDALONE NODE VALIDATION TOOL
# ==============================================================================
# Comprehensive health check utility for homelab nodes
# Can be run independently on any managed host (post-bootstrap or ad-hoc)
#
# Usage:
# ./validate-node.sh [OPTIONS]
#
# Options:
# --help, -h Show this help message
# --json Output results in JSON format
# --critical-only Show only critical errors (exit code 1 if any found)
# --verbose, -v Show detailed output for each check
#
# Exit Codes:
# 0 - All checks passed or warnings only
# 1 - Critical errors found
# 2 - Invalid usage
#
# ==============================================================================
# Strict mode: exit on an unhandled command failure (-e), treat expansion of
# an unset variable as an error (-u), and make a pipeline fail if any stage
# fails (pipefail).
set -euo pipefail
# --- SCRIPT METADATA ---
# Absolute path of the directory holding this script, so the lib/ files below
# are found regardless of the caller's working directory.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
# File name of this script.
# NOTE(review): not referenced in the visible part of this file — possibly
# consumed by one of the sourced libraries; confirm before removing.
SCRIPT_NAME="$(basename "${BASH_SOURCE[0]}")"
# Version string printed in the banner by main().
VERSION="1.0.0"
# --- LOAD LIBRARIES ---
# Each library is sourced into the current shell; with `set -e` active, a
# missing or failing library aborts the script here.
# NOTE(review): the functions and counters this script uses but does not
# define (detect_hardware_type, detect_os_family, print_detection_summary,
# run_validation_suite, VALIDATION_PASSED/WARNINGS/ERRORS) presumably come
# from these files — confirm against lib/.
# shellcheck source=./lib/detection.sh
source "${SCRIPT_DIR}/lib/detection.sh"
# shellcheck source=./lib/validation.sh
source "${SCRIPT_DIR}/lib/validation.sh"
# shellcheck source=./lib/network.sh
source "${SCRIPT_DIR}/lib/network.sh"
# --- COMMAND LINE ARGUMENTS ---
# Flag defaults, stored as the strings "true"/"false"; parse_arguments()
# switches a flag to "true" when the matching option is given.
SHOW_HELP=false
OUTPUT_JSON=false
CRITICAL_ONLY=false
VERBOSE=false
#######################################
# Translate command-line options into the global flag variables.
# Globals:   SHOW_HELP, OUTPUT_JSON, CRITICAL_ONLY, VERBOSE (written)
# Arguments: the script's command-line arguments
# Outputs:   an error message on stderr for an unrecognised option
# Returns:   0 on success; exits the shell with status 2 on an unknown option
#######################################
parse_arguments() {
  local opt
  for opt in "$@"; do
    case "$opt" in
      -h | --help) SHOW_HELP=true ;;
      --json) OUTPUT_JSON=true ;;
      --critical-only) CRITICAL_ONLY=true ;;
      -v | --verbose) VERBOSE=true ;;
      *)
        # Anything else (including an empty argument) is a usage error.
        echo "ERROR: Unknown option: $opt" >&2
        echo "Use --help for usage information" >&2
        exit 2
        ;;
    esac
  done
}
#######################################
# Print the usage/help text.
# The banner version comes from the VERSION global (the same value main()
# prints) instead of a second hard-coded copy, and the option list names the
# short aliases -h and -v that parse_arguments() accepts.
# Globals:   VERSION (read; "unknown" is shown if it is unset or empty)
# Arguments: none
# Outputs:   help text on stdout
#######################################
show_help() {
  # Unquoted delimiter so ${VERSION} expands; the text contains no other
  # '$', backtick or backslash, so everything else is emitted literally.
  cat <<EOF
=============================================================================
STANDALONE NODE VALIDATION TOOL v${VERSION:-unknown}
=============================================================================
Comprehensive health check for homelab infrastructure nodes.
Can be run on any managed host to verify operational readiness.
USAGE:
./validate-node.sh [OPTIONS]
OPTIONS:
--help, -h Show this help message
--json Output results in JSON format (for monitoring integration)
--critical-only Show only critical errors (suppress warnings)
--verbose, -v Show detailed output for each check
EXIT CODES:
0 - All checks passed (or warnings only)
1 - Critical errors found
2 - Invalid usage
CHECKS PERFORMED:
• Disk Space & Performance
• Memory & Swap Configuration
• Network Routes & Connectivity
• Hostname Resolution
• NFS Client Configuration
• Docker Daemon Health
• Proxmox API (if applicable)
• SSH Security Configuration
• Firewall Status
• Time Synchronization
EXAMPLES:
./validate-node.sh
Run all checks with standard output
./validate-node.sh --critical-only
Show only critical errors (useful in scripts)
./validate-node.sh --json
Output JSON for monitoring/alerting systems
./validate-node.sh --verbose
Show detailed information for each check
INTEGRATION:
JSON output can be consumed by monitoring systems (Prometheus, Grafana, etc.)
Example: ./validate-node.sh --json | jq '.errors'
=============================================================================
EOF
}
# --- JSON OUTPUT FUNCTIONS ---

#######################################
# Escape a string so it can be placed inside a JSON double-quoted value.
# Handles backslash, double quote, newline, carriage return and tab.
# Arguments: $1 - raw string
# Outputs:   escaped string on stdout (no trailing newline)
#######################################
_json_escape() {
  local s=${1-}
  s=${s//\\/\\\\} # backslash first, so later escapes are not re-escaped
  s=${s//\"/\\\"}
  s=${s//$'\n'/\\n}
  s=${s//$'\r'/\\r}
  s=${s//$'\t'/\\t}
  printf '%s' "$s"
}

#######################################
# Emit the validation result as a JSON document for monitoring integration.
# Globals:   VALIDATION_PASSED, VALIDATION_WARNINGS, VALIDATION_ERRORS (read;
#            emitted verbatim as JSON numbers), HOSTNAME (read as fallback)
# Calls:     hostname, date, detect_hardware_type, detect_os_family
# Arguments: none
# Outputs:   JSON object on stdout; "status" is "healthy" when
#            VALIDATION_ERRORS equals 0 and "critical" otherwise
#######################################
generate_json_report() {
  # Declarations are kept separate from the command substitutions so that
  # `local` does not hide a failing command's exit status.
  local host timestamp hardware os_family status

  # Minimal hosts may not ship a `hostname` binary; fall back to the
  # HOSTNAME variable that bash sets instead of emitting an empty name.
  host=$(hostname 2>/dev/null) || host=${HOSTNAME:-unknown}
  timestamp=$(date -u +"%Y-%m-%dT%H:%M:%SZ") || true

  # Best effort: keep whatever the detectors printed even if they return
  # non-zero, rather than aborting the report under `set -e`.
  hardware=$(detect_hardware_type) || true
  os_family=$(detect_os_family) || true

  if [ "$VALIDATION_ERRORS" -eq 0 ]; then
    status="healthy"
  else
    status="critical"
  fi

  cat <<EOF
{
"hostname": "$(_json_escape "$host")",
"timestamp": "$timestamp",
"validation": {
"passed": $VALIDATION_PASSED,
"warnings": $VALIDATION_WARNINGS,
"errors": $VALIDATION_ERRORS
},
"status": "$status",
"hardware_type": "$(_json_escape "$hardware")",
"os_family": "$(_json_escape "$os_family")"
}
EOF
}
# --- MAIN WORKFLOW ---
#######################################
# Parse options, run the validation suite and report the result.
#
# In --json mode stdout carries only the JSON document: anything the
# validation suite prints is sent to stderr so that
# `validate-node.sh --json | jq ...` receives parseable input.
# With --critical-only the "warnings present" summary line is not printed.
#
# Globals:   SHOW_HELP, OUTPUT_JSON, VERBOSE, CRITICAL_ONLY, VERSION (read)
#            VALIDATION_ERRORS, VALIDATION_WARNINGS (read after the suite ran)
# Calls:     parse_arguments, show_help, print_detection_summary,
#            run_validation_suite, generate_json_report
# Arguments: the script's command-line arguments
# Exits:     0 when VALIDATION_ERRORS is 0 (warnings allowed) or after --help,
#            1 otherwise; parse_arguments exits 2 on an unknown option
#
# NOTE(review): with `set -e` active, a non-zero return from
# run_validation_suite would terminate the script at the call, before any
# report is produced — confirm the suite always returns 0.
#######################################
main() {
  parse_arguments "$@"

  if [ "$SHOW_HELP" == "true" ]; then
    show_help
    exit 0
  fi

  if [ "$OUTPUT_JSON" == "true" ]; then
    # Keep stdout clean for the JSON consumer; suite output goes to stderr.
    run_validation_suite >&2
    generate_json_report
  else
    echo "======================================="
    echo "NODE VALIDATION TOOL v${VERSION}"
    echo "Host: $(hostname)"
    echo "Time: $(date -u +"%Y-%m-%d %H:%M:%S UTC")"
    echo "======================================="
    echo ""

    # Detection summary is a verbose-only extra for human-readable output.
    if [ "$VERBOSE" == "true" ]; then
      print_detection_summary
      echo ""
    fi

    run_validation_suite

    echo ""
    echo "Validation completed."
    echo ""
    # One-line verdict (the suite has already printed its own summary).
    if [ "$VALIDATION_ERRORS" -gt 0 ]; then
      echo "⚠️ Critical issues found - manual intervention required"
    elif [ "$VALIDATION_WARNINGS" -gt 0 ]; then
      # --critical-only suppresses warning-level reporting.
      if [ "${CRITICAL_ONLY:-false}" != "true" ]; then
        echo " Warnings present - review recommended"
      fi
    else
      echo "✅ All checks passed - node is healthy"
    fi
  fi

  # Single exit path for both output modes. Fails closed: anything other
  # than an error count of exactly 0 yields exit status 1.
  if [ "$VALIDATION_ERRORS" -eq 0 ]; then
    exit 0
  fi
  exit 1
}
# --- ENTRY POINT ---
# Hand every command-line argument, unchanged, to main(); main() ends the
# script via exit, so nothing after this line would run.
main "$@"