From ef875a78cc9889f64ad8ea1983a31d6906925792 Mon Sep 17 00:00:00 2001 From: Nathan Date: Mon, 13 Apr 2026 20:01:48 -0400 Subject: [PATCH] feat(ansible): enhance inventory management and onboarding playbooks with detailed host variables and system facts collection --- ansible/inventory/host_vars/heimdall.yml | 56 +++++++++ ansible/inventory/host_vars/waldorf.yml | 51 ++++++++ ansible/inventory/host_vars/watchtower.yml | 48 +++++++ ansible/inventory/hosts.ini | 37 ++++-- ansible/playbooks/ONBOARDING.md | 130 +++++++++++++++++++ ansible/playbooks/gather-node-facts.yml | 140 +++++++++++++++++++++ ansible/playbooks/onboard-nodes.yml | 106 ++++++++++++++++ ansible/playbooks/quick-facts.yml | 39 ++++++ 8 files changed, 599 insertions(+), 8 deletions(-) create mode 100644 ansible/inventory/host_vars/heimdall.yml create mode 100644 ansible/inventory/host_vars/waldorf.yml create mode 100644 ansible/inventory/host_vars/watchtower.yml create mode 100644 ansible/playbooks/ONBOARDING.md create mode 100644 ansible/playbooks/gather-node-facts.yml create mode 100644 ansible/playbooks/onboard-nodes.yml create mode 100644 ansible/playbooks/quick-facts.yml diff --git a/ansible/inventory/host_vars/heimdall.yml b/ansible/inventory/host_vars/heimdall.yml new file mode 100644 index 0000000..a498af1 --- /dev/null +++ b/ansible/inventory/host_vars/heimdall.yml @@ -0,0 +1,56 @@ +--- +# Host-specific variables for heimdall +# IP: 10.0.0.151 +# Auto-generated: 2026-04-13T23:44:52Z + +# Hardware Details +hardware: + platform: physical_server + cpu: Intel(R) N100 (4 cores) + memory_gb: 15 + storage_gb: 0 + architecture: x86_64 + +# Operating System +os: + distribution: Ubuntu + version: "24.04" + codename: "Noble" + kernel: 6.8.0-107-generic + +# GPU Configuration +gpu: + enabled: true + device: /dev/dri + info: "/usr/bin/lspci +00:02.0 VGA compatible controller: Intel Corporation Alder Lake-N [UHD Graphics]" + +# Docker Status +docker: + installed: True + version: "Docker version 29.4.0, build 
9d7ad9f" + running_containers: + - trek + - vaultwarden + - komodo-core + - komodo-periphery-heimdall + - gitea-server + - gitea-db + - traefik + - docker-socket-proxy + - redis + - komodo-db + +# NFS Configuration +nfs: + mounts_configured: True + mount_details: | + 10.0.0.250:/Volume2/media on /mnt/media type nfs4 (rw,relatime,vers=4.2,rsize=1048576,wsize=1048576,namlen=255,hard,proto=tcp,timeo=600,retrans=2,sec=sys,clientaddr=10.0.0.151,local_lock=none,addr=10.0.0.250) + 10.0.0.250:/Volume1/appdata on /mnt/appdata type nfs4 (rw,relatime,vers=4.2,rsize=1048576,wsize=1048576,namlen=255,hard,proto=tcp,timeo=600,retrans=2,sec=sys,clientaddr=10.0.0.151,local_lock=none,addr=10.0.0.250) + +# Network Configuration +network: + primary_ip: 10.0.0.151 + primary_interface: bond0 + hostname: heimdall + fqdn: heimdall diff --git a/ansible/inventory/host_vars/waldorf.yml b/ansible/inventory/host_vars/waldorf.yml new file mode 100644 index 0000000..7bcf7c9 --- /dev/null +++ b/ansible/inventory/host_vars/waldorf.yml @@ -0,0 +1,51 @@ +--- +# Host-specific variables for waldorf +# IP: 10.0.0.251 +# Auto-generated: 2026-04-13T23:45:33Z + +# Hardware Details +hardware: + platform: physical_server + cpu: Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz (8 cores) + memory_gb: 16 + storage_gb: 0 + architecture: x86_64 + +# Operating System +os: + distribution: Ubuntu + version: "24.04" + codename: "Noble" + kernel: 6.8.0-107-generic + +# GPU Configuration +gpu: + enabled: true + device: /dev/dri + info: "/usr/bin/lspci +01:00.0 VGA compatible controller: NVIDIA Corporation GP106M [GeForce GTX 1060 Mobile Rev. 
2] (rev a1)" + +# Docker Status +docker: + installed: True + version: "Docker version 29.4.0, build 9d7ad9f" + running_containers: + - komodo-periphery-waldorf + - docker-socket-proxy + - buildx_buildkit_default + - tunarr + - plex + +# NFS Configuration +nfs: + mounts_configured: True + mount_details: | + 10.0.0.250:/Volume1/appdata on /mnt/appdata type nfs4 (rw,relatime,vers=4.2,rsize=1048576,wsize=1048576,namlen=255,hard,proto=tcp,timeo=600,retrans=2,sec=sys,clientaddr=10.0.0.251,local_lock=none,addr=10.0.0.250) + 10.0.0.250:/Volume2/media on /mnt/media type nfs4 (rw,relatime,vers=4.2,rsize=1048576,wsize=1048576,namlen=255,hard,proto=tcp,timeo=600,retrans=2,sec=sys,clientaddr=10.0.0.251,local_lock=none,addr=10.0.0.250) + +# Network Configuration +network: + primary_ip: 10.0.0.110 + primary_interface: enp0s31f6 + hostname: waldorf + fqdn: waldorf diff --git a/ansible/inventory/host_vars/watchtower.yml b/ansible/inventory/host_vars/watchtower.yml new file mode 100644 index 0000000..a0715f2 --- /dev/null +++ b/ansible/inventory/host_vars/watchtower.yml @@ -0,0 +1,48 @@ +--- +# Host-specific variables for watchtower +# IP: 10.0.0.200 +# Auto-generated: 2026-04-13T23:48:26Z + +# Hardware Details +hardware: + platform: physical_server + cpu: 2 (4 cores) + memory_gb: 16 + storage_gb: 0 + architecture: aarch64 + +# Operating System +os: + distribution: Debian + version: "13.4" + codename: "Trixie" + kernel: 6.12.75+rpt-rpi-2712 + +# GPU Configuration +gpu: + enabled: true + device: /dev/dri + info: "/usr/bin/lspci" + +# Docker Status +docker: + installed: True + version: "Docker version 29.4.0, build 9d7ad9f" + running_containers: + - vscode + - komodo-perihery-watchtower + - traefik-kop + - docker-socket-proxy + +# NFS Configuration +nfs: + mounts_configured: True + mount_details: | + 10.0.0.250:/Volume1/appdata on /mnt/appdata type nfs 
(rw,relatime,vers=3,rsize=1048576,wsize=1048576,namlen=255,hard,nolock,proto=tcp,timeo=600,retrans=2,sec=sys,mountaddr=10.0.0.250,mountvers=3,mountport=39187,mountproto=udp,local_lock=all,addr=10.0.0.250,x-systemd.automount) + +# Network Configuration +network: + primary_ip: 10.0.0.80 + primary_interface: eth0 + hostname: watchtower + fqdn: watchtower diff --git a/ansible/inventory/hosts.ini b/ansible/inventory/hosts.ini index d42e373..e85a0b5 100644 --- a/ansible/inventory/hosts.ini +++ b/ansible/inventory/hosts.ini @@ -8,18 +8,36 @@ watchtower ansible_host=10.0.0.200 ansible_user=chester # ============================================================================= -# Add your managed nodes below +# Docker Nodes # ============================================================================= +[docker_nodes] +heimdall ansible_host=10.0.0.151 ansible_user=chester +waldorf ansible_host=10.0.0.251 ansible_user=chester -# Example structure: -# [docker_swarm_managers] -# heimdall ansible_host=10.0.0.X ansible_user=chester +# Core infrastructure services (Komodo, Gitea, Traefik) +[core_services] +heimdall ansible_host=10.0.0.151 ansible_user=chester -# [docker_swarm_workers] -# waldorf ansible_host=10.0.0.X ansible_user=chester +# Media services (Plex, Tunarr) +[media_services] +waldorf ansible_host=10.0.0.251 ansible_user=chester -# [proxmox_cluster] -# pve-node1 ansible_host=10.0.0.X ansible_user=root +# ============================================================================= +# Platform Groups +# ============================================================================= +[physical_servers] +heimdall ansible_host=10.0.0.151 ansible_user=chester +waldorf ansible_host=10.0.0.251 ansible_user=chester + +[raspberry_pi] +watchtower ansible_host=10.0.0.200 ansible_user=chester + +# ============================================================================= +# NFS Clients (nodes with /mnt/appdata) +# 
============================================================================= +[nfs_clients] +heimdall ansible_host=10.0.0.151 ansible_user=chester +waldorf ansible_host=10.0.0.251 ansible_user=chester # ============================================================================= # Group Variables @@ -27,3 +45,6 @@ watchtower ansible_host=10.0.0.200 ansible_user=chester [all:vars] ansible_python_interpreter=/usr/bin/python3 ansible_ssh_private_key_file=~/.ssh/id_ed25519 + +[nfs_clients:vars] +nfs_mount_point=/mnt/appdata diff --git a/ansible/playbooks/ONBOARDING.md b/ansible/playbooks/ONBOARDING.md new file mode 100644 index 0000000..675fa27 --- /dev/null +++ b/ansible/playbooks/ONBOARDING.md @@ -0,0 +1,130 @@ +# Node Onboarding Guide + +This guide covers onboarding new nodes into Ansible management from the watchtower control node. + +--- + +## Prerequisites + +**On Control Node (watchtower):** +- ✅ Ansible installed +- ✅ SSH key generated (`~/.ssh/id_ed25519`) +- ✅ Inventory configured + +**On Target Nodes:** +- SSH access with password authentication enabled +- User account with sudo privileges +- Python 3 installed + +--- + +## Quick Onboarding + +### Step 1: Update Inventory + +Edit [inventory/hosts.ini](../inventory/hosts.ini) to add the new node: + +```ini +[docker_nodes] +newnode ansible_host=10.0.0.X ansible_user=chester +``` + +### Step 2: Run Onboarding Playbook + +```bash +cd /home/chester/homelab/ansible + +# Onboard specific nodes (will prompt for passwords) +ansible-playbook playbooks/onboard-nodes.yml -k -K --limit newnode + +# Onboard all unonboarded nodes +ansible-playbook playbooks/onboard-nodes.yml -k -K --limit heimdall,waldorf +``` + +**Flags:** +- `-k` = Prompt for SSH password (initial connection) +- `-K` = Prompt for sudo password (if passwordless sudo not configured) + +### Step 3: Test Connectivity + +```bash +# Test basic connectivity +ansible newnode -m ping + +# Test with privilege escalation +ansible newnode -b -m command -a 'whoami' 
+ +# Gather facts about the node +ansible newnode -m setup +``` + +--- + +## Current Node Status + +| Node | IP | Platform | Services | Status | +|------|-------|----------|----------|--------| +| **watchtower** | 10.0.0.200 | Raspberry Pi 5 | Control Plane, Komodo Periphery | ✅ Control Node | +| **heimdall** | 10.0.0.151 | Proxmox VM | Komodo Core, Gitea, Traefik | ⏳ Pending onboarding | +| **waldorf** | 10.0.0.251 | Physical Server | Plex, Tunarr | ⏳ Pending onboarding | + +--- + +## What the Onboarding Playbook Does + +1. ✅ Deploys watchtower's SSH public key to target node +2. ✅ Verifies passwordless sudo configuration +3. ✅ Checks Python 3 availability +4. ✅ Validates Docker installation +5. ✅ Verifies NFS mount points + +--- + +## Post-Onboarding + +After successful onboarding, you can: + +- Use all Ansible modules without password prompts +- Run playbooks against the node +- Automate deployments and configuration management + +--- + +## Troubleshooting + +### SSH Connection Fails + +```bash +# Test manual SSH connection first +ssh chester@10.0.0.151 + +# If that works but Ansible fails, check inventory syntax +ansible-inventory --list +``` + +### Passwordless Sudo Required + +Edit `/etc/sudoers.d/90-cloud-init-users` on target node: + +```bash +# Allow user to run sudo without password +chester ALL=(ALL) NOPASSWD:ALL +``` + +### Python Not Found + +```bash +# Install Python 3 on target node +sudo apt update && sudo apt install -y python3 +``` + +--- + +## Next Steps + +After onboarding, consider: + +1. Configure automated deployments for Docker stacks +2. Set up monitoring and health checks +3. Implement backup automation +4. Create maintenance playbooks (updates, reboots, etc.) 
diff --git a/ansible/playbooks/gather-node-facts.yml b/ansible/playbooks/gather-node-facts.yml
new file mode 100644
index 0000000..d4ab135
--- /dev/null
+++ b/ansible/playbooks/gather-node-facts.yml
@@ -0,0 +1,164 @@
+---
+# Gather Node Facts Playbook
+# Purpose: Collect accurate system information from nodes for inventory
+# Usage: ansible-playbook playbooks/gather-node-facts.yml
+#        Add --limit to target specific nodes
+#        Use -k flag only if nodes aren't onboarded yet
+# Output: inventory/host_vars/<inventory_hostname>.yml on the control node
+#         (an existing file for the host is overwritten)
+
+- name: Gather facts from managed nodes
+  hosts: all
+  gather_facts: true
+  become: false
+  vars:
+    output_dir: "{{ playbook_dir }}/../inventory/host_vars"
+    # ansible_processor interleaves numeric CPU indices with vendor/model
+    # strings, and some ARM boards report indices only, so a fixed index such
+    # as [2] can yield a bare number. Use the last non-numeric entry instead.
+    cpu_model: >-
+      {{ ansible_processor | reject('match', '^[0-9]+$') | list | last
+      | default('unknown') }}
+    # Root filesystem size in GB, taken from the mount facts; 0 when no "/"
+    # mount is reported.
+    root_fs_gb: >-
+      {{ ((ansible_mounts | selectattr('mount', 'equalto', '/')
+      | map(attribute='size_total') | first | default(0)) / (1024 ** 3))
+      | round(0) | int }}
+  tasks:
+    - name: Display discovered facts summary
+      ansible.builtin.debug:
+        msg:
+          - "======================================"
+          - "Host: {{ inventory_hostname }}"
+          - "======================================"
+          - "FQDN: {{ ansible_fqdn }}"
+          - "Distribution: {{ ansible_distribution }} {{ ansible_distribution_version }}"
+          - "Kernel: {{ ansible_kernel }}"
+          - "Architecture: {{ ansible_architecture }}"
+          - "CPU Model: {{ cpu_model }}"
+          - "CPU Cores: {{ ansible_processor_vcpus }}"
+          - "Memory: {{ (ansible_memtotal_mb / 1024) | round(0) }} GB"
+          - "Primary IP: {{ ansible_default_ipv4.address }}"
+          - "Hostname: {{ ansible_hostname }}"
+
+    - name: Check for GPU devices
+      ansible.builtin.stat:
+        path: /dev/dri
+      register: gpu_check
+
+    # The shell module runs /bin/sh, which is not necessarily bash. In a POSIX
+    # shell "cmd &> file" backgrounds cmd and then truncates file, so the test
+    # always passes and the lspci path leaks into stdout. Use "> file 2>&1".
+    - name: Detect GPU information (if available)
+      ansible.builtin.shell: |
+        if command -v lspci > /dev/null 2>&1; then
+          lspci | grep -i vga | head -1
+        else
+          echo "lspci not available"
+        fi
+      register: gpu_info
+      changed_when: false
+      failed_when: false
+      when: gpu_check.stat.exists
+
+    - name: Check Docker installation
+      ansible.builtin.command: docker --version
+      register: docker_version
+      changed_when: false
+      failed_when: false
+
+    - name: Check NFS mounts
+      ansible.builtin.shell: mount | grep nfs || echo "No NFS mounts"
+      register: nfs_mounts
+      changed_when: false
+      failed_when: false
+
+    - name: Detect running Docker containers
+      ansible.builtin.command: docker ps --format "{{ '{{' }}.Names{{ '}}' }}"
+      register: docker_containers
+      changed_when: false
+      failed_when: false
+      when: docker_version.rc == 0
+
+    # Renders the host_vars file as text. Free-form strings go through to_json
+    # so they are always validly quoted, and booleans are lower-cased to emit
+    # true/false. Jinja control tags are kept at the left edge of the block
+    # scalar so that whitespace in front of a tag cannot shift the next line.
+    # NOTE(review): the platform test looks for "pve" in the system vendor;
+    # confirm Proxmox guests on these nodes actually report that (KVM guests
+    # commonly report "QEMU"), otherwise every host becomes physical_server.
+    - name: Generate host_vars content
+      ansible.builtin.set_fact:
+        host_vars_content: |
+          ---
+          # Host-specific variables for {{ inventory_hostname }}
+          # IP: {{ ansible_host | default(inventory_hostname) }}
+          # Auto-generated: {{ ansible_date_time.iso8601 }}
+
+          # Hardware Details
+          hardware:
+            platform: {{ 'proxmox_vm' if 'pve' in (ansible_system_vendor | default('') | lower) else 'physical_server' }}
+            cpu: {{ (cpu_model ~ ' (' ~ ansible_processor_vcpus ~ ' cores)') | to_json }}
+            memory_gb: {{ (ansible_memtotal_mb / 1024) | round(0) | int }}
+            storage_gb: {{ root_fs_gb }}
+            architecture: {{ ansible_architecture }}
+
+          # Operating System
+          os:
+            distribution: {{ ansible_distribution }}
+            version: "{{ ansible_distribution_version }}"
+            codename: "{{ ansible_distribution_release | title }}"
+            kernel: {{ ansible_kernel }}
+
+          {% if gpu_check.stat.exists and gpu_info.stdout != "lspci not available" %}
+          # GPU Configuration
+          gpu:
+            enabled: true
+            device: /dev/dri
+            info: {{ gpu_info.stdout | to_json }}
+          {% endif %}
+
+          # Docker Status
+          docker:
+            installed: {{ (docker_version.rc == 0) | lower }}
+          {% if docker_version.rc == 0 %}
+            version: {{ docker_version.stdout | to_json }}
+          {% endif %}
+          {% if docker_containers.stdout_lines | default([]) | length > 0 %}
+            running_containers:
+          {% for container in docker_containers.stdout_lines %}
+              - {{ container }}
+          {% endfor %}
+          {% endif %}
+
+          # NFS Configuration
+          nfs:
+            mounts_configured: {{ ('nfs' in nfs_mounts.stdout) | lower }}
+          {% if 'nfs' in nfs_mounts.stdout %}
+            mount_details: |
+              {{ nfs_mounts.stdout | indent(4) }}
+          {% endif %}
+
+          # Network Configuration
+          network:
+            primary_ip: {{ ansible_default_ipv4.address }}
+            primary_interface: {{ ansible_default_ipv4.interface }}
+            hostname: {{ ansible_hostname }}
+            fqdn: {{ ansible_fqdn }}
+
+    - name: Display generated host_vars
+      ansible.builtin.debug:
+        msg: "{{ host_vars_content }}"
+
+    - name: Save host_vars to file (local action)
+      delegate_to: localhost
+      ansible.builtin.copy:
+        content: "{{ host_vars_content }}"
+        dest: "{{ output_dir }}/{{ inventory_hostname }}.yml"
+        mode: "0644"
+      become: false
+
+    - name: Summary
+      ansible.builtin.debug:
+        msg: "✅ Generated {{ output_dir }}/{{ inventory_hostname }}.yml"
diff --git a/ansible/playbooks/onboard-nodes.yml b/ansible/playbooks/onboard-nodes.yml
new file mode 100644
index 0000000..6de1403
--- /dev/null
+++ b/ansible/playbooks/onboard-nodes.yml
@@ -0,0 +1,133 @@
+---
+# Node Onboarding Playbook
+# Purpose: Bootstrap new nodes for Ansible management
+# Usage: ansible-playbook playbooks/onboard-nodes.yml -k -K
+#        (-k prompts for SSH password, -K prompts for sudo password)
+#        Add --limit <host> to onboard a single node, or pass
+#        -e onboard_hosts=<pattern> to target hosts outside docker_nodes
+# Requires: the ansible.posix collection (provides authorized_key)
+
+- name: Onboard new nodes to Ansible control
+  # docker_nodes currently holds heimdall and waldorf, and it is the group new
+  # nodes are added to, so --limit <newnode> now matches instead of being
+  # filtered out by a hard-coded host list.
+  hosts: "{{ onboard_hosts | default('docker_nodes') }}"
+  gather_facts: true
+  become: false
+  vars:
+    # Public key of the control node (watchtower); override with -e if the
+    # key is rotated.
+    control_node_public_key: >-
+      ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIJ9ryXcRsMITcIW+Rc0t3Qou7XGfyIeihLR2PInySogp ansible@watchtower
+    # Inventory may set nfs_mount_point (nfs_clients group); fall back to the
+    # path this playbook has always checked.
+    nfs_path: "{{ nfs_mount_point | default('/mnt/appdata') }}"
+  tasks:
+    - name: Gather OS facts
+      ansible.builtin.setup:
+        gather_subset:
+          - "!all"
+          - "!min"
+          - "network"
+          - "distribution"
+
+    - name: Display target host information
+      ansible.builtin.debug:
+        msg: |
+          Onboarding {{ inventory_hostname }}
+          IP: {{ ansible_host }}
+          Distribution: {{ ansible_distribution }} {{ ansible_distribution_version }}
+          Architecture: {{ ansible_architecture }}
+
+    - name: Ensure .ssh directory exists
+      ansible.builtin.file:
+        path: "{{ ansible_env.HOME }}/.ssh"
+        state: directory
+        mode: "0700"
+        owner: "{{ ansible_user }}"
+        group: "{{ ansible_user }}"
+
+    - name: Deploy watchtower SSH public key
+      ansible.posix.authorized_key:
+        user: "{{ ansible_user }}"
+        state: present
+        key: "{{ control_node_public_key }}"
+        comment: "ansible@watchtower"
+
+    - name: Test passwordless sudo access
+      ansible.builtin.command: sudo -n true
+      register: sudo_check
+      changed_when: false
+      failed_when: false
+
+    - name: Display sudo access status
+      ansible.builtin.debug:
+        msg: >-
+          {% if sudo_check.rc == 0 %}
+          ✅ Passwordless sudo is configured
+          {% else %}
+          ⚠️ Passwordless sudo is NOT configured - some playbooks may require -K flag
+          {% endif %}
+
+    - name: Verify Python 3 is available
+      ansible.builtin.command: python3 --version
+      register: python_version
+      changed_when: false
+
+    - name: Display Python version
+      ansible.builtin.debug:
+        msg: "Python: {{ python_version.stdout }}"
+
+    - name: Check if Docker is installed
+      ansible.builtin.command: docker --version
+      register: docker_check
+      changed_when: false
+      failed_when: false
+
+    - name: Display Docker status
+      ansible.builtin.debug:
+        msg: >-
+          {% if docker_check.rc == 0 %}
+          ✅ Docker installed: {{ docker_check.stdout }}
+          {% else %}
+          ⚠️ Docker is NOT installed
+          {% endif %}
+
+    - name: Check NFS mount point
+      ansible.builtin.stat:
+        path: "{{ nfs_path }}"
+      register: nfs_mount
+
+    # NOTE(review): ansible.builtin.stat does not appear to return an ismount
+    # key, so ask mountpoint(1) instead: rc 0 means the path is a mount point.
+    - name: Check whether the NFS path is mounted
+      ansible.builtin.command:
+        argv:
+          - mountpoint
+          - -q
+          - "{{ nfs_path }}"
+      register: nfs_mounted
+      changed_when: false
+      failed_when: false
+      when: nfs_mount.stat.exists
+
+    - name: Display NFS mount status
+      ansible.builtin.debug:
+        msg: >-
+          {% if nfs_mount.stat.exists %}
+          ✅ {{ nfs_path }} exists ({{ 'mounted' if nfs_mounted.rc == 0 else 'not mounted' }})
+          {% else %}
+          ⚠️ {{ nfs_path }} does NOT exist
+          {% endif %}
+
+    - name: Create onboarding summary
+      ansible.builtin.debug:
+        msg:
+          - "=========================================="
+          - "Onboarding Complete for {{ inventory_hostname }}"
+          - "=========================================="
+          - "✅ SSH key deployed"
+          - "✅ Host is reachable"
+          - "Next steps:"
+          - "  • Test connectivity: ansible {{ inventory_hostname }} -m ping"
+          - "  • Verify sudo: ansible {{ inventory_hostname }} -b -m command -a 'whoami'"
diff --git a/ansible/playbooks/quick-facts.yml b/ansible/playbooks/quick-facts.yml
new file mode 100644
index 0000000..20f0cdb
--- /dev/null
+++ b/ansible/playbooks/quick-facts.yml
@@ -0,0 +1,49 @@
+---
+# Quick Facts Display
+# Purpose: Show key system information without saving
+# Usage: ansible-playbook playbooks/quick-facts.yml -k --limit hostname
+
+- name: Quick system facts check
+  hosts: all
+  gather_facts: true
+  vars:
+    # Last non-numeric ansible_processor entry; a fixed index such as [2] can
+    # land on a bare CPU index on hosts that report no vendor/model strings.
+    cpu_model: >-
+      {{ ansible_processor | reject('match', '^[0-9]+$') | list | last
+      | default('unknown') }}
+  tasks:
+    - name: Display system summary
+      ansible.builtin.debug:
+        msg:
+          - "=========================================="
+          - "{{ inventory_hostname | upper }}"
+          - "=========================================="
+          - "IP Address: {{ ansible_host }}"
+          - "OS: {{ ansible_distribution }} {{ ansible_distribution_version }}"
+          - "Kernel: {{ ansible_kernel }}"
+          - "Arch: {{ ansible_architecture }}"
+          - "CPU: {{ cpu_model }}"
+          - "Cores: {{ ansible_processor_vcpus }}"
+          - "Memory: {{ (ansible_memtotal_mb / 1024) | round(1) }} GB"
+          - "Disk: {{ ansible_devices.keys() | list }}"
+          - "Hostname: {{ ansible_hostname }}"
+          - "FQDN: {{ ansible_fqdn }}"
+
+    - name: Check for key paths
+      ansible.builtin.stat:
+        path: "{{ item }}"
+      loop:
+        - /mnt/appdata
+        - /dev/dri
+        - /usr/bin/docker
+      register: path_checks
+
+    # For a missing path stat returns only "exists: false", so the path has to
+    # come from the loop item rather than from stat.path.
+    - name: Display path status
+      ansible.builtin.debug:
+        msg: "{{ item.item }}: {{ '✅ exists' if item.stat.exists else '❌ missing' }}"
+      loop: "{{ path_checks.results }}"
+      loop_control:
+        label: "{{ item.item }}"