feat(ansible): enhance inventory management and onboarding playbooks with detailed host variables and system facts collection

This commit is contained in:
Nathan 2026-04-13 20:01:48 -04:00
parent 7a3ba409e6
commit ef875a78cc
8 changed files with 599 additions and 8 deletions

View File

@ -0,0 +1,56 @@
---
# Host-specific variables for heimdall
# IP: 10.0.0.151
# Auto-generated: 2026-04-13T23:44:52Z
# NOTE: generated by playbooks/gather-node-facts.yml; manual edits are
# overwritten the next time that playbook runs against this host.

# Hardware Details
hardware:
  platform: physical_server
  cpu: "Intel(R) N100 (4 cores)"
  memory_gb: 15
  # NOTE(review): 0 looks like a collection artifact rather than a real disk
  # size - regenerate to confirm.
  storage_gb: 0
  architecture: x86_64

# Operating System
os:
  distribution: Ubuntu
  version: "24.04"
  codename: "Noble"
  kernel: 6.8.0-107-generic

# GPU Configuration
gpu:
  enabled: true
  device: /dev/dri
  # The stray "/usr/bin/lspci" line that leaked in from the detection script
  # has been removed; only the lspci VGA line is kept.
  info: '00:02.0 VGA compatible controller: Intel Corporation Alder Lake-N [UHD Graphics]'

# Docker Status
docker:
  installed: true
  version: "Docker version 29.4.0, build 9d7ad9f"
  running_containers:
    - trek
    - vaultwarden
    - komodo-core
    - komodo-periphery-heimdall
    - gitea-server
    - gitea-db
    - traefik
    - docker-socket-proxy
    - redis
    - komodo-db

# NFS Configuration
nfs:
  mounts_configured: true
  mount_details: |
    10.0.0.250:/Volume2/media on /mnt/media type nfs4 (rw,relatime,vers=4.2,rsize=1048576,wsize=1048576,namlen=255,hard,proto=tcp,timeo=600,retrans=2,sec=sys,clientaddr=10.0.0.151,local_lock=none,addr=10.0.0.250)
    10.0.0.250:/Volume1/appdata on /mnt/appdata type nfs4 (rw,relatime,vers=4.2,rsize=1048576,wsize=1048576,namlen=255,hard,proto=tcp,timeo=600,retrans=2,sec=sys,clientaddr=10.0.0.151,local_lock=none,addr=10.0.0.250)

# Network Configuration
network:
  primary_ip: 10.0.0.151
  primary_interface: bond0
  hostname: heimdall
  fqdn: heimdall

View File

@ -0,0 +1,51 @@
---
# Host-specific variables for waldorf
# IP: 10.0.0.251
# Auto-generated: 2026-04-13T23:45:33Z
# NOTE: generated by playbooks/gather-node-facts.yml; manual edits are
# overwritten the next time that playbook runs against this host.

# Hardware Details
hardware:
  platform: physical_server
  cpu: "Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz (8 cores)"
  memory_gb: 16
  # NOTE(review): 0 looks like a collection artifact rather than a real disk
  # size - regenerate to confirm.
  storage_gb: 0
  architecture: x86_64

# Operating System
os:
  distribution: Ubuntu
  version: "24.04"
  codename: "Noble"
  kernel: 6.8.0-107-generic

# GPU Configuration
gpu:
  enabled: true
  device: /dev/dri
  # The stray "/usr/bin/lspci" line that leaked in from the detection script
  # has been removed; only the lspci VGA line is kept.
  info: '01:00.0 VGA compatible controller: NVIDIA Corporation GP106M [GeForce GTX 1060 Mobile Rev. 2] (rev a1)'

# Docker Status
docker:
  installed: true
  version: "Docker version 29.4.0, build 9d7ad9f"
  running_containers:
    - komodo-periphery-waldorf
    - docker-socket-proxy
    - buildx_buildkit_default
    - tunarr
    - plex

# NFS Configuration
nfs:
  mounts_configured: true
  mount_details: |
    10.0.0.250:/Volume1/appdata on /mnt/appdata type nfs4 (rw,relatime,vers=4.2,rsize=1048576,wsize=1048576,namlen=255,hard,proto=tcp,timeo=600,retrans=2,sec=sys,clientaddr=10.0.0.251,local_lock=none,addr=10.0.0.250)
    10.0.0.250:/Volume2/media on /mnt/media type nfs4 (rw,relatime,vers=4.2,rsize=1048576,wsize=1048576,namlen=255,hard,proto=tcp,timeo=600,retrans=2,sec=sys,clientaddr=10.0.0.251,local_lock=none,addr=10.0.0.250)

# Network Configuration
network:
  # Differs from the 10.0.0.251 address in the header of this file.
  primary_ip: 10.0.0.110
  primary_interface: enp0s31f6
  hostname: waldorf
  fqdn: waldorf

View File

@ -0,0 +1,48 @@
---
# Host-specific variables for watchtower
# IP: 10.0.0.200
# Auto-generated: 2026-04-13T23:48:26Z
# NOTE: generated by playbooks/gather-node-facts.yml; manual edits are
# overwritten the next time that playbook runs against this host.

# Hardware Details
hardware:
  platform: physical_server
  # NOTE(review): "2" looks like a processor index rather than a CPU model
  # name - regenerate to confirm.
  cpu: "2 (4 cores)"
  memory_gb: 16
  # NOTE(review): 0 looks like a collection artifact rather than a real disk
  # size - regenerate to confirm.
  storage_gb: 0
  architecture: aarch64

# Operating System
os:
  distribution: Debian
  version: "13.4"
  codename: "Trixie"
  kernel: 6.12.75+rpt-rpi-2712

# GPU Configuration
gpu:
  enabled: true
  device: /dev/dri
  # The recorded value was only the "/usr/bin/lspci" path leaked by the
  # detection script; no VGA device line was captured for this host.
  info: ""

# Docker Status
docker:
  installed: true
  version: "Docker version 29.4.0, build 9d7ad9f"
  running_containers:
    - vscode
    - komodo-perihery-watchtower
    - traefik-kop
    - docker-socket-proxy

# NFS Configuration
nfs:
  mounts_configured: true
  mount_details: |
    10.0.0.250:/Volume1/appdata on /mnt/appdata type nfs (rw,relatime,vers=3,rsize=1048576,wsize=1048576,namlen=255,hard,nolock,proto=tcp,timeo=600,retrans=2,sec=sys,mountaddr=10.0.0.250,mountvers=3,mountport=39187,mountproto=udp,local_lock=all,addr=10.0.0.250,x-systemd.automount)

# Network Configuration
network:
  # Differs from the 10.0.0.200 address in the header of this file.
  primary_ip: 10.0.0.80
  primary_interface: eth0
  hostname: watchtower
  fqdn: watchtower

View File

@ -8,18 +8,36 @@
watchtower ansible_host=10.0.0.200 ansible_user=chester
# =============================================================================
# Add your managed nodes below
# Docker Nodes
# =============================================================================
[docker_nodes]
heimdall ansible_host=10.0.0.151 ansible_user=chester
waldorf ansible_host=10.0.0.251 ansible_user=chester
# Example structure:
# [docker_swarm_managers]
# heimdall ansible_host=10.0.0.X ansible_user=chester
# Core infrastructure services (Komodo, Gitea, Traefik)
[core_services]
heimdall ansible_host=10.0.0.151 ansible_user=chester
# [docker_swarm_workers]
# waldorf ansible_host=10.0.0.X ansible_user=chester
# Media services (Plex, Tunarr)
[media_services]
waldorf ansible_host=10.0.0.251 ansible_user=chester
# [proxmox_cluster]
# pve-node1 ansible_host=10.0.0.X ansible_user=root
# =============================================================================
# Platform Groups
# =============================================================================
[physical_servers]
heimdall ansible_host=10.0.0.151 ansible_user=chester
waldorf ansible_host=10.0.0.251 ansible_user=chester
[raspberry_pi]
watchtower ansible_host=10.0.0.200 ansible_user=chester
# =============================================================================
# NFS Clients (nodes with /mnt/appdata)
# =============================================================================
[nfs_clients]
heimdall ansible_host=10.0.0.151 ansible_user=chester
waldorf ansible_host=10.0.0.251 ansible_user=chester
# =============================================================================
# Group Variables
@ -27,3 +45,6 @@ watchtower ansible_host=10.0.0.200 ansible_user=chester
[all:vars]
ansible_python_interpreter=/usr/bin/python3
ansible_ssh_private_key_file=~/.ssh/id_ed25519
[nfs_clients:vars]
nfs_mount_point=/mnt/appdata

View File

@ -0,0 +1,130 @@
# Node Onboarding Guide
This guide covers onboarding new nodes into Ansible management from the watchtower control node.
---
## Prerequisites
**On Control Node (watchtower):**
- ✅ Ansible installed
- ✅ SSH key generated (`~/.ssh/id_ed25519`)
- ✅ Inventory configured
**On Target Nodes:**
- SSH access with password authentication enabled
- User account with sudo privileges
- Python 3 installed
---
## Quick Onboarding
### Step 1: Update Inventory
Edit [inventory/hosts.ini](../inventory/hosts.ini) to add the new node:
```ini
[docker_nodes]
newnode ansible_host=10.0.0.X ansible_user=chester
```
### Step 2: Run Onboarding Playbook
```bash
cd /home/chester/homelab/ansible
# Onboard specific nodes (will prompt for passwords)
ansible-playbook playbooks/onboard-nodes.yml -k -K --limit newnode
# Onboard all unonboarded nodes
ansible-playbook playbooks/onboard-nodes.yml -k -K --limit heimdall,waldorf
```
**Flags:**
- `-k` = Prompt for SSH password (initial connection)
- `-K` = Prompt for sudo password (if passwordless sudo not configured)
### Step 3: Test Connectivity
```bash
# Test basic connectivity
ansible newnode -m ping
# Test with privilege escalation
ansible newnode -b -m command -a 'whoami'
# Gather facts about the node
ansible newnode -m setup
```
---
## Current Node Status
| Node | IP | Platform | Services | Status |
|------|-------|----------|----------|--------|
| **watchtower** | 10.0.0.200 | Raspberry Pi 5 | Control Plane, Komodo Periphery | ✅ Control Node |
| **heimdall** | 10.0.0.151 | Physical Server | Komodo Core, Gitea, Traefik | ⏳ Pending onboarding |
| **waldorf** | 10.0.0.251 | Physical Server | Plex, Tunarr | ⏳ Pending onboarding |
---
## What the Onboarding Playbook Does
1. ✅ Deploys watchtower's SSH public key to target node
2. ✅ Verifies passwordless sudo configuration
3. ✅ Checks Python 3 availability
4. ✅ Validates Docker installation
5. ✅ Verifies NFS mount points
---
## Post-Onboarding
After successful onboarding, you can:
- Use all Ansible modules without password prompts
- Run playbooks against the node
- Automate deployments and configuration management
---
## Troubleshooting
### SSH Connection Fails
```bash
# Test manual SSH connection first
ssh chester@10.0.0.151
# If that works but Ansible fails, check inventory syntax
ansible-inventory --list
```
### Passwordless Sudo Required
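Edit `/etc/sudoers.d/90-cloud-init-users` on the target node. Use `sudo visudo -f /etc/sudoers.d/90-cloud-init-users` so that syntax errors are caught before the file is saved (a broken sudoers file can lock you out of sudo):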
```bash
# Allow user to run sudo without password
chester ALL=(ALL) NOPASSWD:ALL
```
### Python Not Found
```bash
# Install Python 3 on target node
sudo apt update && sudo apt install -y python3
```
---
## Next Steps
After onboarding, consider:
1. Configure automated deployments for Docker stacks
2. Set up monitoring and health checks
3. Implement backup automation
4. Create maintenance playbooks (updates, reboots, etc.)

View File

@ -0,0 +1,140 @@
---
# Gather Node Facts Playbook
# Purpose: Collect accurate system information from nodes for inventory
# Usage: ansible-playbook playbooks/gather-node-facts.yml
#        Add --limit <hostname> to target specific nodes
#        Use -k flag only if nodes aren't onboarded yet
#
# For every targeted host this play gathers facts, probes GPU / Docker / NFS
# state with read-only commands, renders a host_vars YAML document and writes
# it to inventory/host_vars/<inventory_hostname>.yml on the control node.

- name: Gather facts from managed nodes
  hosts: all
  gather_facts: true
  become: false

  vars:
    output_dir: "{{ playbook_dir }}/../inventory/host_vars"

    # ansible_processor is a flat list that can contain bare core indexes
    # ("0", "1", ...) next to vendor/model strings. A fixed index such as [2]
    # returns a core index on hosts that expose no model name, so take the
    # last non-numeric entry and fall back to "unknown".
    cpu_model: >-
      {{ ansible_processor | reject('match', '^[0-9]+$') | list | last | default('unknown') }}

    # Total size of real block devices in GiB. Loop, ram, zram, optical,
    # floppy, device-mapper and md entries are skipped, and the human-readable
    # size strings (MB/GB/TB/...) are converted with human_to_bytes instead of
    # stripping a literal "GB".
    storage_gb: >-
      {{ ((ansible_devices | default({}) | dict2items
      | rejectattr('key', 'match', '^(loop|ram|zram|sr|fd|dm-|md)')
      | map(attribute='value.size') | map('human_to_bytes') | sum)
      / (1024 ** 3)) | round | int }}

  tasks:
    - name: Display discovered facts summary
      ansible.builtin.debug:
        msg:
          - "======================================"
          - "Host: {{ inventory_hostname }}"
          - "======================================"
          - "FQDN: {{ ansible_fqdn }}"
          - "Distribution: {{ ansible_distribution }} {{ ansible_distribution_version }}"
          - "Kernel: {{ ansible_kernel }}"
          - "Architecture: {{ ansible_architecture }}"
          - "CPU Model: {{ cpu_model }}"
          - "CPU Cores: {{ ansible_processor_vcpus }}"
          - "Memory: {{ (ansible_memtotal_mb / 1024) | round(0) }} GB"
          - "Primary IP: {{ ansible_default_ipv4.address }}"
          - "Hostname: {{ ansible_hostname }}"

    - name: Check for GPU devices
      ansible.builtin.stat:
        path: /dev/dri
      register: gpu_check

    # The shell module runs /bin/sh by default. The bash-only `&>` redirect is
    # not understood by POSIX shells such as dash: `command -v lspci` is put in
    # the background and its path is printed into stdout. Use the portable
    # `>/dev/null 2>&1` form so stdout only ever holds the lspci line.
    - name: Detect GPU information (if available)
      ansible.builtin.shell: |
        if command -v lspci >/dev/null 2>&1; then
          lspci | grep -i vga | head -1
        else
          echo "lspci not available"
        fi
      register: gpu_info
      changed_when: false
      failed_when: false
      when: gpu_check.stat.exists

    - name: Check Docker installation
      ansible.builtin.command: docker --version
      register: docker_version
      changed_when: false
      failed_when: false

    - name: Check NFS mounts
      ansible.builtin.shell: mount | grep nfs || echo "No NFS mounts"
      register: nfs_mounts
      changed_when: false
      failed_when: false

    # The Go-template braces are escaped so Jinja passes `{{.Names}}` through
    # to docker unchanged.
    - name: Detect running Docker containers
      ansible.builtin.command: docker ps --format "{{ '{{' }}.Names{{ '}}' }}"
      register: docker_containers
      changed_when: false
      failed_when: false
      when: docker_version.rc == 0

    # Renders the host_vars document as text. Free-form command output is
    # emitted through to_json (a JSON string is a valid YAML double-quoted
    # scalar) so embedded quotes or newlines cannot break the generated file,
    # and booleans are lower-cased to the canonical true/false.
    # NOTE(review): platform detection only reports proxmox_vm when the system
    # vendor string contains "pve" - confirm that matches Proxmox guests.
    - name: Generate host_vars content
      ansible.builtin.set_fact:
        host_vars_content: |
          ---
          # Host-specific variables for {{ inventory_hostname }}
          # IP: {{ ansible_host }}
          # Auto-generated: {{ ansible_date_time.iso8601 }}
          # Hardware Details
          hardware:
            platform: {{ 'proxmox_vm' if 'pve' in ansible_system_vendor | lower else 'physical_server' }}
            cpu: "{{ cpu_model }} ({{ ansible_processor_vcpus }} cores)"
            memory_gb: {{ (ansible_memtotal_mb / 1024) | round(0) | int }}
            storage_gb: {{ storage_gb }}
            architecture: {{ ansible_architecture }}
          # Operating System
          os:
            distribution: {{ ansible_distribution }}
            version: "{{ ansible_distribution_version }}"
            codename: "{{ ansible_distribution_release | title }}"
            kernel: {{ ansible_kernel }}
          {% if gpu_check.stat.exists and gpu_info.stdout != "lspci not available" %}
          # GPU Configuration
          gpu:
            enabled: true
            device: /dev/dri
            info: {{ gpu_info.stdout | trim | to_json }}
          {% endif %}
          # Docker Status
          docker:
            installed: {{ (docker_version.rc == 0) | lower }}
          {% if docker_version.rc == 0 %}
            version: {{ docker_version.stdout | trim | to_json }}
          {% endif %}
          {% if docker_containers.stdout_lines | default([]) | length > 0 %}
            running_containers:
          {% for container in docker_containers.stdout_lines %}
              - {{ container }}
          {% endfor %}
          {% endif %}
          # NFS Configuration
          nfs:
            mounts_configured: {{ ('nfs' in nfs_mounts.stdout) | lower }}
          {% if 'nfs' in nfs_mounts.stdout %}
            mount_details: |
              {{ nfs_mounts.stdout | indent(4) }}
          {% endif %}
          # Network Configuration
          network:
            primary_ip: {{ ansible_default_ipv4.address }}
            primary_interface: {{ ansible_default_ipv4.interface }}
            hostname: {{ ansible_hostname }}
            fqdn: {{ ansible_fqdn }}

    - name: Display generated host_vars
      ansible.builtin.debug:
        msg: "{{ host_vars_content }}"

    # Written on the control node, not on the managed host.
    - name: Save host_vars to file (local action)
      delegate_to: localhost
      ansible.builtin.copy:
        content: "{{ host_vars_content }}"
        dest: "{{ output_dir }}/{{ inventory_hostname }}.yml"
        mode: "0644"
      become: false

    - name: Summary
      ansible.builtin.debug:
        msg: "✅ Generated {{ output_dir }}/{{ inventory_hostname }}.yml"

View File

@ -0,0 +1,106 @@
---
# Node Onboarding Playbook
# Purpose: Bootstrap new nodes for Ansible management
# Usage: ansible-playbook playbooks/onboard-nodes.yml -k -K
#        (-k prompts for SSH password, -K prompts for sudo password)
#        Add --limit <hostname> to onboard a single node
#
# Deploys the control node's SSH public key, then runs read-only checks
# (passwordless sudo, Python 3, Docker, NFS mount point) and prints a summary.

- name: Onboard new nodes to Ansible control
  # Target the [docker_nodes] inventory group (currently heimdall and waldorf)
  # instead of a hard-coded host list, so that a node added to that group can
  # be selected with `--limit <hostname>`; a limit outside the play's host
  # pattern would otherwise match nothing.
  hosts: docker_nodes
  # Full fact gathering already provides the distribution, architecture and
  # env facts used below, so no additional setup task is needed.
  gather_facts: true
  become: false

  vars:
    # Prefer the inventory's nfs_mount_point group variable; fall back to the
    # previous hard-coded path for hosts that do not define it.
    nfs_path: "{{ nfs_mount_point | default('/mnt/appdata') }}"

  tasks:
    - name: Display target host information
      ansible.builtin.debug:
        msg: |
          Onboarding {{ inventory_hostname }}
          IP: {{ ansible_host }}
          Distribution: {{ ansible_distribution }} {{ ansible_distribution_version }}
          Architecture: {{ ansible_architecture }}

    - name: Ensure .ssh directory exists
      ansible.builtin.file:
        path: "{{ ansible_env.HOME }}/.ssh"
        state: directory
        mode: "0700"
        owner: "{{ ansible_user }}"
        group: "{{ ansible_user }}"

    # authorized_key lives in the ansible.posix collection; the canonical
    # FQCN is used rather than the ansible.builtin redirect.
    - name: Deploy watchtower SSH public key
      ansible.posix.authorized_key:
        user: "{{ ansible_user }}"
        state: present
        key: "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIJ9ryXcRsMITcIW+Rc0t3Qou7XGfyIeihLR2PInySogp ansible@watchtower"
        comment: "ansible@watchtower"

    # `sudo -n` fails instead of prompting, so a non-zero rc means a password
    # would be required. The result is only reported, never fatal.
    - name: Test passwordless sudo access
      ansible.builtin.command: sudo -n true
      register: sudo_check
      changed_when: false
      failed_when: false

    - name: Display sudo access status
      ansible.builtin.debug:
        msg: >-
          {% if sudo_check.rc == 0 %}
          ✅ Passwordless sudo is configured
          {% else %}
          ⚠️ Passwordless sudo is NOT configured - some playbooks may require -K flag
          {% endif %}

    - name: Verify Python 3 is available
      ansible.builtin.command: python3 --version
      register: python_version
      changed_when: false

    - name: Display Python version
      ansible.builtin.debug:
        msg: "Python: {{ python_version.stdout }}"

    - name: Check if Docker is installed
      ansible.builtin.command: docker --version
      register: docker_check
      changed_when: false
      failed_when: false

    - name: Display Docker status
      ansible.builtin.debug:
        msg: >-
          {% if docker_check.rc == 0 %}
          ✅ Docker installed: {{ docker_check.stdout }}
          {% else %}
          ⚠️ Docker is NOT installed
          {% endif %}

    - name: Check NFS mount point
      ansible.builtin.stat:
        path: "{{ nfs_path }}"
      register: nfs_mount

    # The stat module does not return an `ismount` field, so whether the
    # directory is an active mount is determined with `mountpoint -q`
    # (rc 0 = mounted). Skipped when the directory does not exist.
    - name: Check whether the NFS mount point is mounted
      ansible.builtin.command:
        argv:
          - mountpoint
          - "-q"
          - "{{ nfs_path }}"
      register: nfs_mounted
      changed_when: false
      failed_when: false
      when: nfs_mount.stat.exists

    - name: Display NFS mount status
      ansible.builtin.debug:
        msg: >-
          {% if nfs_mount.stat.exists %}
          ✅ {{ nfs_path }} exists ({{ 'mounted' if nfs_mounted.rc == 0 else 'not mounted' }})
          {% else %}
          ⚠️ {{ nfs_path }} does NOT exist
          {% endif %}

    - name: Create onboarding summary
      ansible.builtin.debug:
        msg:
          - "=========================================="
          - "Onboarding Complete for {{ inventory_hostname }}"
          - "=========================================="
          - "✅ SSH key deployed"
          - "✅ Host is reachable"
          - "Next steps:"
          - " • Test connectivity: ansible {{ inventory_hostname }} -m ping"
          - " • Verify sudo: ansible {{ inventory_hostname }} -b -m command -a 'whoami'"

View File

@ -0,0 +1,39 @@
---
# Quick Facts Display
# Purpose: Show key system information without saving
# Usage: ansible-playbook playbooks/quick-facts.yml -k --limit hostname
#
# Read-only: prints a summary of gathered facts and reports whether a few
# well-known paths exist on each host.

- name: Quick system facts check
  hosts: all
  gather_facts: true

  tasks:
    - name: Display system summary
      ansible.builtin.debug:
        msg:
          - "=========================================="
          - "{{ inventory_hostname | upper }}"
          - "=========================================="
          - "IP Address: {{ ansible_host }}"
          - "OS: {{ ansible_distribution }} {{ ansible_distribution_version }}"
          - "Kernel: {{ ansible_kernel }}"
          - "Arch: {{ ansible_architecture }}"
          # ansible_processor can hold bare core indexes next to vendor/model
          # strings, so a fixed index may print a core number; use the last
          # non-numeric entry instead.
          - "CPU: {{ ansible_processor | reject('match', '^[0-9]+$') | list | last | default('unknown') }}"
          - "Cores: {{ ansible_processor_vcpus }}"
          - "Memory: {{ (ansible_memtotal_mb / 1024) | round(1) }} GB"
          - "Disk: {{ ansible_devices.keys() | list }}"
          - "Hostname: {{ ansible_hostname }}"
          - "FQDN: {{ ansible_fqdn }}"

    - name: Check for key paths
      ansible.builtin.stat:
        path: "{{ item }}"
      loop:
        - /mnt/appdata
        - /dev/dri
        - /usr/bin/docker
      register: path_checks

    # For a missing path the stat result contains only `exists: false`, so
    # `item.stat.path` is undefined exactly when "missing" must be reported.
    # `item.item` is the looped path and is always present.
    - name: Display path status
      ansible.builtin.debug:
        msg: "{{ item.item }}: {{ '✅ exists' if item.stat.exists else '❌ missing' }}"
      loop: "{{ path_checks.results }}"
      loop_control:
        label: "{{ item.item }}"