218 lines
9.1 KiB
YAML
218 lines
9.1 KiB
YAML
---
# playbooks/proxmox/pve_audit.yml
# Read-only cross-node consistency audit for the Proxmox cluster.
# Safe to schedule. Makes no changes to any cluster node; the only write is
# the report file created on the control node.
#
# What this does:
#   Play 1 — Gathers key state from all proxmox_cluster nodes (kernel, distro
#            version, repos, swap, nag script, GRUB cmdline, HA services,
#            cluster quorum, sshd PermitRootLogin)
#   Play 2 — Writes a markdown drift report to
#            outputs/pve_audit_<timestamp>.md, then asserts consistency
#            across all proxmox_cluster nodes
#
# Usage:
#   ansible-playbook -i inventory/hosts.ini playbooks/proxmox/pve_audit.yml
#
# Output:
#   outputs/pve_audit_<timestamp>.md (repo root)
|
|
# Play 1 runs read-only probes on every host in `proxmox_cluster` and stores
# the results in the per-host fact `pve_audit`, which Play 2 reads through
# `hostvars`. Every command task sets `changed_when: false` (nothing is
# modified) and `check_mode: false` (so the probes still run under --check).
- name: "Play 1: Gather Proxmox cluster node state"
  hosts: proxmox_cluster
  become: true
  gather_facts: true

  tasks:
    - name: Check nag removal script presence
      ansible.builtin.stat:
        path: /usr/local/bin/pve-remove-nag.sh
      register: nag_script_stat

    - name: Read GRUB cmdline
      ansible.builtin.command:
        cmd: grep '^GRUB_CMDLINE_LINUX_DEFAULT=' /etc/default/grub
      register: grub_cmdline
      changed_when: false
      check_mode: false
      # grep exits 1 when no line matches. That is an audit finding, not a
      # task failure: failing here would leave this host without `pve_audit`
      # and break the report in Play 2. Exit codes >= 2 (e.g. unreadable
      # file) are still treated as real errors.
      failed_when: grub_cmdline.rc not in [0, 1]

    - name: Check enterprise repo files absent
      ansible.builtin.stat:
        path: "{{ item }}"
      loop:
        - /etc/apt/sources.list.d/pve-enterprise.list
        - /etc/apt/sources.list.d/pve-enterprise.sources
        - /etc/apt/sources.list.d/ceph.list
        - /etc/apt/sources.list.d/ceph.sources
      register: enterprise_repo_stat

    - name: Get cluster quorum status
      ansible.builtin.command:
        cmd: pvecm status
      register: pvecm_status
      changed_when: false
      check_mode: false
      # Never fail here; the `quorate` fact below records the outcome.
      failed_when: false

    - name: Check HA and cluster service states
      ansible.builtin.command:
        cmd: "systemctl is-active {{ item }}"
      register: service_active_check
      changed_when: false
      check_mode: false
      # `systemctl is-active` exits non-zero for inactive units; the state is
      # read from stdout instead of the return code.
      failed_when: false
      loop:
        - corosync
        - pve-ha-lrm
        - pve-ha-crm

    - name: Check PermitRootLogin effective setting
      ansible.builtin.command:
        cmd: sshd -T
      register: sshd_config_dump
      changed_when: false
      check_mode: false
      failed_when: false

    - name: Stash per-node audit facts for cross-node comparison
      ansible.builtin.set_fact:
        pve_audit:
          kernel: "{{ ansible_kernel }}"
          distro_version: "{{ ansible_distribution_version }}"
          swap_mb: "{{ ansible_swaptotal_mb }}"
          nag_script_present: "{{ nag_script_stat.stat.exists }}"
          # Placeholder when grep found no matching line (empty stdout).
          grub_cmdline: >-
            {{ grub_cmdline.stdout | default('(GRUB_CMDLINE_LINUX_DEFAULT not found)', true) }}
          # True only when none of the stat'ed repo files exist.
          enterprise_repos_absent: >-
            {{ enterprise_repo_stat.results | selectattr('stat.exists', 'equalto', true) | list | length == 0 }}
          # `default(..., true)` also replaces an empty string, so the
          # placeholder appears when pvecm produced no output at all.
          pvecm_output: "{{ pvecm_status.stdout | default('(pvecm not available)', true) }}"
          # Quorate only if the "Quorate:" line of `pvecm status` contains
          # "Yes". regex_search yields None on no match, hence the
          # two-argument default.
          quorate: >-
            {{ 'Quorate:' in (pvecm_status.stdout | default('')) and
            'Yes' in ((pvecm_status.stdout | default('')) | regex_search('Quorate:.*') | default('', true)) }}
          corosync_active: >-
            {{ (service_active_check.results | selectattr('item', 'equalto', 'corosync') | first).stdout == 'active' }}
          ha_lrm_active: >-
            {{ (service_active_check.results | selectattr('item', 'equalto', 'pve-ha-lrm') | first).stdout == 'active' }}
          ha_crm_active: >-
            {{ (service_active_check.results | selectattr('item', 'equalto', 'pve-ha-crm') | first).stdout == 'active' }}
          # True only for an effective "permitrootlogin yes" in `sshd -T`.
          permit_root_login: >-
            {{ 'permitrootlogin yes' in (sshd_config_dump.stdout | default('') | lower) }}
|
|
|
|
# Play 2 runs on the control node. It reads the `pve_audit` fact of every host
# in `proxmox_cluster` through `hostvars`, writes a markdown report, and then
# runs the assertions. The report is written before the assertions so that it
# exists even when an assertion fails.
- name: "Play 2: Cross-node consistency assertions and drift report"
  hosts: localhost
  gather_facts: false

  vars:
    pve_nodes: "{{ groups['proxmox_cluster'] }}"
    # NOTE(review): this resolves three levels above the playbook directory.
    # Confirm that this is the intended "repo root" for the repository layout.
    report_path: "{{ playbook_dir }}/../../../outputs/pve_audit_{{ audit_timestamp }}.md"

  tasks:
    # A play-level var holding a lookup is re-evaluated on every reference, so
    # the file name and the "Generated:" line could show different seconds.
    # set_fact evaluates the lookup once and stores the result.
    - name: Freeze audit timestamp for this run
      ansible.builtin.set_fact:
        audit_timestamp: "{{ lookup('pipe', 'date +%Y%m%dT%H%M%S') }}"

    # Without this check, a node that failed or was unreachable in Play 1
    # makes the report template abort with an undefined-variable error.
    - name: Verify every node reported audit facts
      ansible.builtin.assert:
        that:
          - hostvars[item]['pve_audit'] is defined
        fail_msg: "❌ No audit facts for {{ item }} — the node failed or was unreachable in Play 1"
        success_msg: "✅ {{ item }}: audit facts collected"
      loop: "{{ pve_nodes }}"

    - name: Ensure outputs directory exists
      ansible.builtin.file:
        path: "{{ report_path | dirname }}"
        state: directory
        mode: '0755'

    - name: Write drift report
      ansible.builtin.copy:
        dest: "{{ report_path }}"
        mode: '0644'
        content: |
          # Proxmox Cluster Audit Report

          Generated: {{ audit_timestamp }}
          Nodes audited: {{ pve_nodes | join(', ') }}

          ## Node Summary

          | Node | Kernel | Distro | Swap | Nag Script | Enterprise Repos | Quorate | Corosync | HA-LRM | HA-CRM |
          |------|--------|--------|------|------------|------------------|---------|----------|--------|--------|
          {% for node in pve_nodes %}
          | {{ node }} | `{{ hostvars[node]['pve_audit']['kernel'] }}` | {{ hostvars[node]['pve_audit']['distro_version'] }} | {{ hostvars[node]['pve_audit']['swap_mb'] }}MB | {{ '✅' if hostvars[node]['pve_audit']['nag_script_present'] | bool else '❌' }} | {{ '✅ absent' if hostvars[node]['pve_audit']['enterprise_repos_absent'] | bool else '❌ present' }} | {{ '✅' if hostvars[node]['pve_audit']['quorate'] | bool else '❌' }} | {{ '✅' if hostvars[node]['pve_audit']['corosync_active'] | bool else '❌' }} | {{ '✅' if hostvars[node]['pve_audit']['ha_lrm_active'] | bool else '❌' }} | {{ '✅' if hostvars[node]['pve_audit']['ha_crm_active'] | bool else '❌' }} |
          {% endfor %}

          ## GRUB Cmdline

          {% for node in pve_nodes %}
          - **{{ node }}**: `{{ hostvars[node]['pve_audit']['grub_cmdline'] }}`
          {% endfor %}

          ## Cluster Quorum Status

          {% for node in pve_nodes %}
          ### {{ node }}

          ```
          {{ hostvars[node]['pve_audit']['pvecm_output'] }}
          ```

          {% endfor %}

    # The consistency checks below compare each node against the first node
    # in the group (pve_nodes[0]).
    - name: Assert kernel consistency across all nodes
      ansible.builtin.assert:
        that:
          - hostvars[item]['pve_audit']['kernel'] == hostvars[pve_nodes[0]]['pve_audit']['kernel']
        fail_msg: >-
          ❌ Kernel drift: {{ item }} has {{ hostvars[item]['pve_audit']['kernel'] }}
          but {{ pve_nodes[0] }} has {{ hostvars[pve_nodes[0]]['pve_audit']['kernel'] }}
        success_msg: "✅ {{ item }}: kernel {{ hostvars[item]['pve_audit']['kernel'] }}"
      loop: "{{ pve_nodes }}"

    - name: Assert distro version consistency across all nodes
      ansible.builtin.assert:
        that:
          - hostvars[item]['pve_audit']['distro_version'] == hostvars[pve_nodes[0]]['pve_audit']['distro_version']
        fail_msg: >-
          ❌ Distro version drift: {{ item }} has {{ hostvars[item]['pve_audit']['distro_version'] }}
          but {{ pve_nodes[0] }} has {{ hostvars[pve_nodes[0]]['pve_audit']['distro_version'] }}
        success_msg: "✅ {{ item }}: distro {{ hostvars[item]['pve_audit']['distro_version'] }}"
      loop: "{{ pve_nodes }}"

    - name: Assert swap is disabled on all nodes
      ansible.builtin.assert:
        that:
          - hostvars[item]['pve_audit']['swap_mb'] | int == 0
        fail_msg: "❌ Swap is enabled on {{ item }}: {{ hostvars[item]['pve_audit']['swap_mb'] }}MB — run pve_baseline.yml --tags storage"
        success_msg: "✅ {{ item }}: swap disabled"
      loop: "{{ pve_nodes }}"

    - name: Assert nag removal script present on all nodes
      ansible.builtin.assert:
        that:
          - hostvars[item]['pve_audit']['nag_script_present'] | bool
        fail_msg: "❌ Nag removal script missing on {{ item }} — run pve_baseline.yml --tags nag"
        success_msg: "✅ {{ item }}: nag script present"
      loop: "{{ pve_nodes }}"

    - name: Assert enterprise repos absent on all nodes
      ansible.builtin.assert:
        that:
          - hostvars[item]['pve_audit']['enterprise_repos_absent'] | bool
        fail_msg: "❌ Enterprise repo still present on {{ item }} — run pve_baseline.yml --tags repos"
        success_msg: "✅ {{ item }}: enterprise repos absent"
      loop: "{{ pve_nodes }}"

    - name: Assert cluster is quorate
      ansible.builtin.assert:
        that:
          - hostvars[item]['pve_audit']['quorate'] | bool
        fail_msg: "❌ {{ item }} reports cluster NOT quorate — investigate immediately"
        success_msg: "✅ {{ item }}: cluster quorate"
      loop: "{{ pve_nodes }}"

    - name: Assert HA and Corosync services running on all nodes
      ansible.builtin.assert:
        that:
          - hostvars[item]['pve_audit']['corosync_active'] | bool
          - hostvars[item]['pve_audit']['ha_lrm_active'] | bool
          - hostvars[item]['pve_audit']['ha_crm_active'] | bool
        fail_msg: >-
          ❌ HA/Corosync degraded on {{ item }}:
          corosync={{ hostvars[item]['pve_audit']['corosync_active'] }}
          pve-ha-lrm={{ hostvars[item]['pve_audit']['ha_lrm_active'] }}
          pve-ha-crm={{ hostvars[item]['pve_audit']['ha_crm_active'] }}
        success_msg: "✅ {{ item }}: corosync + HA services active"
      loop: "{{ pve_nodes }}"

    - name: Assert PermitRootLogin is enabled on all nodes
      ansible.builtin.assert:
        that:
          - hostvars[item]['pve_audit']['permit_root_login'] | bool
        fail_msg: "❌ PermitRootLogin is not 'yes' on {{ item }} — run pve_baseline.yml --tags ssh to fix"
        success_msg: "✅ {{ item }}: PermitRootLogin yes"
      loop: "{{ pve_nodes }}"