331 lines
13 KiB
YAML

---
# playbooks/proxmox/pve_baseline.yml
# Idempotent Proxmox cluster baseline enforcement for 12th Gen Intel laptops.
#
# ─────────────────────────────────────────────────────────────────────────────
# PURPOSE: Ongoing drift enforcement — safe to run any time, safe to schedule.
# Does NOT upgrade packages. Does NOT reboot.
# For day-0 first-time provisioning: use playbooks/onboarding/proxmox_host.yml
# For rolling package updates: use playbooks/proxmox/pve_update.yml
# For cross-node consistency audit: use playbooks/proxmox/pve_audit.yml
# ─────────────────────────────────────────────────────────────────────────────
#
# What this enforces (all idempotent):
#   0.  Identity:    Operational user, SSH key, passwordless sudo
#   1.  Repos:       Enterprise repos removed, no-subscription repos present,
#                    required packages installed (never upgraded)
#   2.  Kernel:      GRUB cmdline (ASPM, intel_pstate)
#   2b. SSH:         PermitRootLogin yes drop-in for cluster management
#   3.  Laptop:      Lid-switch suppression, suspend targets masked
#   4.  Storage:     Swap disabled
#   5.  HFI Check:   Intel Thread Director detection (read-only)
#   6.  Nag removal: Subscription nag script + dpkg hook deployed
#   7.  HA gate:     HA/Corosync are stopped only when standalone_mode is true;
#                    the default (false) leaves them running for the cluster
#
# Usage:
#   # All cluster nodes:
#   ansible-playbook -i inventory/hosts.ini playbooks/proxmox/pve_baseline.yml
#
#   # Single node:
#   ansible-playbook -i inventory/hosts.ini playbooks/proxmox/pve_baseline.yml --limit pve01
#
#   # Dry-run:
#   ansible-playbook -i inventory/hosts.ini playbooks/proxmox/pve_baseline.yml --check --diff
#
#   # Target a specific section only:
#   ansible-playbook -i inventory/hosts.ini playbooks/proxmox/pve_baseline.yml --tags repos
#   ansible-playbook -i inventory/hosts.ini playbooks/proxmox/pve_baseline.yml --tags identity
#   ansible-playbook -i inventory/hosts.ini playbooks/proxmox/pve_baseline.yml --tags nag
- name: Proxmox cluster baseline enforcement
  hosts: proxmox_cluster
  become: true
  vars:
    # Operational account managed by section 0. Falls back to 'chester' when
    # lab_ansible_user is not defined in inventory or extra vars.
    lab_user: "{{ lab_ansible_user | default('chester') }}"
    # Public key files searched on the control machine; the first one that
    # exists is deployed to lab_user.
    controller_ssh_pubkey_candidates:
      - "{{ lookup('env', 'HOME') ~ '/.ssh/id_ed25519_homelab.pub' }}"
      - "{{ lookup('env', 'HOME') ~ '/.ssh/id_ed25519.pub' }}"
    # Gates section 3 (lid-switch handling and masked suspend targets).
    is_laptop: true
    # Gates section 7. false = HA/Corosync left running (correct for a 3-node
    # cluster). Override with -e standalone_mode=true only for a truly isolated
    # single-node install.
    standalone_mode: false
tasks:
# Section 0: make sure the operational account exists, can log in with the
# controller's SSH key only (password locked), and has passwordless sudo.
- name: "0. Identity Management: Ensure user '{{ lab_user }}' is present"
  tags: [identity, baseline]
  block:
    # update_cache is deliberately false here.
    # NOTE(review): presumably because this section runs before section 1 has
    # replaced the enterprise repos, so an apt update could fail — confirm.
    - name: Install sudo package
      ansible.builtin.apt:
        name: sudo
        state: present
        update_cache: false

    - name: "Ensure group '{{ lab_user }}' exists"
      ansible.builtin.group:
        name: "{{ lab_user }}"
        state: present

    - name: "Create user '{{ lab_user }}' with sudo access"
      ansible.builtin.user:
        name: "{{ lab_user }}"
        group: "{{ lab_user }}"
        groups: sudo
        # Without append the module makes 'sudo' the ONLY supplementary group
        # and strips any other membership on every (scheduled) run.
        append: true
        shell: /bin/bash
        # '!' is not a valid hash, so password login is impossible; access is
        # by SSH key only.
        password: '!'
        password_lock: true

    # Lookups are evaluated on the control machine. With skip=true the lookup
    # yields an empty value instead of failing when no candidate file exists,
    # which the next task turns into a readable error.
    - name: Locate SSH public key on control machine
      ansible.builtin.set_fact:
        controller_ssh_pubkey_path: >-
          {{ lookup('ansible.builtin.first_found', {'files': controller_ssh_pubkey_candidates, 'skip': true}) }}
      delegate_to: localhost
      become: false

    - name: Fail early if SSH public key is missing
      ansible.builtin.fail:
        msg: >-
          SSH public key not found on the control machine.
          Checked: {{ controller_ssh_pubkey_candidates | join(', ') }}
          Generate one with: ssh-keygen -t ed25519 -f ~/.ssh/id_ed25519
      when: controller_ssh_pubkey_path | default('') | length == 0

    - name: "Deploy SSH key to {{ lab_user }} user"
      ansible.posix.authorized_key:
        user: "{{ lab_user }}"
        state: present
        key: "{{ lookup('file', controller_ssh_pubkey_path) }}"

    # visudo validates the fragment before it is moved into place, so a broken
    # rule cannot lock sudo.
    - name: "Allow '{{ lab_user }}' to use sudo without password"
      ansible.builtin.copy:
        dest: "/etc/sudoers.d/{{ lab_user }}"
        content: "{{ lab_user }} ALL=(ALL) NOPASSWD: ALL\n"
        mode: '0440'
        owner: root
        group: root
        validate: '/usr/sbin/visudo -cf %s'
# Section 1: remove enterprise/Ceph repo definitions, add the no-subscription
# repos, and make sure a small set of required packages is installed.
# The Ceph release defaults to 'squid'; override with -e pve_ceph_release=<name>
# (or an inventory var) when a node tracks a different Ceph release.
- name: "1. Repository configuration"
  tags: [repos, baseline]
  block:
    - name: Check if /etc/apt/sources.list exists
      ansible.builtin.stat:
        path: /etc/apt/sources.list
      register: apt_sources_list_stat

    # Both the classic one-line (.list) and deb822 (.sources) file names are
    # removed; missing files are simply reported as ok.
    - name: Remove Proxmox enterprise repo files (.list/.sources)
      ansible.builtin.file:
        path: "{{ item }}"
        state: absent
      loop:
        - /etc/apt/sources.list.d/pve-enterprise.list
        - /etc/apt/sources.list.d/pve-enterprise.sources
        - /etc/apt/sources.list.d/ceph.list
        - /etc/apt/sources.list.d/ceph.sources
        - /etc/apt/sources.list.d/ceph-enterprise.list
        - /etc/apt/sources.list.d/ceph-enterprise.sources

    - name: Remove enterprise.proxmox.com entries from /etc/apt/sources.list
      ansible.builtin.lineinfile:
        path: /etc/apt/sources.list
        regexp: '^.*enterprise\.proxmox\.com.*$'
        state: absent
      when: apt_sources_list_stat.stat.exists

    # The suite comes from gathered facts, so the repo line follows the
    # node's Debian release.
    - name: Add Proxmox no-subscription repository
      ansible.builtin.apt_repository:
        repo: "deb http://download.proxmox.com/debian/pve {{ ansible_distribution_release }} pve-no-subscription"
        filename: pve-no-subscription
        state: present

    - name: Add Proxmox Ceph no-subscription repository
      ansible.builtin.apt_repository:
        repo: >-
          deb http://download.proxmox.com/debian/ceph-{{ pve_ceph_release | default('squid') }}
          {{ ansible_distribution_release }} no-subscription
        filename: ceph-no-subscription
        state: present

    # state: present installs missing packages only; nothing already
    # installed is upgraded.
    - name: Ensure required packages present (intel-microcode, htop, nvme-cli, lm-sensors)
      ansible.builtin.apt:
        name: [intel-microcode, htop, nvme-cli, lm-sensors]
        state: present
        update_cache: true
# Section 2: pin the default kernel command line in /etc/default/grub.
- name: "2. Kernel tuning (12th Gen & power)"
  tags:
    - kernel
    - baseline
  block:
    # The whole GRUB_CMDLINE_LINUX_DEFAULT line is rewritten, so any other
    # parameters previously on that line are replaced. The handler regenerates
    # the GRUB config only when the line actually changed.
    - name: Configure GRUB for ASPM & power savings
      notify: Update Grub
      ansible.builtin.lineinfile:
        line: 'GRUB_CMDLINE_LINUX_DEFAULT="quiet pcie_aspm=force intel_pstate=passive"'
        regexp: '^GRUB_CMDLINE_LINUX_DEFAULT='
        path: /etc/default/grub
# Section 2b: keep root SSH login enabled through a managed drop-in file.
# NOTE(review): sshd is understood to use the first value it reads for a
# keyword — confirm no lexically earlier file in sshd_config.d sets
# PermitRootLogin, otherwise this "90-" drop-in would not win.
- name: "2b. SSH hardening: enforce PermitRootLogin for Proxmox cluster management"
  tags:
    - ssh
    - baseline
  block:
    - name: Deploy drop-in sshd config to enforce PermitRootLogin yes
      notify: Restart SSHD
      ansible.builtin.copy:
        dest: /etc/ssh/sshd_config.d/90-pve-root.conf
        mode: '0600'
        group: root
        owner: root
        content: |
          # Managed by Ansible — pve_baseline.yml
          # Proxmox cluster management requires root SSH access.
          # This drop-in overrides any openssh-server package default that sets PermitRootLogin no.
          PermitRootLogin yes
# Section 3: stop a closed lid from suspending the node. Skipped entirely
# unless is_laptop is truthy.
- name: "3. Laptop safety: disable lid-close suspend"
  tags:
    - laptop
    - baseline
  when: is_laptop | default(false)
  block:
    # The regexp matches the setting whether or not it is commented out, so
    # the stock "#Key=..." line is replaced in place.
    - name: Configure logind.conf to ignore lid switch
      notify: Restart Logind
      loop:
        - key: "HandleLidSwitch"
          value: "ignore"
        - key: "HandleLidSwitchExternalPower"
          value: "ignore"
      ansible.builtin.lineinfile:
        path: /etc/systemd/logind.conf
        regexp: "^#?{{ item.key }}="
        line: "{{ item.key }}={{ item.value }}"

    # Masking the targets blocks suspend/hibernate regardless of what
    # requests it.
    - name: Mask sleep/suspend targets (hardware lock)
      loop: [sleep.target, suspend.target, hibernate.target, hybrid-sleep.target]
      ansible.builtin.systemd:
        masked: true
        name: "{{ item }}"
# Section 4: keep swap off, both now and after the next reboot.
- name: "4. Storage & SSD health"
  tags: [storage, baseline]
  block:
    # Comment out every active fstab line whose fields include "swap".
    # Unlike the previous `sed '/ swap /'` this also matches tab-separated
    # entries, never re-comments an already commented line, is enforced even
    # when swap is currently off, and supports --check/--diff.
    - name: Comment out swap entries in /etc/fstab
      ansible.builtin.replace:
        path: /etc/fstab
        regexp: '^([^#\n].*[ \t]swap[ \t].*)$'
        replace: '#\1'

    # Runs as its own task so a swapoff failure is reported instead of being
    # hidden behind a later command's exit code. ansible_swaptotal_mb comes
    # from the facts gathered at play start.
    - name: Disable swap (protect NVMe lifespan)
      ansible.builtin.command: swapoff -a
      when: ansible_swaptotal_mb > 0
      changed_when: true
# Section 5: read-only probe for the kernel's "Hardware Feedback Interface"
# message. grep exiting non-zero (no match) is expected, hence
# failed_when: false; the task never reports a change.
- name: "5. Intel Thread Director support check"
  tags: [baseline]
  ansible.builtin.shell: "dmesg | grep -i 'Hardware Feedback Interface'"
  register: hfi_check
  failed_when: false
  changed_when: false

# Surface the probe result; previously it was registered but never shown.
# The defaults cover --check runs, where the shell task is skipped and
# stdout_lines is absent.
# NOTE(review): a missing line only means dmesg no longer contains it —
# the ring buffer may have rotated since boot.
- name: "5. Intel Thread Director support check: report result"
  tags: [baseline]
  ansible.builtin.debug:
    msg: >-
      {{ hfi_check.stdout_lines | default([]) | first
         | default('No "Hardware Feedback Interface" line found in dmesg') }}
# Section 6: deploy a script that patches the subscription nag out of the
# desktop and mobile web UIs, hook it into dpkg so it re-runs after package
# operations, and run it once now.
- name: "6. Proxmox Web UI: subscription nag removal"
  tags: [nag, baseline]
  block:
    # The script is guarded twice: the JS patch is skipped once
    # "NoMoreNagging" is present, and the mobile template is only appended to
    # when the marker comment is missing. It prints "Patching ..." only when
    # it modifies a file, which the run task below uses for changed_when.
    - name: Deploy subscription nag removal script
      ansible.builtin.copy:
        dest: /usr/local/bin/pve-remove-nag.sh
        owner: root
        group: root
        mode: '0755'
        content: |
          #!/bin/sh
          WEB_JS=/usr/share/javascript/proxmox-widget-toolkit/proxmoxlib.js
          if [ -s "$WEB_JS" ] && ! grep -q NoMoreNagging "$WEB_JS"; then
            echo "Patching Web UI nag..."
            sed -i -e "/data\.status/ s/!//" -e "/data\.status/ s/active/NoMoreNagging/" "$WEB_JS"
          fi
          MOBILE_TPL=/usr/share/pve-yew-mobile-gui/index.html.tpl
          MARKER="<!-- MANAGED BLOCK FOR MOBILE NAG -->"
          if [ -f "$MOBILE_TPL" ] && ! grep -q "$MARKER" "$MOBILE_TPL"; then
            echo "Patching Mobile UI nag..."
            printf "%s\n" \
              "$MARKER" \
              "<script>" \
              " function removeSubscriptionElements() {" \
              " const dialogs = document.querySelectorAll('dialog.pwt-outer-dialog');" \
              " dialogs.forEach(dialog => {" \
              " const text = (dialog.textContent || '').toLowerCase();" \
              " if (text.includes('subscription')) {" \
              " dialog.remove();" \
              " console.log('Removed subscription dialog');" \
              " }" \
              " });" \
              " const cards = document.querySelectorAll('.pwt-card.pwt-p-2.pwt-d-flex.pwt-interactive.pwt-justify-content-center');" \
              " cards.forEach(card => {" \
              " const text = (card.textContent || '').toLowerCase();" \
              " const hasButton = card.querySelector('button');" \
              " if (!hasButton && text.includes('subscription')) {" \
              " card.remove();" \
              " console.log('Removed subscription card');" \
              " }" \
              " });" \
              " }" \
              " const observer = new MutationObserver(removeSubscriptionElements);" \
              " observer.observe(document.body, { childList: true, subtree: true });" \
              " removeSubscriptionElements();" \
              " setInterval(removeSubscriptionElements, 300);" \
              " setTimeout(() => {observer.disconnect();}, 10000);" \
              "</script>" \
              "" >>"$MOBILE_TPL"
          fi

    # apt runs this after every dpkg invocation, so a package operation that
    # restores the original files gets re-patched automatically.
    - name: Configure dpkg hook to auto-run nag removal after upgrades
      ansible.builtin.copy:
        dest: /etc/apt/apt.conf.d/no-nag-script
        owner: root
        group: root
        mode: '0644'
        content: |
          DPkg::Post-Invoke { "/usr/local/bin/pve-remove-nag.sh"; };

    - name: Run nag removal script immediately
      ansible.builtin.command: /usr/local/bin/pve-remove-nag.sh
      register: nag_removal_output
      changed_when: "'Patching' in nag_removal_output.stdout"

    # Renamed from "Reinstall ...": state: present never reinstalls an
    # installed package, it only installs one that is missing (which would
    # then trigger the dpkg hook above). A real reinstall is unnecessary
    # because the script has just run, and would make this play report a
    # change on every run. Kept best-effort (failed_when: false) as before;
    # the register name is unchanged.
    - name: Ensure proxmox-widget-toolkit is installed (no reinstall, no upgrade)
      ansible.builtin.apt:
        name: proxmox-widget-toolkit
        state: present
      register: widget_reinstall
      failed_when: false
# Section 7: only when standalone_mode is truthy, stop and disable the HA
# services and Corosync. With the play default (false) the whole block is
# skipped and the services are left untouched.
- name: "7. Standalone optimization: HA/Corosync gate"
  tags:
    - ha
    - baseline
  when: standalone_mode | bool
  block:
    # failed_when: false on each task keeps this best-effort: an error from
    # the systemd module does not abort the play.
    - name: Stop and disable pve-ha-lrm service
      failed_when: false
      ansible.builtin.systemd:
        name: pve-ha-lrm
        enabled: false
        state: stopped

    - name: Stop and disable pve-ha-crm service
      failed_when: false
      ansible.builtin.systemd:
        name: pve-ha-crm
        enabled: false
        state: stopped

    - name: Stop and disable Corosync service
      failed_when: false
      ansible.builtin.systemd:
        name: corosync
        enabled: false
        state: stopped
handlers:
  # Notified by the GRUB cmdline task; regenerates the boot configuration.
  # Reported as changed whenever update-grub exits 0.
  - name: Update Grub
    register: grub_update_result
    changed_when: grub_update_result.rc == 0
    ansible.builtin.command: update-grub

  # Notified by the logind.conf task so the lid-switch settings take effect.
  - name: Restart Logind
    ansible.builtin.systemd:
      state: restarted
      name: systemd-logind

  # Notified by the sshd drop-in task; the unit restarted is named "ssh".
  - name: Restart SSHD
    ansible.builtin.systemd:
      state: restarted
      name: ssh