---
# playbooks/proxmox/pve_update.yml
# Rolling Proxmox cluster package update with conditional kernel reboot.
#
# ─────────────────────────────────────────────────────────────────────────────
# ⚠️  HUMAN-TRIGGERED ONLY — do not automate or schedule.
#     serial: 1 ensures one node is updated at a time to protect cluster quorum.
# ─────────────────────────────────────────────────────────────────────────────
#
# What this does:
#   1. Pre-checks cluster quorum — fails fast if quorum is degraded
#   2. Runs apt dist-upgrade on the target node
#   3. Reboots if the newest kernel in /boot is not the running one (tags: reboot)
#   4. Waits for the node to return online (tags: reboot)
#   5. Re-verifies cluster quorum (polling for up to ~2 minutes so the node can
#      rejoin after a reboot) before proceeding to the next node
#
# Usage:
#   # All nodes (rolling — pve01 → pve02 → pve03):
#   ansible-playbook -i inventory/hosts.ini playbooks/proxmox/pve_update.yml
#
#   # Single node:
#   ansible-playbook -i inventory/hosts.ini playbooks/proxmox/pve_update.yml --limit pve01
#
#   # Dry-run (confirms serial order and reboot conditions without modifying):
#   ansible-playbook -i inventory/hosts.ini playbooks/proxmox/pve_update.yml --check
#
#   # Update packages but skip reboot even if kernel changed:
#   ansible-playbook -i inventory/hosts.ini playbooks/proxmox/pve_update.yml --skip-tags reboot

- name: Rolling Proxmox cluster update
  hosts: proxmox_cluster
  become: true
  # One host per batch: a failure on any node stops the run before the next
  # node is touched.
  serial: 1

  tasks:
    - name: "Pre-flight: verify cluster quorum before updating this node"
      block:
        - name: Check cluster quorum status
          ansible.builtin.command: pvecm status
          register: pvecm_pre
          changed_when: false
          # Read-only query; run it even under --check so the assert below has data.
          check_mode: false

        - name: Fail if cluster is not quorate before touching this node
          ansible.builtin.assert:
            that:
              # regex_search yields None when there is no "Quorate:" line;
              # default('', true) maps that to '' so the test is simply false.
              - >-
                'Yes' in (pvecm_pre.stdout
                | regex_search('Quorate:.*')
                | default('', true))
            fail_msg: |
              ⛔ Cluster quorum is NOT healthy before updating {{ inventory_hostname }}.
              Fix quorum before proceeding.

              pvecm status:
              {{ pvecm_pre.stdout }}
            success_msg: "✅ Cluster quorate — safe to update {{ inventory_hostname }}"

    - name: "Update packages"
      block:
        - name: Update apt cache
          ansible.builtin.apt:
            update_cache: true
            cache_valid_time: 0
          # Tagged like the upgrade itself so `--tags update` never upgrades
          # against a stale package index.
          tags: [update]

        - name: Run apt dist-upgrade
          ansible.builtin.apt:
            upgrade: dist
            update_cache: false
          register: dist_upgrade_result
          tags: [update]

    # Prints the newest kernel version found in /boot when it differs from the
    # running kernel, and prints nothing otherwise. Empty stdout therefore
    # means "no reboot needed"; this also covers the case where no
    # /boot/vmlinuz-* image exists at all.
    - name: Check if a newer kernel is installed but not yet booted
      ansible.builtin.shell: |
        LATEST=$(ls /boot/vmlinuz-* 2>/dev/null | sort -V | tail -n 1 | sed 's|/boot/vmlinuz-||')
        RUNNING=$(uname -r)
        if [ -n "$LATEST" ] && [ "$LATEST" != "$RUNNING" ]; then
          echo "$LATEST"
        fi
      register: reboot_check
      changed_when: false
      # Read-only probe; must also run under --check so the reboot conditions
      # below can be evaluated in a dry-run.
      check_mode: false
      tags: [reboot]

    - name: Reboot if a newer kernel is installed
      ansible.builtin.reboot:
        msg: "Rebooting into kernel {{ reboot_check.stdout | trim }} — initiated by pve_update.yml"
        reboot_timeout: 600
      when: reboot_check.stdout | trim | length > 0
      tags: [reboot]

    - name: Wait for node to return post-reboot
      ansible.builtin.wait_for_connection:
        delay: 10
        timeout: 600
      when: reboot_check.stdout | trim | length > 0
      tags: [reboot]

    - name: "Post-flight: re-verify cluster quorum after node returns"
      block:
        # SSH can be reachable before the node has rejoined the cluster, so
        # poll instead of sampling once (12 attempts, 10 s apart).
        - name: Check cluster quorum status post-update
          ansible.builtin.command: pvecm status
          register: pvecm_post
          changed_when: false
          check_mode: false
          until: >-
            'Yes' in (pvecm_post.stdout | default('')
            | regex_search('Quorate:.*')
            | default('', true))
          retries: 12
          delay: 10
          # Exhausting the retries marks this task failed; let the assert below
          # produce the operator-facing failure message instead.
          ignore_errors: true

        - name: Assert cluster is quorate after update
          ansible.builtin.assert:
            that:
              - >-
                'Yes' in (pvecm_post.stdout | default('')
                | regex_search('Quorate:.*')
                | default('', true))
            fail_msg: |
              ⛔ Cluster quorum is degraded after updating {{ inventory_hostname }}.
              Investigate before proceeding to the next node.

              pvecm status:
              {{ pvecm_post.stdout }}
            success_msg: "✅ {{ inventory_hostname }} updated — cluster quorum verified. Proceeding."