---
# playbooks/proxmox/pve_update.yml
# Rolling Proxmox cluster package update with conditional kernel reboot.
#
# ─────────────────────────────────────────────────────────────────────────────
# ⚠️ HUMAN-TRIGGERED ONLY — do not automate or schedule.
# serial: 1 ensures one node is updated at a time to protect cluster quorum.
# ─────────────────────────────────────────────────────────────────────────────
#
# What this does:
#   1. Pre-checks cluster quorum — fails fast if quorum is degraded
#   2. Runs apt dist-upgrade on the target node
#   3. Reboots if a kernel update was applied (tags: reboot)
#   4. Waits for the node to return online (tags: reboot)
#   5. Re-verifies cluster quorum before proceeding to the next node
#
# Usage:
#   # All nodes (rolling — pve01 → pve02 → pve03):
#   ansible-playbook -i inventory/hosts.ini playbooks/proxmox/pve_update.yml
#
#   # Single node:
#   ansible-playbook -i inventory/hosts.ini playbooks/proxmox/pve_update.yml --limit pve01
#
#   # Dry-run (confirms serial order and reboot conditions without modifying):
#   ansible-playbook -i inventory/hosts.ini playbooks/proxmox/pve_update.yml --check
#
#   # Update packages but skip reboot even if kernel changed:
#   ansible-playbook -i inventory/hosts.ini playbooks/proxmox/pve_update.yml --skip-tags reboot

- name: Rolling Proxmox cluster update
  hosts: proxmox_cluster
  become: true
  # One node per batch, so the nodes not being updated keep serving the cluster.
  serial: 1

  tasks:
    # Refuse to touch a node while the cluster is already degraded.
    - name: "Pre-flight: verify cluster quorum before updating this node"
      block:
        - name: Check cluster quorum status
          ansible.builtin.command: pvecm status
          register: pvecm_pre
          changed_when: false
          # Read-only query: run it under --check too so the assert has data.
          check_mode: false

        - name: Fail if cluster is not quorate before touching this node
          ansible.builtin.assert:
            that:
              # The substring test short-circuits, so regex_search is only
              # consulted when a "Quorate:" line is actually present.
              - >-
                'Quorate:' in pvecm_pre.stdout
                and 'Yes' in (pvecm_pre.stdout | regex_search('Quorate:.*') | default(''))
            fail_msg: |
              ⛔ Cluster quorum is NOT healthy before updating {{ inventory_hostname }}.
              Fix quorum before proceeding.
              pvecm status:
              {{ pvecm_pre.stdout }}
            success_msg: "✅ Cluster quorate — safe to update {{ inventory_hostname }}"

    - name: "Update packages"
      # Tagged at block level so `--tags update` refreshes the cache as well as
      # running the upgrade (never dist-upgrade against a stale package index).
      tags: [update]
      block:
        - name: Update apt cache
          ansible.builtin.apt:
            update_cache: true
            cache_valid_time: 0

        - name: Run apt dist-upgrade
          ansible.builtin.apt:
            upgrade: dist
            # The cache was refreshed by the previous task.
            update_cache: false
          register: dist_upgrade_result

    # Prints the newest installed kernel version ONLY when it differs from the
    # running kernel; empty output means no reboot is required. An empty /boot
    # listing is treated as "nothing to do" rather than as a reason to reboot.
    - name: Check if a newer kernel is installed but not yet booted
      ansible.builtin.shell: |
        newest=$(find /boot -maxdepth 1 -name 'vmlinuz-*' -printf '%f\n' \
          | sed 's/^vmlinuz-//' | sort -V | tail -n 1)
        if [ -n "$newest" ] && [ "$newest" != "$(uname -r)" ]; then
          printf '%s\n' "$newest"
        fi
      register: reboot_check
      changed_when: false
      # Read-only inspection: also run under --check so the dry-run can report
      # whether a reboot would be triggered.
      check_mode: false
      tags: [reboot]

    - name: Reboot if a newer kernel is installed
      ansible.builtin.reboot:
        # reboot_check.stdout carries the target kernel version (see above).
        msg: "Rebooting into kernel {{ reboot_check.stdout | trim }} — initiated by pve_update.yml"
        reboot_timeout: 600
      when: reboot_check.stdout | trim | length > 0
      tags: [reboot]

    # Extra connectivity confirmation after the reboot task has returned.
    - name: Wait for node to return post-reboot
      ansible.builtin.wait_for_connection:
        delay: 10
        timeout: 600
      when: reboot_check.stdout | trim | length > 0
      tags: [reboot]

    # The node must be back in a quorate cluster before serial moves on.
    - name: "Post-flight: re-verify cluster quorum after node returns"
      block:
        - name: Check cluster quorum status post-update
          ansible.builtin.command: pvecm status
          register: pvecm_post
          changed_when: false
          check_mode: false
          # A freshly rebooted node can accept SSH before its cluster services
          # have rejoined, so poll for up to ~2 minutes instead of failing on
          # the first attempt.
          until: >-
            (pvecm_post.rc | default(1)) == 0
            and 'Quorate:' in (pvecm_post.stdout | default(''))
            and 'Yes' in (pvecm_post.stdout | regex_search('Quorate:.*') | default(''))
          retries: 12
          delay: 10
          # Let the assert below report the failure with the tailored message.
          ignore_errors: true

        - name: Assert cluster is quorate after update
          ansible.builtin.assert:
            that:
              - (pvecm_post.rc | default(1)) == 0
              - >-
                'Quorate:' in (pvecm_post.stdout | default(''))
                and 'Yes' in (pvecm_post.stdout | regex_search('Quorate:.*') | default(''))
            fail_msg: |
              ⛔ Cluster quorum is degraded after updating {{ inventory_hostname }}.
              Investigate before proceeding to the next node.
              pvecm status:
              {{ pvecm_post.stdout | default('(no output)') }}
            success_msg: "✅ {{ inventory_hostname }} updated — cluster quorum verified. Proceeding."