---
# playbooks/proxmox/grow_vm_disks.yml
#
# Purpose:
#   Idempotently ensures all Swarm VM disks are sized to vm_disk_target on the
#   Proxmox layer and reboots the affected VMs so the guest kernel reads the
#   new block device geometry (Play 1), waits for the guests to come back
#   (Play 2), then grows the in-guest partition and filesystem to match
#   (Play 3).
#
# Architecture:
#   Play 1 — proxmox_cluster: checks the actual LVM volume size via `lvs` (NOT
#     `qm config`, which can be out of sync) and uses `lvextend` if below
#     target, then runs `qm reboot` for only the VMs whose LVs were just
#     extended. The reboot lives in Play 1 because `qm reboot` runs on the PVE
#     host, not in the guest.
#     WHY lvs not qm config: qm resize updates Proxmox metadata but can
#     silently fail to grow the LVM when the VM is running. lvs shows ground
#     truth.
#   Play 2 — swarm_hosts: waits for SSH to come back after the reboot.
#   Play 3 — swarm_hosts: runs the disk_grow role (growpart + resize2fs).
#     WHY reboot required: virtio-scsi guests on this kernel do not honour
#     /sys/class/block/sda/device/rescan or scsi_host scans while running.
#     Only a cold re-read of block device geometry at boot is reliable.
#
# VMID scheme: manager = (node_index * 100) + 1, worker = (node_index * 100) + 2
#   pve01 → 101/102, pve02 → 201/202, pve03 → 301/302
# LV path: /dev/pve/vm-{vmid}-disk-0 (standard local-lvm layout)
#
# Pre-requisites:
#   - SSH access to proxmox_cluster and swarm_hosts
#   - LVM tools available on Proxmox nodes (standard PVE install)
#   - cloud-guest-utils will be installed by disk_grow role if absent
#
# Usage:
#   Fix all Swarm VMs across all PVE nodes:
#     ansible-playbook -i inventory/hosts.ini playbooks/proxmox/grow_vm_disks.yml
#
#   Fix a single VM end-to-end (all three plays, one guest):
#     ansible-playbook -i inventory/hosts.ini playbooks/proxmox/grow_vm_disks.yml \
#       -e "target_vmids=101" --limit 'pve01,swarm-manager-1'
#     # Play 1 on pve01, Plays 2+3 on swarm-manager-1.
#     # --limit applies to every play, so the guest must be listed as well,
#     # otherwise the swarm_hosts plays match no hosts.
#     # target_vmids is an optional comma-separated VMID filter for Play 1;
#     # when omitted, both the manager and worker VM of each node are handled.
#
#   In-guest grow only (disk already extended, VM already rebooted):
#     ansible-playbook -i inventory/hosts.ini playbooks/proxmox/grow_vm_disks.yml \
#       --limit swarm-manager-1 --tags in_guest
#
#   Validate only (no changes):
#     ansible-playbook -i inventory/hosts.ini playbooks/proxmox/grow_vm_disks.yml \
#       --check
#     # The read-only lvs probes still run in check mode, so the report tasks
#     # show which LVs are below target; lvextend and qm reboot are skipped.
#
# Verification after run:
#   ansible swarm_hosts -i inventory/hosts.ini -m shell -a "df -h /" --become

# ============================================================
# PLAY 1: Proxmox layer — extend LVM volume for each Swarm VM
# Source of truth: lvs (actual LVM size), NOT qm config (metadata only)
# ============================================================
- name: Extend Swarm VM LVM volumes on Proxmox hosts
  hosts: proxmox_cluster
  become: false
  gather_facts: false
  tags: [proxmox_resize]
  vars:
    vm_disk_target: "32G"
    # Numeric part of vm_disk_target, compared against the whole-GiB lvs value.
    vm_disk_target_gb: "{{ vm_disk_target | regex_replace('[^0-9]', '') | int }}"
    vm_lv_vg: "pve"
    # Optional VMID filter taken from -e target_vmids=...; every run of digits
    # is treated as one VMID, so "101", "101,202" and a YAML list all work.
    # An empty list means "no filter": handle every VM on the node.
    disk_grow_target_vmids: "{{ target_vmids | default('') | string | regex_findall('[0-9]+') }}"

  tasks:
    - name: Derive VM IDs and LV names for this PVE node
      ansible.builtin.set_fact:
        disk_grow_manager_vmid: "{{ (inventory_hostname | regex_replace('[^0-9]', '') | int) * 100 + 1 }}"
        disk_grow_worker_vmid: "{{ (inventory_hostname | regex_replace('[^0-9]', '') | int) * 100 + 2 }}"

    # Manager VM -------------------------------------------------------
    # WHY lvs not qm config: qm resize updates Proxmox metadata but silently
    # fails to grow the LVM when the VM is already running. lvs is ground truth.
    - name: Ensure manager VM LV is at target size
      when: >-
        disk_grow_target_vmids | length == 0
        or (disk_grow_manager_vmid | string) in disk_grow_target_vmids
      block:
        # pipefail is required: without it the pipeline status is that of
        # `cut` (always 0), so the "absent" fallback never fires and a missing
        # LV is reported as an empty string, which later parses as size 0.
        # check_mode: false lets this read-only probe run under --check.
        - name: Get actual LVM size for manager VM {{ disk_grow_manager_vmid }}
          ansible.builtin.shell: |
            set -o pipefail
            lvs --noheadings --units g -o lv_size \
              /dev/{{ vm_lv_vg }}/vm-{{ disk_grow_manager_vmid }}-disk-0 2>/dev/null \
              | tr -d ' ' | sed 's/g$//' | cut -d. -f1 \
              || echo "absent"
          args:
            executable: /bin/bash
          register: disk_grow_manager_lv_size
          changed_when: false
          check_mode: false

        - name: Extend manager VM LV to {{ vm_disk_target }} if below target
          ansible.builtin.command:
            argv:
              - lvextend
              - "-L"
              - "{{ vm_disk_target }}"
              - "/dev/{{ vm_lv_vg }}/vm-{{ disk_grow_manager_vmid }}-disk-0"
          when:
            - disk_grow_manager_lv_size.stdout | trim != 'absent'
            - (disk_grow_manager_lv_size.stdout | trim | int) < (vm_disk_target_gb | int)
          register: disk_grow_manager_extend_result
          changed_when: disk_grow_manager_extend_result.rc == 0

        # The extend task is skipped both when the LV is already big enough
        # and when running under --check; the message tells the two apart.
        - name: Report manager VM LV state
          ansible.builtin.debug:
            msg: >-
              Manager VM {{ disk_grow_manager_vmid }} LV:
              {{ disk_grow_manager_lv_size.stdout | trim }}G → {{ vm_disk_target }}
              ({{ 'extended — reboot required'
              if (disk_grow_manager_extend_result is not skipped)
              else ('below target — not extended (check mode)'
              if (disk_grow_manager_lv_size.stdout | trim | int) < (vm_disk_target_gb | int)
              else 'already at target') }})
          when: disk_grow_manager_lv_size.stdout | trim != 'absent'

    # Worker VM --------------------------------------------------------
    # Same probe / extend / report sequence as the manager VM above.
    - name: Ensure worker VM LV is at target size
      when: >-
        disk_grow_target_vmids | length == 0
        or (disk_grow_worker_vmid | string) in disk_grow_target_vmids
      block:
        - name: Get actual LVM size for worker VM {{ disk_grow_worker_vmid }}
          ansible.builtin.shell: |
            set -o pipefail
            lvs --noheadings --units g -o lv_size \
              /dev/{{ vm_lv_vg }}/vm-{{ disk_grow_worker_vmid }}-disk-0 2>/dev/null \
              | tr -d ' ' | sed 's/g$//' | cut -d. -f1 \
              || echo "absent"
          args:
            executable: /bin/bash
          register: disk_grow_worker_lv_size
          changed_when: false
          check_mode: false

        - name: Extend worker VM LV to {{ vm_disk_target }} if below target
          ansible.builtin.command:
            argv:
              - lvextend
              - "-L"
              - "{{ vm_disk_target }}"
              - "/dev/{{ vm_lv_vg }}/vm-{{ disk_grow_worker_vmid }}-disk-0"
          when:
            - disk_grow_worker_lv_size.stdout | trim != 'absent'
            - (disk_grow_worker_lv_size.stdout | trim | int) < (vm_disk_target_gb | int)
          register: disk_grow_worker_extend_result
          changed_when: disk_grow_worker_extend_result.rc == 0

        - name: Report worker VM LV state
          ansible.builtin.debug:
            msg: >-
              Worker VM {{ disk_grow_worker_vmid }} LV:
              {{ disk_grow_worker_lv_size.stdout | trim }}G → {{ vm_disk_target }}
              ({{ 'extended — reboot required'
              if (disk_grow_worker_extend_result is not skipped)
              else ('below target — not extended (check mode)'
              if (disk_grow_worker_lv_size.stdout | trim | int) < (vm_disk_target_gb | int)
              else 'already at target') }})
          when: disk_grow_worker_lv_size.stdout | trim != 'absent'

    # Reboot any VMs whose LV was just extended ---------------------------
    # WHY here not in Play 2: qm reboot runs on the PVE host, not the guest.
    # We only reboot VMs that were actually extended this run. A VM that was
    # filtered out, already at target, or only probed under --check leaves a
    # skipped extend result and is therefore not rebooted.
    - name: Reboot manager VM {{ disk_grow_manager_vmid }} to expose new disk size to guest kernel
      ansible.builtin.command:
        argv:
          - qm
          - reboot
          - "{{ disk_grow_manager_vmid | string }}"
      when:
        - disk_grow_manager_extend_result is not skipped
        - disk_grow_manager_extend_result is changed
      changed_when: true

    - name: Reboot worker VM {{ disk_grow_worker_vmid }} to expose new disk size to guest kernel
      ansible.builtin.command:
        argv:
          - qm
          - reboot
          - "{{ disk_grow_worker_vmid | string }}"
      when:
        - disk_grow_worker_extend_result is not skipped
        - disk_grow_worker_extend_result is changed
      changed_when: true

# ============================================================
# PLAY 2: Wait for rebooted Swarm nodes to come back
# ============================================================
# Tagged with both proxmox_resize and in_guest so the wait also runs when
# only one of the two phases is selected with --tags.
- name: Wait for Swarm nodes to return after reboot
  hosts: swarm_hosts
  become: false
  gather_facts: false
  tags: [proxmox_resize, in_guest]
  tasks:
    - name: Wait for SSH to become available (up to 2 minutes)
      ansible.builtin.wait_for_connection:
        delay: 10
        timeout: 120

# ============================================================
# PLAY 3: In-guest layer — grow partition and filesystem
# ============================================================
- name: Grow in-guest root partition and filesystem on all Swarm nodes
  hosts: swarm_hosts
  become: true
  gather_facts: true
  tags: [in_guest]
  roles:
    - disk_grow