189 lines
7.8 KiB
YAML
189 lines
7.8 KiB
YAML
---
|
|
# playbooks/proxmox/grow_vm_disks.yml
|
|
#
|
|
# Purpose:
|
|
#   Idempotently ensures all Swarm VM disks are sized to vm_disk_target on the
#   Proxmox layer and reboots the affected VMs so the guest kernel reads the
#   new block device geometry (Play 1), waits for the rebooted guests to accept
#   SSH again (Play 2), then grows the in-guest partition and filesystem to
#   match (Play 3).
|
|
#
|
|
# Architecture:
|
|
#   Play 1 — proxmox_cluster: checks the actual LVM volume size via `lvs` (NOT
#     `qm config`, which can be out of sync) and uses `lvextend` if below target,
#     then runs `qm reboot` for only the VMs whose LVs were just extended.
#     WHY lvs not qm config: qm resize updates Proxmox metadata but can silently
#     fail to grow the LVM when the VM is running. lvs shows ground truth.
#   Play 2 — swarm_hosts: waits for SSH to become available again after the reboot.
#   Play 3 — swarm_hosts: runs the disk_grow role (growpart + resize2fs).
#     WHY reboot required: virtio-scsi guests on this kernel do not
#     honour /sys/class/block/sda/device/rescan or scsi_host scans while running.
#     Only a cold re-read of block device geometry at boot is reliable.
|
|
#
|
|
# VMID scheme: manager = (node_index * 100) + 1, worker = (node_index * 100) + 2
|
|
# pve01 → 101/102, pve02 → 201/202, pve03 → 301/302
|
|
# LV path: /dev/pve/vm-{vmid}-disk-0 (standard local-lvm layout)
|
|
#
|
|
# Pre-requisites:
|
|
# - SSH access to proxmox_cluster and swarm_hosts
|
|
# - LVM tools available on Proxmox nodes (standard PVE install)
|
|
# - cloud-guest-utils will be installed by disk_grow role if absent
|
|
#
|
|
# Usage:
|
|
# Fix all Swarm VMs across all PVE nodes:
|
|
# ansible-playbook -i inventory/hosts.ini playbooks/proxmox/grow_vm_disks.yml
|
|
#
|
|
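#   Fix a single node end-to-end (all three plays, one guest):
#     ansible-playbook -i inventory/hosts.ini playbooks/proxmox/grow_vm_disks.yml \
#       -e "target_vmids=101" --limit 'pve01,swarm-manager-1'
#     # Play 1 runs on pve01, Plays 2+3 on swarm-manager-1. --limit applies to
#     # every play, so it must name both the PVE node and the guest; with only
#     # pve01 in the limit, Plays 2+3 (hosts: swarm_hosts) match no hosts.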
|
|
#
|
|
# In-guest grow only (disk already extended, VM already rebooted):
|
|
# ansible-playbook -i inventory/hosts.ini playbooks/proxmox/grow_vm_disks.yml \
|
|
# --limit swarm-manager-1 --tags in_guest
|
|
#
|
|
# Validate only (no changes):
|
|
# ansible-playbook -i inventory/hosts.ini playbooks/proxmox/grow_vm_disks.yml \
|
|
# --check
|
|
#
|
|
# Verification after run:
|
|
# ansible swarm_hosts -i inventory/hosts.ini -m shell -a "df -h /" --become
|
|
|
|
# ============================================================
# PLAY 1: Proxmox layer — extend LVM volume for each Swarm VM,
#         then reboot the VMs whose LV was extended in this run
# Source of truth: lvs (actual LVM size), NOT qm config (metadata only)
# ============================================================
- name: Extend Swarm VM LVM volumes on Proxmox hosts
  hosts: proxmox_cluster
  become: false
  gather_facts: false
  tags: [proxmox_resize]

  vars:
    # Desired LV size, passed verbatim to `lvextend -L`.
    vm_disk_target: "32G"
    # Numeric part of vm_disk_target, used for the "below target" comparison.
    vm_disk_target_gb: "{{ vm_disk_target | regex_replace('[^0-9]', '') | int }}"
    vm_lv_vg: "pve"
    # Optional filter from `-e "target_vmids=101"` (comma-separated string of
    # VMIDs). Empty/undefined means "all Swarm VMs on this node".
    disk_grow_target_vmids: "{{ (target_vmids | default('') | string).split(',') | map('trim') | reject('equalto', '') | list }}"

  tasks:
    # VMID scheme: (digits of the inventory hostname) * 100 + 1 for the
    # manager, + 2 for the worker (e.g. pve01 -> 101/102).
    - name: Derive VM IDs and LV names for this PVE node
      ansible.builtin.set_fact:
        disk_grow_manager_vmid: "{{ (inventory_hostname | regex_replace('[^0-9]', '') | int) * 100 + 1 }}"
        disk_grow_worker_vmid: "{{ (inventory_hostname | regex_replace('[^0-9]', '') | int) * 100 + 2 }}"

    # Manager VM -------------------------------------------------------
    # WHY lvs not qm config: qm resize updates Proxmox metadata but silently
    # fails to grow the LVM when the VM is already running. lvs is ground truth.

    # Prints the LV size in whole GiB, or "absent" when lvs fails.
    # `set -o pipefail` is required: without it the pipeline's exit status is
    # that of `cut`, so the `|| echo "absent"` fallback would never fire.
    # A '<' prefix on the lvs size (rounded-value marker; confirm against the
    # lvs man page) is stripped so it cannot break the integer comparison.
    # check_mode: false — the probe is read-only, so it also runs under --check.
    - name: "Get actual LVM size for manager VM {{ disk_grow_manager_vmid }}"
      ansible.builtin.shell: |
        set -o pipefail
        lvs --noheadings --units g -o lv_size \
          /dev/{{ vm_lv_vg }}/vm-{{ disk_grow_manager_vmid }}-disk-0 2>/dev/null \
          | tr -d ' <' | sed 's/g$//' | cut -d. -f1 \
          || echo "absent"
      args:
        executable: /bin/bash
      register: disk_grow_manager_lv_size
      changed_when: false
      check_mode: false

    # Runs only when the VM is selected, the probe returned a plain integer
    # (so "absent" or unparsable output never reaches lvextend), and the
    # current size is below the target.
    - name: "Extend manager VM LV to {{ vm_disk_target }} if below target"
      ansible.builtin.command: >-
        lvextend -L {{ vm_disk_target }}
        /dev/{{ vm_lv_vg }}/vm-{{ disk_grow_manager_vmid }}-disk-0
      when:
        - (disk_grow_target_vmids | length == 0) or ((disk_grow_manager_vmid | string) in disk_grow_target_vmids)
        - (disk_grow_manager_lv_size.stdout | trim) is match('^[0-9]+$')
        - (disk_grow_manager_lv_size.stdout | trim | int) < (vm_disk_target_gb | int)
      register: disk_grow_manager_extend_result
      changed_when: disk_grow_manager_extend_result.rc == 0

    - name: Report manager VM LV state
      ansible.builtin.debug:
        msg: >-
          Manager VM {{ disk_grow_manager_vmid }} LV:
          {{ disk_grow_manager_lv_size.stdout | trim }}G
          → {{ vm_disk_target }}
          ({{ 'extended — reboot required' if (disk_grow_manager_extend_result is not skipped)
          else 'already at target or absent' }})
      when:
        - (disk_grow_target_vmids | length == 0) or ((disk_grow_manager_vmid | string) in disk_grow_target_vmids)
        - (disk_grow_manager_lv_size.stdout | trim) != 'absent'

    # Worker VM --------------------------------------------------------
    # Same probe / extend / report sequence as for the manager VM above.

    - name: "Get actual LVM size for worker VM {{ disk_grow_worker_vmid }}"
      ansible.builtin.shell: |
        set -o pipefail
        lvs --noheadings --units g -o lv_size \
          /dev/{{ vm_lv_vg }}/vm-{{ disk_grow_worker_vmid }}-disk-0 2>/dev/null \
          | tr -d ' <' | sed 's/g$//' | cut -d. -f1 \
          || echo "absent"
      args:
        executable: /bin/bash
      register: disk_grow_worker_lv_size
      changed_when: false
      check_mode: false

    - name: "Extend worker VM LV to {{ vm_disk_target }} if below target"
      ansible.builtin.command: >-
        lvextend -L {{ vm_disk_target }}
        /dev/{{ vm_lv_vg }}/vm-{{ disk_grow_worker_vmid }}-disk-0
      when:
        - (disk_grow_target_vmids | length == 0) or ((disk_grow_worker_vmid | string) in disk_grow_target_vmids)
        - (disk_grow_worker_lv_size.stdout | trim) is match('^[0-9]+$')
        - (disk_grow_worker_lv_size.stdout | trim | int) < (vm_disk_target_gb | int)
      register: disk_grow_worker_extend_result
      changed_when: disk_grow_worker_extend_result.rc == 0

    - name: Report worker VM LV state
      ansible.builtin.debug:
        msg: >-
          Worker VM {{ disk_grow_worker_vmid }} LV:
          {{ disk_grow_worker_lv_size.stdout | trim }}G
          → {{ vm_disk_target }}
          ({{ 'extended — reboot required' if (disk_grow_worker_extend_result is not skipped)
          else 'already at target or absent' }})
      when:
        - (disk_grow_target_vmids | length == 0) or ((disk_grow_worker_vmid | string) in disk_grow_target_vmids)
        - (disk_grow_worker_lv_size.stdout | trim) != 'absent'

    # Reboot any VMs whose LV was just extended ---------------------------
    # WHY in this play: qm reboot runs on the PVE host, not the guest.
    # We only reboot VMs that were actually extended this run; a skipped
    # extend task registers changed=false, so `is changed` covers both cases.

    - name: "Reboot manager VM {{ disk_grow_manager_vmid }} to expose new disk size to guest kernel"
      ansible.builtin.command: "qm reboot {{ disk_grow_manager_vmid }}"
      when: disk_grow_manager_extend_result is changed
      changed_when: true

    - name: "Reboot worker VM {{ disk_grow_worker_vmid }} to expose new disk size to guest kernel"
      ansible.builtin.command: "qm reboot {{ disk_grow_worker_vmid }}"
      when: disk_grow_worker_extend_result is changed
      changed_when: true
|
|
|
|
# ============================================================
|
|
# PLAY 2: Wait for rebooted Swarm nodes to come back
|
|
# ============================================================
|
|
- name: Wait for Swarm nodes to return after reboot
  hosts: swarm_hosts
  gather_facts: false
  become: false
  tags:
    - proxmox_resize
    - in_guest

  tasks:
    # Pauses 10 s before the first attempt, then keeps retrying the
    # connection until it succeeds or 120 s have elapsed.
    - name: Wait for SSH to become available (up to 2 minutes)
      ansible.builtin.wait_for_connection:
        timeout: 120
        delay: 10
|
|
|
|
# ============================================================
|
|
# PLAY 3: In-guest layer — grow partition and filesystem
|
|
# ============================================================
|
|
# Applies the disk_grow role to every Swarm guest, with privilege
# escalation and fact gathering enabled.
- name: Grow in-guest root partition and filesystem on all Swarm nodes
  hosts: swarm_hosts
  gather_facts: true
  become: true
  tags:
    - in_guest
  roles:
    - role: disk_grow
|