---
# playbooks/proxmox/provision_swarm_vms.yml
# Provisions Ubuntu 24.04 VMs on Proxmox hosts for Docker Swarm
#
# Prerequisites:
#   - community.general collection installed (ansible-galaxy collection install community.general)
#   - Ubuntu 24.04 cloud image on Proxmox storage (Section 1 downloads it if missing)
#   - API token or root SSH access to Proxmox host
#
# Usage:
#   ansible-playbook -i inventory/hosts.ini playbooks/proxmox/provision_swarm_vms.yml -e target_host=pve01
#
# Variables:
#   Play-level vars below take precedence over inventory group_vars, so
#   override them with -e. pve_node_id is not set here and may come from
#   inventory/host_vars or -e.
#   - pve_node_id: node number (1-5); when unset, derived from the digits in
#     the inventory hostname
#   - vm_template_name: base cloud-init template
#   - vm_storage: storage pool for VM disks
#   - vm_bridge: network bridge for VM NICs

- name: Provision Swarm VMs on Proxmox
  hosts: "{{ target_host | default('proxmox_cluster') }}"
  gather_facts: true
  vars:
    # VM specifications (from standards doc)
    # vm_disk_size must be an absolute size with a K/M/G/T suffix (e.g. "32G");
    # Section 5 converts it to bytes for comparison.
    vm_disk_size: "32G"
    vm_memory_mb: 4096
    vm_cores: 2
    vm_storage: "local-lvm"
    vm_bridge: "vmbr0"

    # Cloud image settings
    cloud_image_url: "https://cloud-images.ubuntu.com/noble/current/noble-server-cloudimg-amd64.img"
    cloud_image_name: "noble-server-cloudimg-amd64.img"
    cloud_image_path: "/var/lib/vz/template/iso/{{ cloud_image_name }}"
    vm_template_vmid: "{{ 9000 + (node_index | int) }}"
    vm_template_name: "ubuntu-24.04-cloud-template-{{ node_index }}"

    # Derive node index from hostname (pve01 -> 1, pve02 -> 2, etc.) and coerce to integer
    node_index: "{{ (pve_node_id | default(inventory_hostname | regex_replace('[^0-9]', '')) ) | int }}"

    # VM IDs (unique per node: 101/102 for node 1, 201/202 for node 2, etc.)
    manager_vmid: "{{ (node_index | int) * 100 + 1 }}"
    worker_vmid: "{{ (node_index | int) * 100 + 2 }}"

    # VM names
    manager_name: "swarm-manager-{{ node_index }}"
    worker_name: "swarm-worker-{{ node_index }}"

    # Static IPs (from inventory scheme: managers .211-.215, workers .221-.225)
    manager_ip: "10.0.0.{{ 210 + (node_index | int) }}"
    worker_ip: "10.0.0.{{ 220 + (node_index | int) }}"
    network_cidr: "24"
    gateway_ip: "10.0.0.2"
    dns_primary: "10.0.0.2"
    dns_secondary: "8.8.8.8"

    # Cloud-init user
    vm_user: "chester"
    vm_ssh_key: "{{ lookup('file', lookup('env', 'HOME') + '/.ssh/id_ed25519.pub') }}"

  tasks:
    # ========================================
    # SECTION 1: Download Cloud Image
    # ========================================
    - name: Check if cloud image already exists
      ansible.builtin.stat:
        path: "{{ cloud_image_path }}"
      register: cloud_image_stat
      tags: ['template', 'download']

    - name: Download Ubuntu 24.04 cloud image
      ansible.builtin.get_url:
        url: "{{ cloud_image_url }}"
        dest: "{{ cloud_image_path }}"
        mode: '0644'
      when: not cloud_image_stat.stat.exists
      tags: ['template', 'download']

    # ========================================
    # SECTION 2: Create VM Template
    # ========================================
    # The probe prints "exists" or "missing"; the block below keys off that
    # word, so the probe itself never fails or reports a change.
    - name: Check if VM template already exists
      ansible.builtin.shell: |
        qm status {{ vm_template_vmid }} 2>/dev/null && echo "exists" || echo "missing"
      register: template_check
      changed_when: false
      failed_when: false
      tags: ['template']

    # Every task in this block mutates Proxmox state and only runs when the
    # template is absent, so each one reports "changed".
    - name: Create VM template from cloud image
      when: "'missing' in template_check.stdout"
      tags: ['template']
      block:
        - name: Create base VM for template
          ansible.builtin.shell: |
            qm create {{ vm_template_vmid }} \
              --name {{ vm_template_name }} \
              --memory 2048 \
              --cores 2 \
              --net0 virtio,bridge={{ vm_bridge }} \
              --scsihw virtio-scsi-pci
          register: create_vm
          changed_when: true

        - name: Import cloud image as disk
          ansible.builtin.shell: |
            qm importdisk {{ vm_template_vmid }} {{ cloud_image_path }} {{ vm_storage }}
          register: import_disk
          changed_when: true

        - name: Attach imported disk to VM
          ansible.builtin.shell: |
            qm set {{ vm_template_vmid }} \
              --scsi0 {{ vm_storage }}:vm-{{ vm_template_vmid }}-disk-0 \
              --boot c \
              --bootdisk scsi0
          changed_when: true

        - name: Add cloud-init drive
          ansible.builtin.shell: |
            qm set {{ vm_template_vmid }} --ide2 {{ vm_storage }}:cloudinit
          changed_when: true

        - name: Configure serial console for cloud-init
          ansible.builtin.shell: |
            qm set {{ vm_template_vmid }} --serial0 socket --vga serial0
          changed_when: true

        - name: Convert VM to template
          ansible.builtin.shell: |
            qm template {{ vm_template_vmid }}
          changed_when: true

    # ========================================
    # SECTION 3: Clone and Configure Manager VM
    # ========================================
    - name: Check if manager VM already exists
      ansible.builtin.shell: |
        qm status {{ manager_vmid }} 2>/dev/null && echo "exists" || echo "missing"
      register: manager_check
      changed_when: false
      failed_when: false
      tags: ['provision', 'manager']

    # Runs only when the manager VM is absent; every qm call here changes state.
    - name: Provision Swarm Manager VM
      when: "'missing' in manager_check.stdout"
      tags: ['provision', 'manager']
      block:
        - name: Clone template to manager VM
          ansible.builtin.shell: |
            qm clone {{ vm_template_vmid }} {{ manager_vmid }} \
              --name {{ manager_name }} \
              --full
          changed_when: true

        - name: Resize manager disk to {{ vm_disk_size }}
          ansible.builtin.shell: |
            qm resize {{ manager_vmid }} scsi0 {{ vm_disk_size }}
          changed_when: true

        - name: Configure manager VM resources
          ansible.builtin.shell: |
            qm set {{ manager_vmid }} \
              --memory {{ vm_memory_mb }} \
              --cores {{ vm_cores }} \
              --onboot 1 \
              --agent enabled=1
          changed_when: true

        # qm --sshkeys takes a file path, so the key is staged on the host first.
        - name: Write SSH public key for manager
          ansible.builtin.copy:
            content: "{{ vm_ssh_key }}"
            dest: "/tmp/sshkey_{{ manager_vmid }}.pub"
            mode: '0644'

        - name: Configure manager cloud-init
          ansible.builtin.shell: |
            qm set {{ manager_vmid }} \
              --ciuser {{ vm_user }} \
              --sshkeys /tmp/sshkey_{{ manager_vmid }}.pub \
              --ipconfig0 ip={{ manager_ip }}/{{ network_cidr }},gw={{ gateway_ip }} \
              --nameserver {{ dns_primary }} \
              --searchdomain local
          changed_when: true

        - name: Start manager VM
          ansible.builtin.shell: |
            qm start {{ manager_vmid }}
          changed_when: true

    - name: Display manager VM info
      ansible.builtin.debug:
        msg: "Manager VM {{ manager_name }} (ID: {{ manager_vmid }}) configured with IP {{ manager_ip }}"
      tags: ['provision', 'manager']

    # ========================================
    # SECTION 4: Clone and Configure Worker VM
    # ========================================
    - name: Check if worker VM already exists
      ansible.builtin.shell: |
        qm status {{ worker_vmid }} 2>/dev/null && echo "exists" || echo "missing"
      register: worker_check
      changed_when: false
      failed_when: false
      tags: ['provision', 'worker']

    # Runs only when the worker VM is absent; every qm call here changes state.
    - name: Provision Swarm Worker VM
      when: "'missing' in worker_check.stdout"
      tags: ['provision', 'worker']
      block:
        - name: Clone template to worker VM
          ansible.builtin.shell: |
            qm clone {{ vm_template_vmid }} {{ worker_vmid }} \
              --name {{ worker_name }} \
              --full
          changed_when: true

        - name: Resize worker disk to {{ vm_disk_size }}
          ansible.builtin.shell: |
            qm resize {{ worker_vmid }} scsi0 {{ vm_disk_size }}
          changed_when: true

        - name: Configure worker VM resources
          ansible.builtin.shell: |
            qm set {{ worker_vmid }} \
              --memory {{ vm_memory_mb }} \
              --cores {{ vm_cores }} \
              --onboot 1 \
              --agent enabled=1
          changed_when: true

        # qm --sshkeys takes a file path, so the key is staged on the host first.
        - name: Write SSH public key for worker
          ansible.builtin.copy:
            content: "{{ vm_ssh_key }}"
            dest: "/tmp/sshkey_{{ worker_vmid }}.pub"
            mode: '0644'

        - name: Configure worker cloud-init
          ansible.builtin.shell: |
            qm set {{ worker_vmid }} \
              --ciuser {{ vm_user }} \
              --sshkeys /tmp/sshkey_{{ worker_vmid }}.pub \
              --ipconfig0 ip={{ worker_ip }}/{{ network_cidr }},gw={{ gateway_ip }} \
              --nameserver {{ dns_primary }} \
              --searchdomain local
          changed_when: true

        - name: Start worker VM
          ansible.builtin.shell: |
            qm start {{ worker_vmid }}
          changed_when: true

    - name: Display worker VM info
      ansible.builtin.debug:
        msg: "Worker VM {{ worker_name }} (ID: {{ worker_vmid }}) configured with IP {{ worker_ip }}"
      tags: ['provision', 'worker']

    # ========================================
    # SECTION 5: Idempotent Proxmox disk resize
    # WHY unconditional: the Provision blocks only run when a VM is absent.
    #   An existing VM that predates vm_disk_size being set would be left
    #   undersized. These tasks run on every invocation and are no-ops when
    #   the disk is already at or above the target size.
    # WHY byte comparison: qm resize cannot shrink, so the resize must only
    #   run when the current size is strictly below the target. Both values
    #   are converted with human_to_bytes so that differing unit suffixes
    #   (e.g. a "3584M" disk vs. a "32G" target) compare correctly; comparing
    #   the bare digits would treat 3584M as larger than 32G.
    # ========================================
    - name: Get current manager VM disk size
      ansible.builtin.shell: |
        qm config {{ manager_vmid }} | grep "^scsi0:" | grep -oP 'size=\K[^,\s]+'
      register: disk_grow_manager_current
      changed_when: false
      tags: ['provision', 'disks']

    - name: Resize manager disk to {{ vm_disk_size }} if below target
      ansible.builtin.shell: |
        qm resize {{ manager_vmid }} scsi0 {{ vm_disk_size }}
      when: >
        (disk_grow_manager_current.stdout | human_to_bytes)
        < (vm_disk_size | human_to_bytes)
      changed_when: true
      tags: ['provision', 'disks']

    - name: Get current worker VM disk size
      ansible.builtin.shell: |
        qm config {{ worker_vmid }} | grep "^scsi0:" | grep -oP 'size=\K[^,\s]+'
      register: disk_grow_worker_current
      changed_when: false
      tags: ['provision', 'disks']

    - name: Resize worker disk to {{ vm_disk_size }} if below target
      ansible.builtin.shell: |
        qm resize {{ worker_vmid }} scsi0 {{ vm_disk_size }}
      when: >
        (disk_grow_worker_current.stdout | human_to_bytes)
        < (vm_disk_size | human_to_bytes)
      changed_when: true
      tags: ['provision', 'disks']

    # ========================================
    # SECTION 6: Wait for VMs to be ready
    # ========================================
    # wait_for runs on the Proxmox host and probes port 22 of each VM.
    - name: Wait for manager VM to be reachable via SSH
      ansible.builtin.wait_for:
        host: "{{ manager_ip }}"
        port: 22
        delay: 30
        timeout: 300
        state: started
      tags: ['provision', 'wait']

    - name: Wait for worker VM to be reachable via SSH
      ansible.builtin.wait_for:
        host: "{{ worker_ip }}"
        port: 22
        delay: 30
        timeout: 300
        state: started
      tags: ['provision', 'wait']

    - name: VM provisioning complete
      ansible.builtin.debug:
        msg: |
          ✅ VMs provisioned successfully on {{ inventory_hostname }}:
            - {{ manager_name }}: {{ manager_ip }} (VMID {{ manager_vmid }})
            - {{ worker_name }}: {{ worker_ip }} (VMID {{ worker_vmid }})

          Next steps: add VMs to in-memory inventory, install Docker, initialize Docker Swarm, and verify connectivity
      tags: ['provision']

    # The two add_host tasks register one manager and one worker per member of
    # the proxmox_cluster group, deriving name and IP from the digits in each
    # Proxmox hostname (same scheme as manager_ip / worker_ip above).
    - name: Add manager VMs to in-memory inventory
      ansible.builtin.add_host:
        name: "swarm-manager-{{ item | regex_replace('[^0-9]', '') | int }}"
        ansible_host: "10.0.0.{{ 210 + (item | regex_replace('[^0-9]', '') | int) }}"
        ansible_user: "{{ vm_user }}"
        groups: "swarm_managers,swarm_hosts"
      loop: "{{ groups['proxmox_cluster'] }}"
      run_once: true
      tags: ['provision']

    - name: Add worker VMs to in-memory inventory
      ansible.builtin.add_host:
        name: "swarm-worker-{{ item | regex_replace('[^0-9]', '') | int }}"
        ansible_host: "10.0.0.{{ 220 + (item | regex_replace('[^0-9]', '') | int) }}"
        ansible_user: "{{ vm_user }}"
        groups: "swarm_workers,swarm_hosts"
      loop: "{{ groups['proxmox_cluster'] }}"
      run_once: true
      tags: ['provision']

# ========================================
# SECTION 7: Install Docker on VMs
# ========================================
- name: Install Docker Engine (Docker CE) from official repo
  hosts: swarm_hosts
  become: true
  gather_facts: true
  vars:
    vm_user: chester
  tasks:
    - name: Install prerequisites for Docker
      ansible.builtin.apt:
        name:
          - ca-certificates
          - curl
          - gnupg
          - lsb-release
          - python3-jsondiff
        state: present
        update_cache: true
      tags: ['docker']

    # "creates" makes this a no-op once the keyring file is present.
    - name: Add Docker GPG key (dearmored)
      ansible.builtin.shell: |
        curl -fsSL https://download.docker.com/linux/ubuntu/gpg | gpg --dearmor -o /usr/share/keyrings/docker-archive-keyring.gpg
      args:
        creates: /usr/share/keyrings/docker-archive-keyring.gpg
      tags: ['docker']

    - name: Add Docker APT repository
      ansible.builtin.apt_repository:
        repo: "deb [arch=amd64 signed-by=/usr/share/keyrings/docker-archive-keyring.gpg] https://download.docker.com/linux/ubuntu {{ ansible_distribution_release }} stable"
        filename: docker
        state: present
      tags: ['docker']

    - name: Update apt cache after adding Docker repo
      ansible.builtin.apt:
        update_cache: true
      tags: ['docker']

    - name: Install Docker CE, CLI, containerd and compose plugin
      ansible.builtin.apt:
        name:
          - docker-ce
          - docker-ce-cli
          - containerd.io
          - docker-compose-plugin
        state: present
        update_cache: false
      tags: ['docker']

    - name: Ensure Docker service is started and enabled
      ansible.builtin.systemd:
        name: docker
        state: started
        enabled: true
      tags: ['docker']

    - name: Add '{{ vm_user }}' to docker group
      ansible.builtin.user:
        name: "{{ vm_user }}"
        groups: docker
        append: true
      tags: ['docker']

    - name: Ensure /opt/stacks exists and is owned by '{{ vm_user }}'
      ansible.builtin.file:
        path: /opt/stacks
        state: directory
        owner: "{{ vm_user }}"
        group: "{{ vm_user }}"
        mode: '0755'
      tags: ['docker']

# ========================================
# SECTION 8: Initialize Docker Swarm and Join Nodes
# ========================================
- name: Initialize Docker Swarm on manager VMs
  hosts: swarm_managers
  become: true
  gather_facts: false
  tasks:
    # Probe the leader first so "swarm init" is skipped on re-runs instead of
    # failing and having that failure swallowed.
    - name: Check swarm state on primary manager
      ansible.builtin.command: docker info --format '{{"{{.Swarm.LocalNodeState}}"}}'
      delegate_to: "{{ groups['swarm_managers'][0] }}"
      run_once: true
      register: leader_swarm_state
      failed_when: false
      changed_when: false

    - name: Initialize swarm on primary manager (run once on first manager)
      ansible.builtin.command: >
        docker swarm init --advertise-addr {{ hostvars[groups['swarm_managers'][0]]['ansible_host'] }}
      delegate_to: "{{ groups['swarm_managers'][0] }}"
      run_once: true
      register: swarm_init
      when: leader_swarm_state.stdout | default('') not in ['active','pending']
      changed_when: true

    - name: Get worker join token from leader
      ansible.builtin.command: docker swarm join-token -q worker
      delegate_to: "{{ groups['swarm_managers'][0] }}"
      run_once: true
      register: swarm_worker_token
      changed_when: false

    - name: Get manager join token from leader
      ansible.builtin.command: docker swarm join-token -q manager
      delegate_to: "{{ groups['swarm_managers'][0] }}"
      run_once: true
      register: swarm_manager_token
      changed_when: false

    # Same guard as the worker play: a node that is already in a swarm must
    # not attempt to join again, otherwise re-running the playbook fails.
    - name: Check if manager is already part of a swarm
      ansible.builtin.command: docker info --format '{{"{{.Swarm.LocalNodeState}}"}}'
      register: swarm_state
      failed_when: false
      changed_when: false

    - name: Join secondary managers as managers
      ansible.builtin.shell: >
        docker swarm join --token {{ swarm_manager_token.stdout }}
        {{ hostvars[groups['swarm_managers'][0]]['ansible_host'] }}:2377
      when:
        - inventory_hostname != groups['swarm_managers'][0]
        - swarm_state.stdout not in ['active','pending']
      changed_when: true

# Join workers (use tokens fetched from leader)
- name: Join worker VMs to Docker Swarm
  hosts: swarm_workers
  become: true
  gather_facts: false
  tasks:
    - name: Fetch worker token from leader (delegated)
      ansible.builtin.command: docker swarm join-token -q worker
      delegate_to: "{{ groups['swarm_managers'][0] }}"
      run_once: true
      register: swarm_worker_token
      changed_when: false

    - name: Check if node is already part of a swarm
      ansible.builtin.command: docker info --format '{{"{{.Swarm.LocalNodeState}}"}}'
      register: swarm_state
      failed_when: false
      changed_when: false

    - name: Join this VM to swarm as worker
      ansible.builtin.shell: >
        docker swarm join --token {{ swarm_worker_token.stdout }}
        {{ hostvars[groups['swarm_managers'][0]]['ansible_host'] }}:2377
      when: swarm_state.stdout not in ['active','pending']
      changed_when: true

# Falls back to localhost when no swarm_managers group exists; the block's
# "when" then skips the docker commands there.
- name: Verify Swarm Cluster from leader
  hosts: "{{ groups.get('swarm_managers', ['localhost'])[0] }}"
  become: true
  gather_facts: false
  tasks:
    - name: Report swarm membership
      when: inventory_hostname in groups.get('swarm_managers', [])
      block:
        - name: Show docker nodes on leader
          ansible.builtin.command: docker node ls
          register: node_list
          failed_when: false
          changed_when: false

        - name: Debug node list
          ansible.builtin.debug:
            var: node_list.stdout_lines

# ========================================
# SECTION 9: Connectivity Verification (All permutations)
# ========================================
- name: Verify network connectivity between all Proxmox hosts and VMs
  hosts: proxmox_cluster,swarm_hosts
  gather_facts: false
  become: true
  tasks:
    # Collects ansible_host of every Proxmox host and every swarm VM.
    - name: Build list of target IPs (run once)
      run_once: true
      ansible.builtin.set_fact:
        all_targets: >
          {{ (groups['proxmox_cluster'] | map('extract', hostvars, 'ansible_host') | list)
             + (groups['swarm_hosts'] | map('extract', hostvars, 'ansible_host') | list) }}

    # One ICMP probe per target with a 1 s timeout; failures are recorded in
    # ping_result rather than failing the task.
    - name: Check connectivity to all targets
      ansible.builtin.command: ping -c 1 -W 1 {{ item }}
      register: ping_result
      failed_when: false
      changed_when: false
      loop: "{{ all_targets }}"

    - name: Report connectivity failures
      ansible.builtin.debug:
        msg: |
          From {{ inventory_hostname }} -> {{ item.item }} : rc={{ item.rc }}
      loop: "{{ ping_result.results }}"
      when: item.rc != 0
      failed_when: false

    # NOTE(review): failed_when: false neutralises this fail task, so the play
    # never actually stops on connectivity loss. Left as-is because the task
    # is marked "(optional)"; remove failed_when to make it enforcing.
    - name: Fail if any critical connectivity missing (optional)
      ansible.builtin.fail:
        msg: "Connectivity failures detected from {{ inventory_hostname }}"
      when: ping_result.results | selectattr('rc','ne',0) | list | length > 0
      failed_when: false