543 lines
19 KiB
YAML
543 lines
19 KiB
YAML
---
|
|
# playbooks/proxmox/provision_swarm_vms.yml
|
|
# Provisions Ubuntu 24.04 VMs on Proxmox hosts for Docker Swarm
|
|
#
|
|
# Prerequisites:
|
|
# - community.general collection installed (ansible-galaxy collection install community.general)
|
|
# - Internet access from the Proxmox host (the Ubuntu 24.04 cloud image is downloaded to Proxmox storage automatically if missing)
|
|
# - API token or root SSH access to Proxmox host
|
|
#
|
|
# Usage:
|
|
# ansible-playbook -i inventory/hosts.ini playbooks/proxmox/provision_swarm_vms.yml -e target_host=pve01
|
|
#
|
|
# Variables (play vars can be overridden via -e; pve_node_id may also be set in inventory host/group vars):
|
|
# - pve_node_id: extracted from inventory (1-5)
|
|
# - vm_template_name: base cloud-init template
|
|
# - vm_storage: storage pool for VM disks
|
|
# - vm_bridge: network bridge for VM NICs
|
|
|
|
- name: Provision Swarm VMs on Proxmox
  hosts: "{{ target_host | default('proxmox_cluster') }}"
  gather_facts: true
  vars:
    # VM specifications (from standards doc)
    vm_disk_size: "32G"
    vm_memory_mb: 4096
    vm_cores: 2
    vm_storage: "local-lvm"
    vm_bridge: "vmbr0"

    # Cloud image settings
    cloud_image_url: "https://cloud-images.ubuntu.com/noble/current/noble-server-cloudimg-amd64.img"
    cloud_image_name: "noble-server-cloudimg-amd64.img"
    cloud_image_path: "/var/lib/vz/template/iso/{{ cloud_image_name }}"
    vm_template_vmid: "{{ 9000 + (node_index | int) }}"
    vm_template_name: "ubuntu-24.04-cloud-template-{{ node_index }}"

    # Derive node index from hostname (pve01 -> 1, pve02 -> 2, etc.) and coerce to integer.
    # An explicit pve_node_id (inventory or -e) takes priority over the hostname digits.
    node_index: "{{ (pve_node_id | default(inventory_hostname | regex_replace('[^0-9]', ''))) | int }}"

    # VM IDs (unique per node: 101/102 for node 1, 201/202 for node 2, etc.)
    manager_vmid: "{{ (node_index | int) * 100 + 1 }}"
    worker_vmid: "{{ (node_index | int) * 100 + 2 }}"

    # VM names
    manager_name: "swarm-manager-{{ node_index }}"
    worker_name: "swarm-worker-{{ node_index }}"

    # Static IPs (from inventory scheme: managers .211-.215, workers .221-.225)
    manager_ip: "10.0.0.{{ 210 + (node_index | int) }}"
    worker_ip: "10.0.0.{{ 220 + (node_index | int) }}"
    network_cidr: "24"
    gateway_ip: "10.0.0.2"
    dns_primary: "10.0.0.2"
    dns_secondary: "8.8.8.8"

    # Cloud-init user. The public key is read on the control node; override
    # vm_ssh_key_path with -e to use a different key file.
    vm_user: "chester"
    vm_ssh_key_path: "{{ lookup('env', 'HOME') + '/.ssh/id_ed25519.pub' }}"
    vm_ssh_key: "{{ lookup('file', vm_ssh_key_path) }}"

  pre_tasks:
    # A hostname without digits (and no pve_node_id) coerces to 0, which would
    # silently produce VMIDs 1/2 and IPs .210/.220. Stop before touching anything.
    - name: Validate derived node index
      ansible.builtin.assert:
        that:
          - node_index | int >= 1
        fail_msg: >-
          Could not derive a positive node index for {{ inventory_hostname }};
          set pve_node_id or use a hostname that contains the node number.
        quiet: true
      tags: [always]

  tasks:
    # ========================================
    # SECTION 1: Download Cloud Image
    # ========================================
    - name: Check if cloud image already exists
      ansible.builtin.stat:
        path: "{{ cloud_image_path }}"
      register: cloud_image_stat
      tags: [template, download]

    - name: Download Ubuntu 24.04 cloud image
      ansible.builtin.get_url:
        url: "{{ cloud_image_url }}"
        dest: "{{ cloud_image_path }}"
        mode: "0644"
      when: not cloud_image_stat.stat.exists
      tags: [template, download]

    # ========================================
    # SECTION 2: Create VM Template
    # ========================================
    # Read-only probe: a non-zero return code from `qm status` is treated as
    # "VMID not present". check_mode is disabled so the result is also
    # available during --check runs.
    - name: Check if VM template already exists
      ansible.builtin.command:
        cmd: qm status {{ vm_template_vmid }}
      register: template_check
      changed_when: false
      failed_when: false
      check_mode: false
      tags: [template]

    - name: Create VM template from cloud image
      when: template_check.rc != 0
      tags: [template]
      block:
        - name: Create base VM for template
          ansible.builtin.command:
            cmd: >-
              qm create {{ vm_template_vmid }}
              --name {{ vm_template_name }}
              --memory 2048
              --cores 2
              --net0 virtio,bridge={{ vm_bridge }}
              --scsihw virtio-scsi-pci
          changed_when: true

        - name: Import cloud image as disk
          ansible.builtin.command:
            cmd: qm importdisk {{ vm_template_vmid }} {{ cloud_image_path }} {{ vm_storage }}
          changed_when: true

        - name: Attach imported disk to VM
          ansible.builtin.command:
            cmd: >-
              qm set {{ vm_template_vmid }}
              --scsi0 {{ vm_storage }}:vm-{{ vm_template_vmid }}-disk-0
              --boot c
              --bootdisk scsi0
          changed_when: true

        - name: Add cloud-init drive
          ansible.builtin.command:
            cmd: qm set {{ vm_template_vmid }} --ide2 {{ vm_storage }}:cloudinit
          changed_when: true

        - name: Configure serial console for cloud-init
          ansible.builtin.command:
            cmd: qm set {{ vm_template_vmid }} --serial0 socket --vga serial0
          changed_when: true

        - name: Convert VM to template
          ansible.builtin.command:
            cmd: qm template {{ vm_template_vmid }}
          changed_when: true

    # ========================================
    # SECTION 3: Clone and Configure Manager VM
    # ========================================
    - name: Check if manager VM already exists
      ansible.builtin.command:
        cmd: qm status {{ manager_vmid }}
      register: manager_check
      changed_when: false
      failed_when: false
      check_mode: false
      tags: [provision, manager]

    - name: Provision Swarm Manager VM
      when: manager_check.rc != 0
      tags: [provision, manager]
      block:
        - name: Clone template to manager VM
          ansible.builtin.command:
            cmd: >-
              qm clone {{ vm_template_vmid }} {{ manager_vmid }}
              --name {{ manager_name }}
              --full
          changed_when: true

        - name: Resize manager disk to {{ vm_disk_size }}
          ansible.builtin.command:
            cmd: qm resize {{ manager_vmid }} scsi0 {{ vm_disk_size }}
          changed_when: true

        - name: Configure manager VM resources
          ansible.builtin.command:
            cmd: >-
              qm set {{ manager_vmid }}
              --memory {{ vm_memory_mb }}
              --cores {{ vm_cores }}
              --onboot 1
              --agent enabled=1
          changed_when: true

        # qm --sshkeys takes a file path, so the key is staged on the Proxmox host.
        - name: Write SSH public key for manager
          ansible.builtin.copy:
            content: "{{ vm_ssh_key }}"
            dest: "/tmp/sshkey_{{ manager_vmid }}.pub"
            mode: "0644"

        # Both DNS servers are passed as one space-separated --nameserver value.
        - name: Configure manager cloud-init
          ansible.builtin.command:
            cmd: >-
              qm set {{ manager_vmid }}
              --ciuser {{ vm_user }}
              --sshkeys /tmp/sshkey_{{ manager_vmid }}.pub
              --ipconfig0 ip={{ manager_ip }}/{{ network_cidr }},gw={{ gateway_ip }}
              --nameserver "{{ dns_primary }} {{ dns_secondary }}"
              --searchdomain local
          changed_when: true

        - name: Remove staged SSH public key for manager
          ansible.builtin.file:
            path: "/tmp/sshkey_{{ manager_vmid }}.pub"
            state: absent

        - name: Start manager VM
          ansible.builtin.command:
            cmd: qm start {{ manager_vmid }}
          changed_when: true

    - name: Display manager VM info
      ansible.builtin.debug:
        msg: "Manager VM {{ manager_name }} (ID: {{ manager_vmid }}) configured with IP {{ manager_ip }}"
      tags: [provision, manager]

    # ========================================
    # SECTION 4: Clone and Configure Worker VM
    # ========================================
    - name: Check if worker VM already exists
      ansible.builtin.command:
        cmd: qm status {{ worker_vmid }}
      register: worker_check
      changed_when: false
      failed_when: false
      check_mode: false
      tags: [provision, worker]

    - name: Provision Swarm Worker VM
      when: worker_check.rc != 0
      tags: [provision, worker]
      block:
        - name: Clone template to worker VM
          ansible.builtin.command:
            cmd: >-
              qm clone {{ vm_template_vmid }} {{ worker_vmid }}
              --name {{ worker_name }}
              --full
          changed_when: true

        - name: Resize worker disk to {{ vm_disk_size }}
          ansible.builtin.command:
            cmd: qm resize {{ worker_vmid }} scsi0 {{ vm_disk_size }}
          changed_when: true

        - name: Configure worker VM resources
          ansible.builtin.command:
            cmd: >-
              qm set {{ worker_vmid }}
              --memory {{ vm_memory_mb }}
              --cores {{ vm_cores }}
              --onboot 1
              --agent enabled=1
          changed_when: true

        # qm --sshkeys takes a file path, so the key is staged on the Proxmox host.
        - name: Write SSH public key for worker
          ansible.builtin.copy:
            content: "{{ vm_ssh_key }}"
            dest: "/tmp/sshkey_{{ worker_vmid }}.pub"
            mode: "0644"

        # Both DNS servers are passed as one space-separated --nameserver value.
        - name: Configure worker cloud-init
          ansible.builtin.command:
            cmd: >-
              qm set {{ worker_vmid }}
              --ciuser {{ vm_user }}
              --sshkeys /tmp/sshkey_{{ worker_vmid }}.pub
              --ipconfig0 ip={{ worker_ip }}/{{ network_cidr }},gw={{ gateway_ip }}
              --nameserver "{{ dns_primary }} {{ dns_secondary }}"
              --searchdomain local
          changed_when: true

        - name: Remove staged SSH public key for worker
          ansible.builtin.file:
            path: "/tmp/sshkey_{{ worker_vmid }}.pub"
            state: absent

        - name: Start worker VM
          ansible.builtin.command:
            cmd: qm start {{ worker_vmid }}
          changed_when: true

    - name: Display worker VM info
      ansible.builtin.debug:
        msg: "Worker VM {{ worker_name }} (ID: {{ worker_vmid }}) configured with IP {{ worker_ip }}"
      tags: [provision, worker]

    # ========================================
    # SECTION 5: Idempotent Proxmox disk resize
    # WHY unconditional: the Provision blocks only run when a VM is absent.
    #   An existing VM that predates vm_disk_size being set would be left
    #   undersized. These tasks run on every invocation and are no-ops when
    #   the disk is already at or above the target size.
    # WHY byte comparison: qm resize cannot shrink, and `qm config` may report
    #   the size in a different unit than vm_disk_size (e.g. 3584M vs 32G).
    #   Converting both sides to bytes makes the comparison unit-safe.
    # ========================================
    # The pipeline needs a shell; it exits non-zero when no scsi0 size is found.
    - name: Get current manager VM disk size
      ansible.builtin.shell: |
        qm config {{ manager_vmid }} | grep "^scsi0:" | grep -oP 'size=\K[^,\s]+'
      register: disk_grow_manager_current
      changed_when: false
      tags: [provision, disks]

    - name: Grow manager disk if below target {{ vm_disk_size }}
      ansible.builtin.command:
        cmd: qm resize {{ manager_vmid }} scsi0 {{ vm_disk_size }}
      when: >-
        (disk_grow_manager_current.stdout | trim | human_to_bytes)
        < (vm_disk_size | human_to_bytes)
      changed_when: true
      tags: [provision, disks]

    - name: Get current worker VM disk size
      ansible.builtin.shell: |
        qm config {{ worker_vmid }} | grep "^scsi0:" | grep -oP 'size=\K[^,\s]+'
      register: disk_grow_worker_current
      changed_when: false
      tags: [provision, disks]

    - name: Grow worker disk if below target {{ vm_disk_size }}
      ansible.builtin.command:
        cmd: qm resize {{ worker_vmid }} scsi0 {{ vm_disk_size }}
      when: >-
        (disk_grow_worker_current.stdout | trim | human_to_bytes)
        < (vm_disk_size | human_to_bytes)
      changed_when: true
      tags: [provision, disks]

    # ========================================
    # SECTION 6: Wait for VMs to be ready
    # ========================================
    - name: Wait for manager VM to be reachable via SSH
      ansible.builtin.wait_for:
        host: "{{ manager_ip }}"
        port: 22
        delay: 30
        timeout: 300
        state: started
      tags: [provision, wait]

    - name: Wait for worker VM to be reachable via SSH
      ansible.builtin.wait_for:
        host: "{{ worker_ip }}"
        port: 22
        delay: 30
        timeout: 300
        state: started
      tags: [provision, wait]

    - name: VM provisioning complete
      ansible.builtin.debug:
        msg: |
          ✅ VMs provisioned successfully on {{ inventory_hostname }}:
          - {{ manager_name }}: {{ manager_ip }} (VMID {{ manager_vmid }})
          - {{ worker_name }}: {{ worker_ip }} (VMID {{ worker_vmid }})

          Next steps: add VMs to in-memory inventory, install Docker, initialize Docker Swarm, and verify connectivity
      tags: [provision]

    # The in-memory hosts are derived for every member of proxmox_cluster using
    # the same rule as node_index (pve_node_id first, hostname digits otherwise),
    # so names and IPs match what the provisioning tasks configured.
    - name: Add manager VMs to in-memory inventory
      vars:
        item_node_index: "{{ (hostvars[item].pve_node_id | default(item | regex_replace('[^0-9]', ''))) | int }}"
      ansible.builtin.add_host:
        name: "swarm-manager-{{ item_node_index }}"
        ansible_host: "10.0.0.{{ 210 + (item_node_index | int) }}"
        ansible_user: "{{ vm_user }}"
        groups: "swarm_managers,swarm_hosts"
      loop: "{{ groups['proxmox_cluster'] }}"
      run_once: true
      tags: [provision]

    - name: Add worker VMs to in-memory inventory
      vars:
        item_node_index: "{{ (hostvars[item].pve_node_id | default(item | regex_replace('[^0-9]', ''))) | int }}"
      ansible.builtin.add_host:
        name: "swarm-worker-{{ item_node_index }}"
        ansible_host: "10.0.0.{{ 220 + (item_node_index | int) }}"
        ansible_user: "{{ vm_user }}"
        groups: "swarm_workers,swarm_hosts"
      loop: "{{ groups['proxmox_cluster'] }}"
      run_once: true
      tags: [provision]
|
|
|
|
# ========================================
|
|
# SECTION 6: Install Docker on VMs
|
|
# ========================================
|
|
- name: Install Docker Engine (Docker CE) from official repo
  hosts: swarm_hosts
  become: true
  gather_facts: true
  vars:
    vm_user: chester
    # Final (binary) keyring referenced by the APT source line, plus the
    # ASCII-armored download it is generated from.
    docker_keyring_path: /usr/share/keyrings/docker-archive-keyring.gpg
    docker_keyring_armored_path: /usr/share/keyrings/docker-archive-keyring.asc

  tasks:
    - name: Install prerequisites for Docker
      ansible.builtin.apt:
        name:
          - ca-certificates
          - curl
          - gnupg
          - lsb-release
          - python3-jsondiff
        state: present
        update_cache: true
      tags: [docker]

    # Download and dearmor are separate steps so that a failed download fails
    # the play instead of leaving an empty keyring that `creates:` would then
    # treat as already done.
    - name: Download Docker GPG key (ASCII-armored)
      ansible.builtin.get_url:
        url: https://download.docker.com/linux/ubuntu/gpg
        dest: "{{ docker_keyring_armored_path }}"
        mode: "0644"
      tags: [docker]

    - name: Add Docker GPG key (dearmored)
      ansible.builtin.command:
        cmd: >-
          gpg --output {{ docker_keyring_path }}
          --dearmor {{ docker_keyring_armored_path }}
        creates: "{{ docker_keyring_path }}"
      tags: [docker]

    - name: Add Docker APT repository
      ansible.builtin.apt_repository:
        repo: "deb [arch=amd64 signed-by={{ docker_keyring_path }}] https://download.docker.com/linux/ubuntu {{ ansible_distribution_release }} stable"
        filename: docker
        state: present
      tags: [docker]

    - name: Update apt cache after adding Docker repo
      ansible.builtin.apt:
        update_cache: true
      tags: [docker]

    - name: Install Docker CE, CLI, containerd and compose plugin
      ansible.builtin.apt:
        name:
          - docker-ce
          - docker-ce-cli
          - containerd.io
          - docker-compose-plugin
        state: present
        update_cache: false
      tags: [docker]

    - name: Ensure Docker service is started and enabled
      ansible.builtin.systemd:
        name: docker
        state: started
        enabled: true
      tags: [docker]

    # append: true keeps the user's existing supplementary groups.
    - name: Add '{{ vm_user }}' to docker group
      ansible.builtin.user:
        name: "{{ vm_user }}"
        groups: docker
        append: true
      tags: [docker]

    - name: Ensure /opt/stacks exists and is owned by '{{ vm_user }}'
      ansible.builtin.file:
        path: /opt/stacks
        state: directory
        owner: "{{ vm_user }}"
        group: "{{ vm_user }}"
        mode: "0755"
      tags: [docker]
|
|
|
|
# ========================================
|
|
# SECTION 7: Initialize Docker Swarm and Join Nodes
|
|
# ========================================
|
|
- name: Initialize Docker Swarm on manager VMs
  hosts: swarm_managers
  become: true
  gather_facts: false

  tasks:
    # Probe the first manager so that `swarm init` only runs when it is needed;
    # genuine init errors are then no longer hidden behind failed_when: false.
    - name: Check swarm state on primary manager
      ansible.builtin.command:
        cmd: >-
          docker info --format '{{"{{.Swarm.LocalNodeState}}"}}'
      delegate_to: "{{ groups['swarm_managers'][0] }}"
      run_once: true
      register: swarm_leader_state
      failed_when: false
      changed_when: false

    - name: Initialize swarm on primary manager (run once on first manager)
      ansible.builtin.command:
        cmd: >-
          docker swarm init --advertise-addr {{ hostvars[groups['swarm_managers'][0]]['ansible_host'] }}
      delegate_to: "{{ groups['swarm_managers'][0] }}"
      run_once: true
      register: swarm_init
      when: swarm_leader_state.stdout not in ['active', 'pending']
      changed_when: true

    - name: Get worker join token from leader
      ansible.builtin.command: docker swarm join-token -q worker
      delegate_to: "{{ groups['swarm_managers'][0] }}"
      run_once: true
      register: swarm_worker_token
      changed_when: false

    - name: Get manager join token from leader
      ansible.builtin.command: docker swarm join-token -q manager
      delegate_to: "{{ groups['swarm_managers'][0] }}"
      run_once: true
      register: swarm_manager_token
      changed_when: false

    # Per-host probe so that re-running the play does not try to re-join a
    # manager that is already a swarm member.
    - name: Check if manager is already part of a swarm
      ansible.builtin.command:
        cmd: >-
          docker info --format '{{"{{.Swarm.LocalNodeState}}"}}'
      register: swarm_state
      failed_when: false
      changed_when: false

    - name: Join secondary managers as managers
      ansible.builtin.command:
        cmd: >-
          docker swarm join --token {{ swarm_manager_token.stdout }}
          {{ hostvars[groups['swarm_managers'][0]]['ansible_host'] }}:2377
      when:
        - inventory_hostname != groups['swarm_managers'][0]
        - swarm_state.stdout not in ['active', 'pending']
      changed_when: true
|
|
|
|
# Join workers (use tokens fetched from leader)
|
|
- name: Join worker VMs to Docker Swarm
  hosts: swarm_workers
  become: true
  gather_facts: false

  tasks:
    # Runs once on the first manager; the registered token is shared with
    # every worker in the play.
    - name: Fetch worker token from leader (delegated)
      ansible.builtin.command: docker swarm join-token -q worker
      delegate_to: "{{ groups['swarm_managers'][0] }}"
      run_once: true
      register: swarm_worker_token
      changed_when: false

    # The doubled braces make Jinja emit a literal Go template for docker.
    - name: Check if node is already part of a swarm
      ansible.builtin.command:
        cmd: >-
          docker info --format '{{"{{.Swarm.LocalNodeState}}"}}'
      register: swarm_state
      failed_when: false
      changed_when: false

    # Joining mutates the node, so it is reported as a change when it runs.
    - name: Join this VM to swarm as worker
      ansible.builtin.command:
        cmd: >-
          docker swarm join --token {{ swarm_worker_token.stdout }}
          {{ hostvars[groups['swarm_managers'][0]]['ansible_host'] }}:2377
      when: swarm_state.stdout not in ['active','pending']
      changed_when: true
|
|
|
|
- name: Verify Swarm Cluster from leader
  # Targets the first swarm manager; falls back to localhost when the
  # swarm_managers group does not exist, in which case both tasks are skipped.
  hosts: "{{ groups.get('swarm_managers', ['localhost'])[0] }}"
  become: true
  gather_facts: false

  tasks:
    # Best-effort listing: a failing `docker node ls` does not fail the play.
    - name: Show docker nodes on leader
      ansible.builtin.command: docker node ls
      register: node_list
      when: inventory_hostname in groups.get('swarm_managers', [])
      failed_when: false
      changed_when: false

    - name: Debug node list
      ansible.builtin.debug:
        var: node_list.stdout_lines
      when: inventory_hostname in groups.get('swarm_managers', [])
|
|
|
|
# ========================================
|
|
# SECTION 8: Connectivity Verification (All permutations)
|
|
# ========================================
|
|
- name: Verify network connectivity between all Proxmox hosts and VMs
  hosts: proxmox_cluster,swarm_hosts
  gather_facts: false
  become: true

  tasks:
    # Collects the ansible_host of every Proxmox node and every swarm VM.
    - name: Build list of target IPs (run once)
      run_once: true
      ansible.builtin.set_fact:
        all_targets: >-
          {{ (groups['proxmox_cluster'] | map('extract', hostvars, 'ansible_host') | list)
             + (groups['swarm_hosts'] | map('extract', hostvars, 'ansible_host') | list) }}

    # One ICMP probe per target with a 1-second timeout; failures are recorded
    # in ping_result rather than failing the task.
    - name: Check connectivity to all targets
      ansible.builtin.command: ping -c 1 -W 1 {{ item }}
      register: ping_result
      failed_when: false
      changed_when: false
      loop: "{{ all_targets }}"

    - name: Report connectivity failures
      ansible.builtin.debug:
        msg: |
          From {{ inventory_hostname }} -> {{ item.item }} : rc={{ item.rc }}
      loop: "{{ ping_result.results }}"
      when: item.rc != 0

    # Non-fatal by default (matching the previous behavior, where this task
    # could never fail). Pass -e connectivity_check_fatal=true to make any
    # unreachable target fail the play.
    - name: Fail if any critical connectivity missing (optional)
      ansible.builtin.fail:
        msg: "Connectivity failures detected from {{ inventory_hostname }}"
      when:
        - connectivity_check_fatal | default(false) | bool
        - ping_result.results | selectattr('rc', 'ne', 0) | list | length > 0
|