188 lines
6.5 KiB
YAML
188 lines
6.5 KiB
YAML
---
|
|
# playbooks/proxmox/reconcile_cluster.yml
|
|
# Re-enable cluster services and reconcile Proxmox cluster membership.
|
|
#
|
|
# What this playbook does:
# 1. Sets up root SSH trust between all cluster nodes
# 2. Ensures pve-cluster is running on all nodes
# 3. Creates a cluster on the primary node if missing
# 4. Joins remaining nodes if they are not yet members
# 5. Re-enables Corosync and HA services
# 6. Prints final cluster membership from the primary node
|
|
#
|
|
# Usage:
|
|
# ansible-playbook -i inventory/hosts.ini playbooks/proxmox/reconcile_cluster.yml
|
|
#
|
|
# Optional overrides:
|
|
# -e pve_cluster_name=homelab
|
|
# -e pve_primary_node=pve01
|
|
# -e cluster_mode=auto|primary|join
|
|
# -e pve_existing_cluster_ip=10.0.0.201
|
|
|
|
# ========================================
|
|
# PLAY 1: Setup root SSH trust (parallel)
|
|
# ========================================
|
|
- name: Setup root SSH trust for cluster operations
  hosts: proxmox_cluster
  become: true
  gather_facts: false

  # Every node publishes root's public key, then every node authorizes the
  # keys of all nodes in the proxmox_cluster group (including its own).
  tasks:
    # Read-only probe: the result decides whether a new key pair is generated
    # and which public key file is read below.
    - name: Check whether root already has an RSA SSH key
      ansible.builtin.stat:
        path: /root/.ssh/id_rsa
      register: root_ssh_key

    # Only runs when no RSA key was found; `creates` keeps the task idempotent
    # once the ed25519 key exists. The trailing empty argument is the
    # passphrase (-N ""), i.e. the key is generated without one.
    - name: Generate root SSH key if missing
      ansible.builtin.command:
        argv:
          - ssh-keygen
          - "-t"
          - ed25519
          - "-f"
          - /root/.ssh/id_ed25519
          - "-N"
          - ""
        creates: /root/.ssh/id_ed25519
      when: not root_ssh_key.stat.exists

    # Picks the public half matching the key selected above. slurp returns the
    # file content base64-encoded in `content`.
    - name: Fetch root's public SSH key
      ansible.builtin.slurp:
        src: "{{ '/root/.ssh/id_rsa.pub' if root_ssh_key.stat.exists else '/root/.ssh/id_ed25519.pub' }}"
      register: root_pubkey

    # Peers without a usable slurp result (outside --limit, failed, or skipped)
    # are ignored instead of breaking the b64decode expression.
    # NOTE(review): on Proxmox VE /root/.ssh/authorized_keys is commonly a
    # symlink into /etc/pve/priv; the module's default (follow: false) replaces
    # a symlink with a regular file - confirm that is the intended behavior.
    - name: Distribute root SSH keys across all cluster nodes
      ansible.posix.authorized_key:
        user: root
        key: "{{ hostvars[item].root_pubkey.content | b64decode }}"
        state: present
      loop: "{{ groups['proxmox_cluster'] }}"
      when:
        - hostvars[item].root_pubkey is defined
        - hostvars[item].root_pubkey.content is defined
|
|
|
|
# ========================================
|
|
# PLAY 2: Cluster reconciliation (serial)
|
|
# ========================================
|
|
- name: Reconcile Proxmox cluster state
  hosts: proxmox_cluster
  become: true
  gather_facts: true
  # One node per batch: each node runs the whole task list before the next
  # node starts.
  # NOTE(review): joins assume the cluster already exists on the join target,
  # so the primary has to be processed before the other nodes - confirm the
  # inventory order when overriding pve_primary_node.
  serial: 1

  vars:
    pve_cluster_name: "homelab"
    pve_primary_node: "{{ groups['proxmox_cluster'][0] }}"
    pve_primary_ip: "{{ hostvars[pve_primary_node].ansible_host | default(pve_primary_node) }}"
    # auto: create if needed on primary and join others
    # primary: force primary-init behavior on target host(s)
    # join: force join behavior on target host(s)
    cluster_mode: "auto"
    # When non-empty, nodes join the cluster reachable at this address instead
    # of the primary node resolved above.
    pve_existing_cluster_ip: ""

  tasks:
    - name: Validate inventory has Proxmox nodes
      ansible.builtin.assert:
        that:
          - groups['proxmox_cluster'] | length >= 1
        fail_msg: "Inventory group 'proxmox_cluster' is empty or undefined."

    - name: Validate cluster_mode input
      ansible.builtin.assert:
        that:
          - cluster_mode in ['auto', 'primary', 'join']
        fail_msg: "cluster_mode must be one of: auto, primary, join"

    # An empty, whitespace-only or null override falls back to the primary IP.
    - name: Resolve join target IP
      ansible.builtin.set_fact:
        pve_join_target_ip: >-
          {{ pve_existing_cluster_ip | default('', true) | trim
          | default(pve_primary_ip, true) }}

    - name: Show reconcile plan
      ansible.builtin.debug:
        msg:
          - "Primary node: {{ pve_primary_node }} ({{ pve_primary_ip }})"
          - "Cluster name: {{ pve_cluster_name }}"
          - "Cluster mode: {{ cluster_mode }}"
          - "Join target IP: {{ pve_join_target_ip }}"
          - "Target nodes: {{ groups['proxmox_cluster'] | join(', ') }}"
      run_once: true

    - name: Ensure pve-cluster service is enabled and running
      ansible.builtin.systemd:
        name: pve-cluster
        enabled: true
        state: started

    # The presence of /etc/pve/corosync.conf is used as the "already
    # clustered" marker by every create/join decision below.
    - name: Check whether this node is already clustered
      ansible.builtin.stat:
        path: /etc/pve/corosync.conf
      register: corosync_conf

    # auto: only the primary creates; primary: every targeted host creates.
    - name: Create cluster on primary node when missing
      ansible.builtin.command:
        argv:
          - pvecm
          - create
          - "{{ pve_cluster_name }}"
      register: pvecm_create
      changed_when: pvecm_create.rc == 0
      when:
        - cluster_mode in ['auto', 'primary']
        - inventory_hostname == pve_primary_node or cluster_mode == 'primary'
        - not corosync_conf.stat.exists

    # Only wait where a cluster was actually created in this run. Waiting
    # unconditionally on the primary would time out (and fail the node) when
    # cluster_mode=join is used on a not-yet-clustered primary, before the
    # join task below is ever reached.
    - name: Wait for corosync config to appear after cluster creation
      ansible.builtin.wait_for:
        path: /etc/pve/corosync.conf
        timeout: 60
      when: pvecm_create is changed

    # Advisory pre-flight check, gated exactly like the join task so it runs
    # for every node that is about to join and for no other node. BatchMode
    # forbids password prompts and ConnectTimeout bounds the attempt.
    - name: Test root SSH connectivity to join target
      ansible.builtin.command:
        argv:
          - ssh
          - "-o"
          - BatchMode=yes
          - "-o"
          - ConnectTimeout=10
          - "root@{{ pve_join_target_ip }}"
          - hostname
      changed_when: false
      failed_when: false
      register: ssh_test
      when:
        - cluster_mode in ['auto', 'join']
        - inventory_hostname != pve_primary_node or cluster_mode == 'join'
        - not corosync_conf.stat.exists

    # A skipped test has no rc, so the warning is only shown for a real failure.
    - name: Warn if root SSH test failed
      ansible.builtin.debug:
        msg: "WARNING: Root SSH to {{ pve_join_target_ip }} failed. Cluster join may hang. Error: {{ ssh_test.stderr | default('') }}"
      when:
        - ssh_test is defined
        - ssh_test.rc is defined
        - ssh_test.rc != 0

    # auto: every non-primary node joins; join: every targeted host joins.
    - name: Join non-primary node to cluster when missing
      ansible.builtin.command:
        argv:
          - pvecm
          - add
          - "{{ pve_join_target_ip }}"
          - "--use_ssh"
          - "1"
      register: pvecm_add
      changed_when: pvecm_add.rc == 0
      when:
        - cluster_mode in ['auto', 'join']
        - inventory_hostname != pve_primary_node or cluster_mode == 'join'
        - not corosync_conf.stat.exists

    - name: Re-check cluster membership config after create/join
      ansible.builtin.stat:
        path: /etc/pve/corosync.conf
      register: corosync_conf_after

    # Same order as before: corosync first, then the HA managers.
    - name: Ensure Corosync and HA services are enabled and running on clustered nodes
      ansible.builtin.systemd:
        name: "{{ item }}"
        enabled: true
        state: started
      loop:
        - corosync
        - pve-ha-lrm
        - pve-ha-crm
      when: corosync_conf_after.stat.exists

    # With serial: 1 the primary's own batch finishes before the other nodes
    # join, so querying membership there would not show the final state.
    # Instead the query is delegated to the primary while the last host of the
    # play is being processed.
    - name: Show cluster membership from primary
      ansible.builtin.command:
        argv:
          - pvecm
          - nodes
      delegate_to: "{{ pve_primary_node }}"
      changed_when: false
      register: pvecm_nodes
      when: inventory_hostname == (ansible_play_hosts_all | last)

    - name: Print cluster membership output
      ansible.builtin.debug:
        var: pvecm_nodes.stdout_lines
      when: inventory_hostname == (ansible_play_hosts_all | last)
|