---
# Fail early when the project name is blank or when any host named in the
# replacement variables is absent from its expected inventory group.
# The default() guards make an unset project name or a missing inventory group
# fail the assertion (and print fail_msg) rather than abort with a template
# error.
- name: Validate required replacement inputs
  ansible.builtin.assert:
    that:
      - replacement_project_name | default('') | trim | length > 0
      - replacement_old_logical_host in (groups['proxmox_cluster'] | default([]))
      # The replacement host only has to be in the cluster group when phase 2 is enabled.
      - (not (replacement_phase2_rebuild_and_rejoin | bool)) or (replacement_new_physical_host in (groups['proxmox_cluster'] | default([])))
      - replacement_swarm_manager_name in (groups['swarm_managers'] | default([]))
      - replacement_swarm_worker_name in (groups['swarm_workers'] | default([]))
    fail_msg: >-
      Missing replacement inputs or inventory groups. Ensure project name is set and
      proxmox/swarm host groups contain the expected hosts.
    success_msg: "Replacement input validation passed."

# Capture the run timestamp exactly once. The `pipe` lookup runs `date` on the
# controller each time it is templated, so it must not be repeated when the
# output directory name is built.
- name: Build replacement context values
  ansible.builtin.set_fact:
    proxmox_node_replacement_timestamp: "{{ lookup('pipe', 'date +%Y%m%dT%H%M%S') }}"

# Separate task on purpose: keys of a single set_fact cannot reference each
# other, and reusing the stored fact guarantees the directory suffix matches the
# recorded timestamp. Characters outside [a-zA-Z0-9_-] in the project name are
# replaced with "_".
- name: Build replacement output directory from captured timestamp
  ansible.builtin.set_fact:
    proxmox_node_replacement_output_dir: "{{ replacement_output_root }}/{{ replacement_project_name | regex_replace('[^a-zA-Z0-9_-]', '_') }}-{{ proxmox_node_replacement_timestamp }}"

# Informational only: echo the inputs that drive this run, one list entry per
# line of output.
- name: Print replacement plan summary
  ansible.builtin.debug:
    msg:
      - 'Project: {{ replacement_project_name }}'
      - 'Logical identity: {{ replacement_old_logical_host }} ({{ replacement_old_ip }})'
      - 'Replacement hardware: {{ replacement_new_physical_host }} ({{ replacement_new_physical_ip }})'
      - 'Swarm identities: {{ replacement_swarm_manager_name }}, {{ replacement_swarm_worker_name }}'
      - 'Execute cutover: {{ replacement_execute_cutover }}'
      - 'Power off old host: {{ replacement_poweroff_old_host }}'

# Probe TCP port 22 from the controller for each host involved in the
# replacement. The whole task is skipped when runtime checks are disabled.
- name: Preflight network reachability from control node
  ansible.builtin.wait_for:
    # The replacement host may not be in inventory yet; in that case use its
    # explicit IP. Otherwise use ansible_host, falling back to the host name.
    host: >-
      {{ replacement_new_physical_ip
         if (item == replacement_new_physical_host and hostvars[item] is not defined)
         else (hostvars[item].ansible_host | default(item)) }}
    port: 22
    state: started
    connect_timeout: 2
    timeout: 5
  delegate_to: localhost
  loop:
    - "{{ replacement_old_logical_host }}"
    - "{{ replacement_new_physical_host }}"
    - "{{ replacement_swarm_manager_name }}"
    - "{{ replacement_swarm_worker_name }}"
  when:
    - not (replacement_skip_runtime_checks | bool)
    # Skip the old host when it is allowed to be offline.
    - (item != replacement_old_logical_host) or not (replacement_old_host_may_be_offline | bool)
    # Probe the new host only when a baseline capture or phase 2 will use it.
    - (item != replacement_new_physical_host) or (replacement_capture_baseline | bool) or (replacement_phase2_rebuild_and_rejoin | bool)

# Read-only query: run `docker node ls` as root on the swarm manager and keep
# the result for the quorum assertion and the baseline artifact.
- name: Capture swarm quorum state from manager host
  ansible.builtin.command:
    cmd: docker node ls
  delegate_to: "{{ replacement_swarm_manager_name }}"
  become: true
  changed_when: false
  register: proxmox_node_replacement_swarm_node_ls
  when: not (replacement_skip_runtime_checks | bool)

# Gate on the captured `docker node ls` result: the command must have exited 0
# and its output must contain "Leader" or "Reachable".
- name: Assert swarm quorum output is available
  ansible.builtin.assert:
    that:
      - proxmox_node_replacement_swarm_node_ls.rc == 0
      - proxmox_node_replacement_swarm_node_ls.stdout is search('Leader|Reachable')
    success_msg: 'Swarm quorum check passed.'
    fail_msg: 'Swarm control plane is not healthy enough for a node replacement cutover.'
  when: not (replacement_skip_runtime_checks | bool)

# Both the baseline summary and the cutover TODO are written into this
# directory on the controller, so create it when either will be produced.
- name: Create output directory for baseline artifacts
  ansible.builtin.file:
    state: directory
    path: "{{ proxmox_node_replacement_output_dir }}"
    mode: "0755"
  delegate_to: localhost
  when: (replacement_capture_baseline | bool) or (replacement_execute_cutover | bool)

# Read-only: record `qm list` output from the old logical host for the
# baseline artifact.
- name: Capture old logical host VM list
  ansible.builtin.command:
    argv:
      - /usr/sbin/qm
      - list
  delegate_to: "{{ replacement_old_logical_host }}"
  become: true
  changed_when: false
  register: proxmox_node_replacement_old_qm_list
  when: replacement_capture_baseline | bool

# Read-only: record `qm list` output from the replacement physical host for
# the baseline artifact.
- name: Capture replacement physical host VM list
  ansible.builtin.command:
    argv:
      - /usr/sbin/qm
      - list
  delegate_to: "{{ replacement_new_physical_host }}"
  become: true
  changed_when: false
  register: proxmox_node_replacement_new_qm_list
  when: replacement_capture_baseline | bool

# Best-effort capture of `pvecm status` on the old logical host.
- name: Capture old logical host cluster state
  ansible.builtin.command:
    cmd: pvecm status
  delegate_to: "{{ replacement_old_logical_host }}"
  become: true
  register: proxmox_node_replacement_old_cluster_status
  changed_when: false
  # A non-zero exit from pvecm must not fail the task.
  failed_when: false
  when: replacement_capture_baseline | bool

# Best-effort capture of `pvecm status` on the replacement physical host.
- name: Capture replacement physical host cluster state
  ansible.builtin.command:
    cmd: pvecm status
  delegate_to: "{{ replacement_new_physical_host }}"
  become: true
  register: proxmox_node_replacement_new_cluster_status
  changed_when: false
  # A non-zero exit from pvecm must not fail the task.
  failed_when: false
  when: replacement_capture_baseline | bool

# Render the collected baseline into one text file on the controller.
# Every captured section falls back to "not-captured" when its source task was
# skipped (a skipped task registers a result without `stdout`). The swarm
# section uses the same marker so that a skipped runtime check is not mistaken
# for empty `docker node ls` output.
- name: Write baseline artifact to controller
  ansible.builtin.copy:
    dest: "{{ proxmox_node_replacement_output_dir }}/baseline-summary.txt"
    mode: "0644"
    content: |
      Project: {{ replacement_project_name }}
      Timestamp: {{ proxmox_node_replacement_timestamp }}
      Logical identity host: {{ replacement_old_logical_host }}
      Logical identity IP: {{ replacement_old_ip }}
      Replacement physical host: {{ replacement_new_physical_host }}
      Replacement physical IP: {{ replacement_new_physical_ip }}

      === Swarm node ls (from {{ replacement_swarm_manager_name }}) ===
      {{ proxmox_node_replacement_swarm_node_ls.stdout | default('not-captured') }}

      === QM list ({{ replacement_old_logical_host }}) ===
      {{ proxmox_node_replacement_old_qm_list.stdout | default('not-captured') }}

      === QM list ({{ replacement_new_physical_host }}) ===
      {{ proxmox_node_replacement_new_qm_list.stdout | default('not-captured') }}

      === pvecm status ({{ replacement_old_logical_host }}) ===
      {{ proxmox_node_replacement_old_cluster_status.stdout | default('not-captured') }}

      === pvecm status ({{ replacement_new_physical_host }}) ===
      {{ proxmox_node_replacement_new_cluster_status.stdout | default('not-captured') }}
  delegate_to: localhost
  when: replacement_capture_baseline | bool

# Shown only when the cutover is not requested: tell the operator which two
# variables enable it.
- name: Explain cutover execution gate
  ansible.builtin.debug:
    msg: >-
      Cutover actions are disabled.
      Set replacement_execute_cutover=true and
      replacement_confirm_phrase=EXECUTE_NODE_REPLACEMENT
      to continue.
  when: not (replacement_execute_cutover | bool)

# Second safety latch: when the cutover is requested, the operator must also
# supply the exact confirmation phrase.
- name: Enforce explicit confirmation phrase for cutover
  ansible.builtin.assert:
    that:
      - replacement_confirm_phrase == 'EXECUTE_NODE_REPLACEMENT'
    fail_msg: >-
      Cutover requested without explicit confirmation phrase. Set
      replacement_confirm_phrase=EXECUTE_NODE_REPLACEMENT.
  when: replacement_execute_cutover | bool

# Write a checklist on the controller that records the phase switches for this
# run and the steps that remain manual. Only written when cutover is enabled.
- name: Build cutover TODO artifact
  ansible.builtin.copy:
    mode: "0644"
    dest: "{{ proxmox_node_replacement_output_dir }}/cutover-todo.txt"
    content: |
      EXECUTION MODE ENABLED

      Phase 2 execution switch:
      - replacement_phase2_rebuild_and_rejoin={{ replacement_phase2_rebuild_and_rejoin }}

      Phase 3 execution switch:
      - replacement_phase3_identity_cutover={{ replacement_phase3_identity_cutover }}

      Phase 4 execution switch:
      - replacement_phase4_validate_cutover={{ replacement_phase4_validate_cutover }}

      Manual steps still required around identity cutover:
      1. If phase 2 enabled, rebuild and rejoin replacement swarm nodes on {{ replacement_new_physical_host }}.
      2. If phase 3 enabled, update inventory/group_vars source-of-truth with rollback snapshots.
      3. If phase 4 enabled, validate swarm quorum and optional service endpoints.
      4. Move network identity {{ replacement_old_ip }} to replacement physical host.
      5. If stable and approved, power off old host.
  delegate_to: localhost
  when: replacement_execute_cutover | bool

# Phase 2 runs only when the cutover is enabled and the phase switch is on.
- name: Execute phase 2 rebuild and swarm rejoin on replacement host
  ansible.builtin.include_tasks:
    file: phase2_rebuild_and_rejoin.yml
  when: (replacement_execute_cutover | bool) and (replacement_phase2_rebuild_and_rejoin | bool)

# Phase 3 runs only when the cutover is enabled and the phase switch is on.
- name: Execute phase 3 identity cutover updates with rollback snapshots
  ansible.builtin.include_tasks:
    file: phase3_identity_cutover.yml
  when: (replacement_execute_cutover | bool) and (replacement_phase3_identity_cutover | bool)

# Phase 4 runs only when the cutover is enabled and the phase switch is on.
- name: Execute phase 4 post-cutover validation gates
  ansible.builtin.include_tasks:
    file: phase4_validate_cutover.yml
  when: (replacement_execute_cutover | bool) and (replacement_phase4_validate_cutover | bool)

# Destructive final step: issue `systemctl poweroff` as root on the old logical
# host. Requires both the cutover switch and the dedicated power-off switch.
- name: Power off old logical host after explicit approval
  ansible.builtin.command:
    argv:
      - systemctl
      - poweroff
  delegate_to: "{{ replacement_old_logical_host }}"
  become: true
  when: (replacement_execute_cutover | bool) and (replacement_poweroff_old_host | bool)