---
# Node replacement workflow: validates inputs, runs preflight checks,
# optionally captures a baseline to the controller, and - only when
# replacement_execute_cutover is true and the confirmation phrase matches -
# includes the phase 2/3/4 task files and optionally powers off the old host.

# Fail early when the project name is blank or the named hosts are missing
# from the expected inventory groups. The replacement physical host is only
# required in 'proxmox_cluster' when phase 2 is enabled.
- name: Validate required replacement inputs
  ansible.builtin.assert:
    that:
      - replacement_project_name | trim | length > 0
      - replacement_old_logical_host in groups['proxmox_cluster']
      - not (replacement_phase2_rebuild_and_rejoin | bool) or replacement_new_physical_host in groups['proxmox_cluster']
      - replacement_swarm_manager_name in groups['swarm_managers']
      - replacement_swarm_worker_name in groups['swarm_workers']
    fail_msg: >-
      Missing replacement inputs or inventory groups.
      Ensure project name is set and proxmox/swarm host groups contain the expected hosts.
    success_msg: "Replacement input validation passed."

# The timestamp is computed exactly once and stored as a fact so that the
# output directory suffix below can never differ from it (two separate
# 'date' invocations could straddle a second boundary).
- name: Build replacement run timestamp
  ansible.builtin.set_fact:
    proxmox_node_replacement_timestamp: "{{ lookup('pipe', 'date +%Y%m%dT%H%M%S') }}"

# Output directory is <root>/<sanitised project name>-<timestamp>; any
# character outside [a-zA-Z0-9_-] in the project name becomes '_'.
- name: Build replacement context values
  ansible.builtin.set_fact:
    proxmox_node_replacement_output_dir: "{{ replacement_output_root }}/{{ replacement_project_name | regex_replace('[^a-zA-Z0-9_-]', '_') }}-{{ proxmox_node_replacement_timestamp }}"

- name: Print replacement plan summary
  ansible.builtin.debug:
    msg:
      - "Project: {{ replacement_project_name }}"
      - "Logical identity: {{ replacement_old_logical_host }} ({{ replacement_old_ip }})"
      - "Replacement hardware: {{ replacement_new_physical_host }} ({{ replacement_new_physical_ip }})"
      - "Swarm identities: {{ replacement_swarm_manager_name }}, {{ replacement_swarm_worker_name }}"
      - "Execute cutover: {{ replacement_execute_cutover }}"
      - "Power off old host: {{ replacement_poweroff_old_host }}"

# Checks that port 22 answers on each involved host, from the control node.
# The replacement physical host falls back to replacement_new_physical_ip
# when it has no hostvars entry; every other host uses ansible_host, or its
# own name when ansible_host is unset.
- name: Preflight network reachability from control node
  ansible.builtin.wait_for:
    host: >-
      {{
        replacement_new_physical_ip
        if item == replacement_new_physical_host and (hostvars[item] is not defined)
        else (hostvars[item].ansible_host | default(item))
      }}
    port: 22
    timeout: 5
    connect_timeout: 2
    state: started
  delegate_to: localhost
  loop:
    - "{{ replacement_old_logical_host }}"
    - "{{ replacement_new_physical_host }}"
    - "{{ replacement_swarm_manager_name }}"
    - "{{ replacement_swarm_worker_name }}"
  when:
    - not replacement_skip_runtime_checks | bool
    # Skip the old host when it is allowed to be offline.
    - >-
      item != replacement_old_logical_host
      or not replacement_old_host_may_be_offline | bool
    # Only probe the replacement host when a baseline capture or phase 2
    # will actually use it.
    - >-
      item != replacement_new_physical_host
      or replacement_capture_baseline | bool
      or replacement_phase2_rebuild_and_rejoin | bool

- name: Capture swarm quorum state from manager host
  ansible.builtin.command: docker node ls
  register: proxmox_node_replacement_swarm_node_ls
  changed_when: false
  become: true
  delegate_to: "{{ replacement_swarm_manager_name }}"
  when: not replacement_skip_runtime_checks | bool

# Requires a zero exit code and at least one 'Leader' or 'Reachable' entry
# in the captured 'docker node ls' output.
- name: Assert swarm quorum output is available
  ansible.builtin.assert:
    that:
      - proxmox_node_replacement_swarm_node_ls.rc == 0
      - proxmox_node_replacement_swarm_node_ls.stdout is search('Leader|Reachable')
    fail_msg: "Swarm control plane is not healthy enough for a node replacement cutover."
    success_msg: "Swarm quorum check passed."
  when: not replacement_skip_runtime_checks | bool

# The directory is needed by both the baseline artifact and the cutover
# TODO artifact, hence the combined condition.
- name: Create output directory for baseline artifacts
  ansible.builtin.file:
    path: "{{ proxmox_node_replacement_output_dir }}"
    state: directory
    mode: '0755'
  delegate_to: localhost
  when: replacement_capture_baseline | bool or replacement_execute_cutover | bool

- name: Capture old logical host VM list
  ansible.builtin.command: /usr/sbin/qm list
  register: proxmox_node_replacement_old_qm_list
  changed_when: false
  become: true
  delegate_to: "{{ replacement_old_logical_host }}"
  when: replacement_capture_baseline | bool

- name: Capture replacement physical host VM list
  ansible.builtin.command: /usr/sbin/qm list
  register: proxmox_node_replacement_new_qm_list
  changed_when: false
  become: true
  delegate_to: "{{ replacement_new_physical_host }}"
  when: replacement_capture_baseline | bool

# failed_when: false - a non-zero 'pvecm status' exit does not fail the
# task; whatever stdout was produced is still recorded in the baseline.
- name: Capture old logical host cluster state
  ansible.builtin.command: pvecm status
  register: proxmox_node_replacement_old_cluster_status
  changed_when: false
  failed_when: false
  become: true
  delegate_to: "{{ replacement_old_logical_host }}"
  when: replacement_capture_baseline | bool

- name: Capture replacement physical host cluster state
  ansible.builtin.command: pvecm status
  register: proxmox_node_replacement_new_cluster_status
  changed_when: false
  failed_when: false
  become: true
  delegate_to: "{{ replacement_new_physical_host }}"
  when: replacement_capture_baseline | bool

# The default(...) filters cover registered results without a stdout key
# (for example when the capturing task was skipped).
- name: Write baseline artifact to controller
  ansible.builtin.copy:
    dest: "{{ proxmox_node_replacement_output_dir }}/baseline-summary.txt"
    mode: '0644'
    content: |
      Project: {{ replacement_project_name }}
      Timestamp: {{ proxmox_node_replacement_timestamp }}
      Logical identity host: {{ replacement_old_logical_host }}
      Logical identity IP: {{ replacement_old_ip }}
      Replacement physical host: {{ replacement_new_physical_host }}
      Replacement physical IP: {{ replacement_new_physical_ip }}

      === Swarm node ls (from {{ replacement_swarm_manager_name }}) ===
      {{ proxmox_node_replacement_swarm_node_ls.stdout | default('') }}

      === QM list ({{ replacement_old_logical_host }}) ===
      {{ proxmox_node_replacement_old_qm_list.stdout | default('not-captured') }}

      === QM list ({{ replacement_new_physical_host }}) ===
      {{ proxmox_node_replacement_new_qm_list.stdout | default('not-captured') }}

      === pvecm status ({{ replacement_old_logical_host }}) ===
      {{ proxmox_node_replacement_old_cluster_status.stdout | default('not-captured') }}

      === pvecm status ({{ replacement_new_physical_host }}) ===
      {{ proxmox_node_replacement_new_cluster_status.stdout | default('not-captured') }}
  delegate_to: localhost
  when: replacement_capture_baseline | bool

- name: Explain cutover execution gate
  ansible.builtin.debug:
    msg: >-
      Cutover actions are disabled. Set replacement_execute_cutover=true and
      replacement_confirm_phrase=EXECUTE_NODE_REPLACEMENT to continue.
  when: not replacement_execute_cutover | bool

# Hard stop: every task below this point only runs when cutover is enabled,
# and this assert fails the run unless the exact phrase was supplied.
- name: Enforce explicit confirmation phrase for cutover
  ansible.builtin.assert:
    that:
      - replacement_confirm_phrase == 'EXECUTE_NODE_REPLACEMENT'
    fail_msg: >-
      Cutover requested without explicit confirmation phrase.
      Set replacement_confirm_phrase=EXECUTE_NODE_REPLACEMENT.
  when: replacement_execute_cutover | bool

- name: Build cutover TODO artifact
  ansible.builtin.copy:
    dest: "{{ proxmox_node_replacement_output_dir }}/cutover-todo.txt"
    mode: '0644'
    content: |
      EXECUTION MODE ENABLED

      Phase 2 execution switch:
      - replacement_phase2_rebuild_and_rejoin={{ replacement_phase2_rebuild_and_rejoin }}

      Phase 3 execution switch:
      - replacement_phase3_identity_cutover={{ replacement_phase3_identity_cutover }}

      Phase 4 execution switch:
      - replacement_phase4_validate_cutover={{ replacement_phase4_validate_cutover }}

      Manual steps still required around identity cutover:
      1. If phase 2 enabled, rebuild and rejoin replacement swarm nodes on {{ replacement_new_physical_host }}.
      2. If phase 3 enabled, update inventory/group_vars source-of-truth with rollback snapshots.
      3. If phase 4 enabled, validate swarm quorum and optional service endpoints.
      4. Move network identity {{ replacement_old_ip }} to replacement physical host.
      5. If stable and approved, power off old host.
  delegate_to: localhost
  when: replacement_execute_cutover | bool

- name: Execute phase 2 rebuild and swarm rejoin on replacement host
  ansible.builtin.include_tasks: phase2_rebuild_and_rejoin.yml
  when:
    - replacement_execute_cutover | bool
    - replacement_phase2_rebuild_and_rejoin | bool

- name: Execute phase 3 identity cutover updates with rollback snapshots
  ansible.builtin.include_tasks: phase3_identity_cutover.yml
  when:
    - replacement_execute_cutover | bool
    - replacement_phase3_identity_cutover | bool

- name: Execute phase 4 post-cutover validation gates
  ansible.builtin.include_tasks: phase4_validate_cutover.yml
  when:
    - replacement_execute_cutover | bool
    - replacement_phase4_validate_cutover | bool

# Powering off always changes the target, so report it as changed
# explicitly rather than relying on the command module default.
# NOTE(review): the connection to the old host may drop while this command
# runs; confirm whether the task needs async/poll handling in practice.
- name: Power off old logical host after explicit approval
  ansible.builtin.command: systemctl poweroff
  changed_when: true
  become: true
  delegate_to: "{{ replacement_old_logical_host }}"
  when:
    - replacement_execute_cutover | bool
    - replacement_poweroff_old_host | bool