453 lines
15 KiB
YAML

---
# End-to-end Docker Swarm bootstrap from the primary manager.
# Guard: both inventory groups must exist and be non-empty before host lists are built.
- name: Validate required inventory groups exist
  tags:
    - always
  ansible.builtin.assert:
    success_msg: "Required Swarm inventory groups are present."
    fail_msg: "Inventory must define non-empty swarm_managers and swarm_workers groups."
    that:
      - "groups['swarm_managers'] is defined"
      - "(groups['swarm_managers'] | length) > 0"
      - "groups['swarm_workers'] is defined"
      - "(groups['swarm_workers'] | length) > 0"
# Derive the host lists used by the rest of this file from the inventory groups.
- name: Build swarm host lists
  tags:
    - always
  ansible.builtin.set_fact:
    # Managers first, then workers.
    swarm_all_nodes: "{{ groups['swarm_managers'] + groups['swarm_workers'] }}"
    # First manager in inventory order is the primary.
    swarm_primary_manager: "{{ groups['swarm_managers'][0] }}"
    # Every manager after the first.
    swarm_secondary_managers: "{{ groups['swarm_managers'][1:] }}"
# Ask the primary manager which source IPv4 it would use to reach 1.1.1.1.
# The result is registered as swarm_primary_runtime_ip for the tasks that follow.
- name: Detect runtime IPv4 on primary manager
  ansible.builtin.shell:
    # Print the token that follows "src" instead of a fixed column: the column
    # position shifts when the route has no "via <gateway>" hop.
    # pipefail makes a failing `ip` fail the task rather than being masked by awk.
    cmd: |
      set -o pipefail
      ip -4 route get 1.1.1.1 | awk '{ for (i = 1; i < NF; i++) if ($i == "src") { print $(i + 1); exit } }'
    executable: /bin/bash
  changed_when: false
  # Read-only probe, so it also runs in check mode.
  check_mode: false
  register: swarm_primary_runtime_ip
  become: true
  delegate_to: "{{ swarm_primary_manager }}"
  tags: [always]
# Sanity-check the detected address: it must be non-empty, differ from the
# current host's ansible_host (127.0.0.1 when unset), and differ from 10.0.0.200.
- name: Validate runtime IPv4 detection on primary manager
  tags:
    - always
  ansible.builtin.assert:
    success_msg: "Detected primary manager runtime IPv4: {{ swarm_primary_runtime_ip.stdout | trim }}"
    fail_msg: >-
      IP detected on {{ swarm_primary_manager }} was '{{ swarm_primary_runtime_ip.stdout | trim }}'.
      This looks like the control node IP, which means delegate_to is not working.
      Ensure 'connection: local' is NOT set at play level in bootstrap_swarm.yml.
    that:
      - "(swarm_primary_runtime_ip.stdout | trim | length) > 0"
      - "(swarm_primary_runtime_ip.stdout | trim) != (ansible_host | default('127.0.0.1'))"
      - "(swarm_primary_runtime_ip.stdout | trim) != '10.0.0.200'"
# Store the trimmed detected address as the fact used by the swarm init/join commands.
- name: Set primary manager advertise address from runtime network state
  tags:
    - always
  ansible.builtin.set_fact:
    swarm_primary_advertise_addr: "{{ swarm_primary_runtime_ip.stdout | trim }}"
# Environment passed via `environment:` to the docker CLI tasks in this file:
# pins the CLI to the local unix socket and sets an empty Docker context.
- name: Set Docker CLI environment guardrails
  tags:
    - always
  ansible.builtin.set_fact:
    swarm_docker_cli_env:
      DOCKER_CONTEXT: ''
      DOCKER_HOST: 'unix:///var/run/docker.sock'
# Record the name the Docker daemon reports when queried on the primary manager.
- name: Read Docker daemon hostname from primary manager context
  delegate_to: "{{ swarm_primary_manager }}"
  become: true
  environment: "{{ swarm_docker_cli_env }}"
  ansible.builtin.command:
    # The {{"{{"}} / {{"}}"}} pairs emit literal Go-template braces through Jinja.
    cmd: docker info --format '{{"{{"}} .Name {{"}}"}}'
  register: swarm_primary_docker_name
  # Read-only query: never reports a change and still runs in check mode.
  changed_when: false
  check_mode: false
  tags:
    - always
# Reject the run when the daemon name is empty or equals 'watchtower'
# (which the failure message identifies as the control node).
- name: Assert Docker daemon context points to expected manager
  tags:
    - always
  ansible.builtin.assert:
    success_msg: "Docker daemon context verified on primary manager: {{ swarm_primary_docker_name.stdout | trim }}"
    fail_msg: >-
      Docker daemon reports '{{ swarm_primary_docker_name.stdout | trim }}' — this is the control node, not {{ swarm_primary_manager }}.
      The 'connection: local' directive must NOT be set at play level in bootstrap_swarm.yml.
    that:
      - "(swarm_primary_docker_name.stdout | trim) != 'watchtower'"
      - "(swarm_primary_docker_name.stdout | trim | length) > 0"
# Probe TCP/22 on every swarm node from the control node (delegated to localhost).
# Per-node results are registered in swarm_ssh_preflight for later evaluation.
- name: Preflight - check SSH port reachability from control node
  ansible.builtin.wait_for:
    # Use the node's ansible_host when set, otherwise its inventory name.
    host: "{{ hostvars[item].ansible_host | default(item) }}"
    port: 22
    timeout: 3
    connect_timeout: 2
    state: started
  register: swarm_ssh_preflight
  # ignore_errors keeps the loop going while leaving `failed: true` on each
  # unreachable item. `failed_when: false` would overwrite that flag with false,
  # making every node look reachable to any later check on `failed`.
  ignore_errors: true
  delegate_to: localhost
  loop: "{{ swarm_all_nodes }}"
  tags: [always]
# Collect the loop items (node names) of every preflight result whose
# `failed` attribute is defined and truthy.
- name: Build list of unreachable swarm nodes
  tags:
    - always
  ansible.builtin.set_fact:
    swarm_unreachable_nodes: "{{ swarm_ssh_preflight.results | selectattr('failed', 'defined') | selectattr('failed') | map(attribute='item') | list }}"
# Stop the run here when the unreachable-node list is not empty.
- name: Fail fast when swarm nodes are unreachable over SSH
  tags:
    - always
  ansible.builtin.assert:
    success_msg: "SSH reachability preflight passed for all swarm nodes."
    fail_msg: >-
      Cannot reach TCP/22 from control node for: {{ swarm_unreachable_nodes | join(', ') }}.
      Confirm VM power state, IP assignment, routing, and firewall before bootstrap.
    that:
      - "(swarm_unreachable_nodes | length) == 0"
# Create /etc/apt/keyrings (mode 0755) on every swarm node.
- name: Ensure Docker keyring directory exists
  loop: "{{ swarm_all_nodes }}"
  delegate_to: "{{ item }}"
  become: true
  ansible.builtin.file:
    state: directory
    path: /etc/apt/keyrings
    mode: "0755"
# List the *.list and *.sources files under /etc/apt/sources.list.d on each node.
# Results (one entry per node) are registered for the content scan that follows.
- name: Discover apt source files on swarm nodes
  loop: "{{ swarm_all_nodes }}"
  delegate_to: "{{ item }}"
  become: true
  ansible.builtin.find:
    paths: /etc/apt/sources.list.d
    file_type: file
    patterns:
      - '*.list'
      - '*.sources'
  register: swarm_bootstrap_apt_source_candidates
# Slurp every discovered apt source file. Each loop item is a pair:
# item.0 is the per-node find result (item.0.item is the node name) and
# item.1 is one file entry from that result's `files` list.
- name: Read apt source file contents on swarm nodes
  loop: "{{ swarm_bootstrap_apt_source_candidates.results | subelements('files', skip_missing=True) }}"
  loop_control:
    label: "{{ item.0.item }} -> {{ item.1.path }}"
  delegate_to: "{{ item.0.item }}"
  become: true
  ansible.builtin.slurp:
    src: "{{ item.1.path }}"
  register: swarm_bootstrap_apt_source_contents
# Delete each slurped source file whose decoded content mentions download.docker.com.
# item.item.0.item is the node the file was read from; item.source is its path.
- name: Remove discovered Docker apt source files on swarm nodes
  loop: "{{ swarm_bootstrap_apt_source_contents.results }}"
  loop_control:
    label: "{{ item.item.0.item }} -> {{ item.source }}"
  when:
    - item.content is defined
    - "'download.docker.com' in (item.content | b64decode)"
  delegate_to: "{{ item.item.0.item }}"
  become: true
  ansible.builtin.file:
    state: absent
    path: "{{ item.source }}"
# Strip any line mentioning download.docker.com from /etc/apt/sources.list on every node.
- name: Remove Docker source entries from main apt sources list
  ansible.builtin.lineinfile:
    path: /etc/apt/sources.list
    # Single-quoted YAML keeps backslashes literal, so one backslash per dot is
    # what escapes it in the regex. A doubled backslash would require a literal
    # backslash in the line and never match a real repository entry.
    regexp: '^.*download\.docker\.com.*$'
    state: absent
  become: true
  delegate_to: "{{ item }}"
  loop: "{{ swarm_all_nodes }}"
# Delete the one-line-format source file named after swarm_docker_repo_filename on every node.
- name: Remove legacy Docker apt source list
  loop: "{{ swarm_all_nodes }}"
  delegate_to: "{{ item }}"
  become: true
  ansible.builtin.file:
    state: absent
    path: "/etc/apt/sources.list.d/{{ swarm_docker_repo_filename }}.list"
# Delete the source file named in the add-apt-repository style
# (archive_uri-...-<release>.list) on every node.
- name: Remove legacy add-apt-repository Docker source list
  ansible.builtin.file:
    # Kept on one line on purpose: a folded block scalar would join the prefix
    # and the release with a space, yielding a path that never exists.
    # The release falls back to 'noble' when the node's fact is unavailable.
    path: "/etc/apt/sources.list.d/archive_uri-https_download_docker_com_linux_ubuntu-{{ hostvars[item].ansible_distribution_release | default('noble') }}.list"
    state: absent
  become: true
  delegate_to: "{{ item }}"
  loop: "{{ swarm_all_nodes }}"
# Delete the .sources file named after swarm_docker_repo_filename on every node.
- name: Remove legacy Docker deb822 source definition
  loop: "{{ swarm_all_nodes }}"
  delegate_to: "{{ item }}"
  become: true
  ansible.builtin.file:
    state: absent
    path: "/etc/apt/sources.list.d/{{ swarm_docker_repo_filename }}.sources"
# Delete whatever lives at swarm_docker_legacy_keyring_path on every node.
- name: Remove legacy Docker apt signing key path
  loop: "{{ swarm_all_nodes }}"
  delegate_to: "{{ item }}"
  become: true
  ansible.builtin.file:
    state: absent
    path: "{{ swarm_docker_legacy_keyring_path }}"
# Download the repository signing key to swarm_docker_keyring_path (mode 0644) on every node.
- name: Install Docker apt signing key
  loop: "{{ swarm_all_nodes }}"
  delegate_to: "{{ item }}"
  become: true
  ansible.builtin.get_url:
    dest: "{{ swarm_docker_keyring_path }}"
    url: "{{ swarm_docker_repo_gpg_url }}"
    mode: "0644"
# Register the Docker apt repository on every node, signed by the installed keyring.
- name: Add Docker apt repository
  loop: "{{ swarm_all_nodes }}"
  delegate_to: "{{ item }}"
  become: true
  vars:
    # Per-node Debian architecture: 'arm64' when the node's ansible_architecture
    # fact is 'aarch64', otherwise 'amd64' (a missing fact is treated as x86_64).
    _node_apt_arch: "{{ 'arm64' if hostvars[item].ansible_architecture | default('x86_64') == 'aarch64' else 'amd64' }}"
  ansible.builtin.apt_repository:
    state: present
    filename: "{{ swarm_docker_repo_filename }}"
    # Suite is the node's distribution release, defaulting to 'noble'.
    repo: "deb [arch={{ _node_apt_arch }} signed-by={{ swarm_docker_keyring_path }}] {{ swarm_docker_repo_url }} {{ hostvars[item].ansible_distribution_release | default('noble') }} stable"
# Refresh the apt cache and install the supporting packages on every node.
- name: Ensure Docker dependencies are installed
  loop: "{{ swarm_all_nodes }}"
  delegate_to: "{{ item }}"
  become: true
  ansible.builtin.apt:
    update_cache: true
    state: present
    name:
      - ca-certificates
      - curl
      - gnupg
      - python3-docker
# Refresh the apt cache and install the packages listed in swarm_docker_packages on every node.
- name: Install Docker engine packages
  loop: "{{ swarm_all_nodes }}"
  delegate_to: "{{ item }}"
  become: true
  ansible.builtin.apt:
    update_cache: true
    state: present
    name: "{{ swarm_docker_packages }}"
# Enable the docker unit at boot and make sure it is started on every node.
- name: Ensure Docker service is enabled and running
  loop: "{{ swarm_all_nodes }}"
  delegate_to: "{{ item }}"
  become: true
  ansible.builtin.systemd:
    name: docker
    state: started
    enabled: true
# Add the connection user to the docker group on every node, keeping existing groups.
- name: Ensure ansible_user is in the docker group
  loop: "{{ swarm_all_nodes }}"
  delegate_to: "{{ item }}"
  become: true
  ansible.builtin.user:
    # Prefer the node's own ansible_user from hostvars (mixed-user inventories);
    # otherwise fall back to the ansible_user visible to the current host.
    name: "{{ hostvars[item].ansible_user | default(ansible_user) }}"
    # append: true adds docker without removing other supplementary groups.
    append: true
    groups: docker
# Query the primary manager's Swarm.LocalNodeState; the init task is gated on it.
- name: Read primary manager swarm state
  delegate_to: "{{ swarm_primary_manager }}"
  become: true
  environment: "{{ swarm_docker_cli_env }}"
  ansible.builtin.command:
    cmd: docker info --format '{{"{{"}} .Swarm.LocalNodeState {{"}}"}}'
  register: swarm_primary_state
  # Read-only query: never reports a change and still runs in check mode.
  changed_when: false
  check_mode: false
  tags:
    - swarm-join
# Run `docker swarm init` on the primary manager unless it already reports 'active'.
- name: Initialize swarm on primary manager when inactive
  when: swarm_primary_state.stdout != 'active'
  delegate_to: "{{ swarm_primary_manager }}"
  become: true
  environment: "{{ swarm_docker_cli_env }}"
  ansible.builtin.command:
    cmd: >-
      docker swarm init
      --advertise-addr {{ swarm_primary_advertise_addr }}
      --listen-addr {{ swarm_primary_advertise_addr }}:{{ swarm_api_port }}
      --default-addr-pool {{ swarm_default_addr_pool }}
      --default-addr-pool-mask-length {{ swarm_default_addr_pool_mask_length }}
  register: swarm_init_result
  changed_when: swarm_init_result.rc == 0
  # Both conditions must hold to fail: a non-zero exit is tolerated when
  # stderr says the node is already part of a swarm.
  failed_when:
    - swarm_init_result.rc != 0
    - "'This node is already part of a swarm' not in swarm_init_result.stderr"
  tags:
    - swarm-join
# Fetch the manager join token from the primary manager.
# It is registered as swarm_manager_token and consumed by the manager join task.
- name: Get manager join token
  ansible.builtin.command: docker swarm join-token -q manager
  changed_when: false
  # Read-only query, so it also runs in check mode.
  check_mode: false
  # stdout is the join token (a credential); keep it out of task output and
  # callback logs. The registered variable is unaffected.
  no_log: true
  register: swarm_manager_token
  become: true
  delegate_to: "{{ swarm_primary_manager }}"
  environment: "{{ swarm_docker_cli_env }}"
  tags: [swarm-join]
# Fetch the worker join token from the primary manager.
# It is registered as swarm_worker_token and consumed by the worker join task.
- name: Get worker join token
  ansible.builtin.command: docker swarm join-token -q worker
  changed_when: false
  # Read-only query, so it also runs in check mode.
  check_mode: false
  # stdout is the join token (a credential); keep it out of task output and
  # callback logs. The registered variable is unaffected.
  no_log: true
  register: swarm_worker_token
  become: true
  delegate_to: "{{ swarm_primary_manager }}"
  environment: "{{ swarm_docker_cli_env }}"
  tags: [swarm-join]
# Query Swarm.LocalNodeState on each secondary manager; results feed the join task.
- name: Check secondary manager swarm state
  loop: "{{ swarm_secondary_managers }}"
  delegate_to: "{{ item }}"
  become: true
  environment: "{{ swarm_docker_cli_env }}"
  ansible.builtin.command:
    cmd: docker info --format '{{"{{"}} .Swarm.LocalNodeState {{"}}"}}'
  register: swarm_secondary_manager_states
  # Read-only query: never reports a change and still runs in check mode.
  changed_when: false
  check_mode: false
  tags:
    - swarm-join
# Join each secondary manager whose reported state is not 'active'.
# Loop items are the state-check results, so item.item is the node name.
- name: Join secondary managers to swarm when not active
  loop: "{{ swarm_secondary_manager_states.results }}"
  when: item.stdout != 'active'
  delegate_to: "{{ item.item }}"
  become: true
  environment: "{{ swarm_docker_cli_env }}"
  ansible.builtin.command:
    cmd: >-
      docker swarm join
      --token {{ swarm_manager_token.stdout }}
      {{ swarm_primary_advertise_addr }}:{{ swarm_api_port }}
  register: swarm_join_manager_results
  changed_when: swarm_join_manager_results.rc == 0
  # Both conditions must hold to fail: a non-zero exit is tolerated when
  # stderr says the node is already part of a swarm.
  failed_when:
    - swarm_join_manager_results.rc != 0
    - "'This node is already part of a swarm' not in swarm_join_manager_results.stderr"
  tags:
    - swarm-join
# Query Swarm.LocalNodeState on each worker; results feed the worker join task.
- name: Check worker swarm state
  loop: "{{ groups['swarm_workers'] }}"
  delegate_to: "{{ item }}"
  become: true
  environment: "{{ swarm_docker_cli_env }}"
  ansible.builtin.command:
    cmd: docker info --format '{{"{{"}} .Swarm.LocalNodeState {{"}}"}}'
  register: swarm_worker_states
  # Read-only query: never reports a change and still runs in check mode.
  changed_when: false
  check_mode: false
  tags:
    - swarm-join
# Join each worker whose reported state is not 'active'.
# Loop items are the state-check results, so item.item is the node name.
- name: Join workers to swarm when not active
  loop: "{{ swarm_worker_states.results }}"
  when: item.stdout != 'active'
  delegate_to: "{{ item.item }}"
  become: true
  environment: "{{ swarm_docker_cli_env }}"
  ansible.builtin.command:
    cmd: >-
      docker swarm join
      --token {{ swarm_worker_token.stdout }}
      {{ swarm_primary_advertise_addr }}:{{ swarm_api_port }}
  register: swarm_join_worker_results
  changed_when: swarm_join_worker_results.rc == 0
  # Both conditions must hold to fail: a non-zero exit is tolerated when
  # stderr says the node is already part of a swarm.
  failed_when:
    - swarm_join_worker_results.rc != 0
    - "'This node is already part of a swarm' not in swarm_join_worker_results.stderr"
  tags:
    - swarm-join
# List one hostname per swarm node as seen by the primary manager.
- name: Collect swarm node hostnames from primary manager
  delegate_to: "{{ swarm_primary_manager }}"
  become: true
  environment: "{{ swarm_docker_cli_env }}"
  ansible.builtin.command:
    cmd: docker node ls --format '{{"{{"}} .Hostname {{"}}"}}'
  register: swarm_node_hostnames
  # Read-only query: never reports a change and still runs in check mode.
  changed_when: false
  check_mode: false
  tags:
    - swarm-join
# The number of hostnames listed by the primary manager must equal the
# number of nodes in swarm_all_nodes.
- name: Assert expected swarm node count is present on primary manager
  tags:
    - swarm-join
  ansible.builtin.assert:
    success_msg: "Primary manager sees expected swarm node count."
    fail_msg: >-
      Primary manager sees {{ swarm_node_hostnames.stdout_lines | length }} node(s),
      expected {{ swarm_all_nodes | length }}. Check Docker context and swarm membership.
    that:
      - "(swarm_node_hostnames.stdout_lines | length) == (swarm_all_nodes | length)"
# Read the current "node.role" label of each manager from the primary manager.
# The Go template prints the label value when it is set and nothing otherwise.
# Nodes are addressed by inventory name — assumes that matches the swarm
# hostname; TODO confirm.
- name: Read existing manager label values by hostname
  ansible.builtin.command: >-
    docker node inspect
    --format '{{"{{"}} with index .Spec.Labels "node.role" {{"}}"}}{{"{{"}} . {{"}}"}}{{"{{"}} end {{"}}"}}'
    {{ item }}
  changed_when: false
  # Read-only query: run it in check mode too (as the other read-only probes in
  # this file do) so the follow-up task compares against the real label value.
  check_mode: false
  # Inspect errors are tolerated here; stdout is then empty and the follow-up
  # label task treats the label as missing.
  failed_when: false
  register: manager_node_labels
  become: true
  delegate_to: "{{ swarm_primary_manager }}"
  loop: "{{ groups['swarm_managers'] }}"
  environment: "{{ swarm_docker_cli_env }}"
# Add node.role=manager to every manager whose current label value is not 'manager'.
# Loop items are the label-read results, so item.item is the node name.
- name: Ensure manager role labels are present by hostname
  loop: "{{ manager_node_labels.results }}"
  when: item.stdout != 'manager'
  delegate_to: "{{ swarm_primary_manager }}"
  become: true
  environment: "{{ swarm_docker_cli_env }}"
  ansible.builtin.command: docker node update --label-add node.role=manager {{ item.item }}
  register: manager_label_update
  changed_when: manager_label_update.rc == 0
# Read the current "node.role" label of each worker from the primary manager.
# The Go template prints the label value when it is set and nothing otherwise.
# Nodes are addressed by inventory name — assumes that matches the swarm
# hostname; TODO confirm.
- name: Read existing worker label values by hostname
  ansible.builtin.command: >-
    docker node inspect
    --format '{{"{{"}} with index .Spec.Labels "node.role" {{"}}"}}{{"{{"}} . {{"}}"}}{{"{{"}} end {{"}}"}}'
    {{ item }}
  changed_when: false
  # Read-only query: run it in check mode too (as the other read-only probes in
  # this file do) so the follow-up task compares against the real label value.
  check_mode: false
  # Inspect errors are tolerated here; stdout is then empty and the follow-up
  # label task treats the label as missing.
  failed_when: false
  register: worker_node_labels
  become: true
  delegate_to: "{{ swarm_primary_manager }}"
  loop: "{{ groups['swarm_workers'] }}"
  environment: "{{ swarm_docker_cli_env }}"
# Add node.role=worker to every worker whose current label value is not 'worker'.
# Loop items are the label-read results, so item.item is the node name.
- name: Ensure worker role labels are present by hostname
  loop: "{{ worker_node_labels.results }}"
  when: item.stdout != 'worker'
  delegate_to: "{{ swarm_primary_manager }}"
  become: true
  environment: "{{ swarm_docker_cli_env }}"
  ansible.builtin.command: docker node update --label-add node.role=worker {{ item.item }}
  register: worker_label_update
  changed_when: worker_label_update.rc == 0
# Capture the human-readable node table from the primary manager for the final debug print.
- name: Show final swarm node table
  ansible.builtin.command: docker node ls
  changed_when: false
  # Read-only query: run it in check mode too (as the other read-only probes in
  # this file do) so the registered output is populated for the debug task.
  check_mode: false
  register: swarm_node_ls
  become: true
  delegate_to: "{{ swarm_primary_manager }}"
  environment: "{{ swarm_docker_cli_env }}"
# Display the captured `docker node ls` output, one list element per line.
- name: Print swarm verification output
  ansible.builtin.debug:
    var: "swarm_node_ls.stdout_lines"