# Listing metadata: 453 lines, 15 KiB, YAML
---
|
|
# End-to-end Docker Swarm bootstrap from the primary manager.
|
|
|
|
# Guard: stop immediately unless both Swarm inventory groups exist and are
# non-empty. Conditions are evaluated in order, so each "is defined" check
# precedes the length check on the same group.
- name: Validate required inventory groups exist
  tags:
    - always
  ansible.builtin.assert:
    success_msg: "Required Swarm inventory groups are present."
    fail_msg: "Inventory must define non-empty swarm_managers and swarm_workers groups."
    that:
      - groups['swarm_managers'] is defined
      - groups['swarm_managers'] | length > 0
      - groups['swarm_workers'] is defined
      - groups['swarm_workers'] | length > 0
|
|
|
|
# Derive the host lists consumed by the remaining tasks from the two
# inventory groups.
- name: Build swarm host lists
  tags:
    - always
  ansible.builtin.set_fact:
    # Managers followed by workers.
    swarm_all_nodes: "{{ groups['swarm_managers'] + groups['swarm_workers'] }}"
    # Every manager except the first.
    swarm_secondary_managers: "{{ groups['swarm_managers'][1:] }}"
    # The first entry of swarm_managers acts as the primary manager.
    swarm_primary_manager: "{{ groups['swarm_managers'][0] }}"
|
|
|
|
# Ask the primary manager which source IPv4 it would use to reach 1.1.1.1 and
# register the answer in swarm_primary_runtime_ip.stdout.
#
# The address is taken from the token following "src" rather than from a fixed
# column: "ip route get" omits the "via <gateway>" pair for directly connected
# routes, which shifts every later field. The awk program reads all input (no
# early exit) so it cannot break the pipe, and pipefail makes a failing "ip"
# fail the task instead of silently yielding an empty string.
- name: Detect runtime IPv4 on primary manager
  ansible.builtin.shell: |
    set -o pipefail
    ip -4 route get 1.1.1.1 | awk '{ for (i = 1; i < NF; i++) if ($i == "src" && !seen++) print $(i + 1) }'
  args:
    executable: /bin/bash
  register: swarm_primary_runtime_ip
  # Read-only probe: never reports a change and also runs in check mode.
  changed_when: false
  check_mode: false
  become: true
  delegate_to: "{{ swarm_primary_manager }}"
  tags: [always]
|
|
|
|
# Sanity-check the detected address: it must be non-empty, must differ from
# the play host's ansible_host (127.0.0.1 when unset) and must differ from the
# control node's address.
#
# The control node address can be overridden with swarm_control_node_ip; when
# that variable is not set the previous hard-coded value 10.0.0.200 is used,
# so existing inventories behave exactly as before.
- name: Validate runtime IPv4 detection on primary manager
  ansible.builtin.assert:
    that:
      - swarm_primary_runtime_ip.stdout | trim | length > 0
      # Parentheses only spell out the grouping Jinja already applies
      # (filters bind tighter than comparisons).
      - (swarm_primary_runtime_ip.stdout | trim) != (ansible_host | default('127.0.0.1'))
      - (swarm_primary_runtime_ip.stdout | trim) != (swarm_control_node_ip | default('10.0.0.200'))
    fail_msg: >-
      IP detected on {{ swarm_primary_manager }} was '{{ swarm_primary_runtime_ip.stdout | trim }}'.
      This looks like the control node IP, which means delegate_to is not working.
      Ensure 'connection: local' is NOT set at play level in bootstrap_swarm.yml.
    success_msg: "Detected primary manager runtime IPv4: {{ swarm_primary_runtime_ip.stdout | trim }}"
  tags: [always]
|
|
|
|
# Publish the trimmed, detected IPv4 as the address used later for
# "docker swarm init" and "docker swarm join".
- name: Set primary manager advertise address from runtime network state
  tags:
    - always
  ansible.builtin.set_fact:
    swarm_primary_advertise_addr: "{{ swarm_primary_runtime_ip.stdout | trim }}"
|
|
|
|
# Environment passed to every docker CLI task below: DOCKER_HOST is pinned to
# the local Unix socket and DOCKER_CONTEXT is set to an empty string.
- name: Set Docker CLI environment guardrails
  tags:
    - always
  ansible.builtin.set_fact:
    swarm_docker_cli_env:
      DOCKER_CONTEXT: ''
      DOCKER_HOST: 'unix:///var/run/docker.sock'
|
|
|
|
# Query the daemon's self-reported name on the primary manager. The quoted
# brace pairs are Jinja escapes that render a literal Go template for docker.
- name: Read Docker daemon hostname from primary manager context
  tags:
    - always
  become: true
  delegate_to: "{{ swarm_primary_manager }}"
  environment: "{{ swarm_docker_cli_env }}"
  ansible.builtin.command:
    cmd: docker info --format '{{"{{"}} .Name {{"}}"}}'
  register: swarm_primary_docker_name
  # Read-only query: never "changed", and executed even in check mode.
  check_mode: false
  changed_when: false
|
|
|
|
# Fail when the daemon that answered "docker info" is the control node (or
# returned no name at all) instead of the primary manager.
#
# The control node's hostname can be overridden with
# swarm_control_node_hostname; when unset it falls back to the previously
# hard-coded value 'watchtower', so existing setups are unaffected.
- name: Assert Docker daemon context points to expected manager
  ansible.builtin.assert:
    that:
      - (swarm_primary_docker_name.stdout | trim) != (swarm_control_node_hostname | default('watchtower'))
      - swarm_primary_docker_name.stdout | trim | length > 0
    fail_msg: >-
      Docker daemon reports '{{ swarm_primary_docker_name.stdout | trim }}' — this is the control node, not {{ swarm_primary_manager }}.
      The 'connection: local' directive must NOT be set at play level in bootstrap_swarm.yml.
    success_msg: "Docker daemon context verified on primary manager: {{ swarm_primary_docker_name.stdout | trim }}"
  tags: [always]
|
|
|
|
# Probe TCP/22 on every swarm node from the control node and record one
# result per node in swarm_ssh_preflight.results.
#
# ignore_errors (not "failed_when: false") is used on purpose: failed_when
# overwrites each result's "failed" flag with the evaluated condition, so
# "failed_when: false" would mark timed-out probes as failed=false and the
# later selectattr('failed') filter could never find an unreachable node.
# With ignore_errors the play continues, the loop still visits every node,
# and each timed-out probe keeps failed=true for the follow-up tasks.
- name: Preflight - check SSH port reachability from control node
  ansible.builtin.wait_for:
    # Prefer the node's ansible_host; fall back to the inventory name.
    host: "{{ hostvars[item].ansible_host | default(item) }}"
    port: 22
    timeout: 3
    connect_timeout: 2
    state: started
  register: swarm_ssh_preflight
  ignore_errors: true
  delegate_to: localhost
  loop: "{{ swarm_all_nodes }}"
  tags: [always]
|
|
|
|
# Collect the inventory names (loop items) of every preflight result whose
# "failed" attribute is present and truthy.
- name: Build list of unreachable swarm nodes
  tags:
    - always
  ansible.builtin.set_fact:
    swarm_unreachable_nodes: "{{ swarm_ssh_preflight.results | selectattr('failed', 'defined') | selectattr('failed') | map(attribute='item') | list }}"
|
|
|
|
# Abort before touching any node when the preflight produced a non-empty
# unreachable list; the failure message names the affected nodes.
- name: Fail fast when swarm nodes are unreachable over SSH
  tags:
    - always
  ansible.builtin.assert:
    success_msg: "SSH reachability preflight passed for all swarm nodes."
    fail_msg: >-
      Cannot reach TCP/22 from control node for: {{ swarm_unreachable_nodes | join(', ') }}.
      Confirm VM power state, IP assignment, routing, and firewall before bootstrap.
    that:
      - (swarm_unreachable_nodes | length) == 0
|
|
|
|
# Create /etc/apt/keyrings (mode 0755) on every swarm node.
- name: Ensure Docker keyring directory exists
  become: true
  delegate_to: "{{ item }}"
  loop: "{{ swarm_all_nodes }}"
  ansible.builtin.file:
    state: directory
    path: /etc/apt/keyrings
    mode: "0755"
|
|
|
|
# List the regular files matching *.list / *.sources under
# /etc/apt/sources.list.d on each node; one result per node is registered.
- name: Discover apt source files on swarm nodes
  become: true
  delegate_to: "{{ item }}"
  loop: "{{ swarm_all_nodes }}"
  ansible.builtin.find:
    file_type: file
    paths: /etc/apt/sources.list.d
    patterns: ["*.list", "*.sources"]
  register: swarm_bootstrap_apt_source_candidates
|
|
|
|
# Slurp every discovered apt source file. The loop flattens the per-node find
# results into (node_result, file) pairs: item.0.item is the node the file
# lives on and item.1 is the file entry.
- name: Read apt source file contents on swarm nodes
  become: true
  delegate_to: "{{ item.0.item }}"
  ansible.builtin.slurp:
    src: "{{ item.1.path }}"
  loop: "{{ swarm_bootstrap_apt_source_candidates.results | subelements('files', skip_missing=True) }}"
  loop_control:
    label: "{{ item.0.item }} -> {{ item.1.path }}"
  register: swarm_bootstrap_apt_source_contents
|
|
|
|
# Delete each slurped source file whose decoded content mentions
# download.docker.com. item.item is the (node_result, file) pair from the
# slurp loop, so item.item.0.item is the node that owns the file.
- name: Remove discovered Docker apt source files on swarm nodes
  become: true
  delegate_to: "{{ item.item.0.item }}"
  loop: "{{ swarm_bootstrap_apt_source_contents.results }}"
  loop_control:
    label: "{{ item.item.0.item }} -> {{ item.source }}"
  when:
    # Skip results that carry no content before attempting to decode.
    - item.content is defined
    - "'download.docker.com' in (item.content | b64decode)"
  ansible.builtin.file:
    state: absent
    path: "{{ item.source }}"
|
|
|
|
# Strip every line mentioning download.docker.com from /etc/apt/sources.list
# on each swarm node.
#
# The pattern lives in a single-quoted YAML scalar, where backslashes are
# literal, so each dot is escaped with ONE backslash. The previous doubled
# backslashes produced a regex that demanded a literal "\" in the line and
# therefore never matched a real Docker repository entry.
- name: Remove Docker source entries from main apt sources list
  ansible.builtin.lineinfile:
    path: /etc/apt/sources.list
    regexp: '^.*download\.docker\.com.*$'
    state: absent
  become: true
  delegate_to: "{{ item }}"
  loop: "{{ swarm_all_nodes }}"
|
|
|
|
# Delete the one-line-format source file named after
# swarm_docker_repo_filename on every node.
- name: Remove legacy Docker apt source list
  become: true
  delegate_to: "{{ item }}"
  loop: "{{ swarm_all_nodes }}"
  ansible.builtin.file:
    state: absent
    path: "/etc/apt/sources.list.d/{{ swarm_docker_repo_filename }}.list"
|
|
|
|
# Delete the source file that add-apt-repository generates for the Docker
# Ubuntu repository, suffixed with the node's release codename ('noble' when
# the fact is unavailable).
#
# The path is written on a single line deliberately: a folded (>-) scalar
# joins its lines with a space, which previously yielded
# "...linux_ubuntu- <release>.list" and could never match the real file.
- name: Remove legacy add-apt-repository Docker source list
  ansible.builtin.file:
    path: "/etc/apt/sources.list.d/archive_uri-https_download_docker_com_linux_ubuntu-{{ hostvars[item].ansible_distribution_release | default('noble') }}.list"
    state: absent
  become: true
  delegate_to: "{{ item }}"
  loop: "{{ swarm_all_nodes }}"
|
|
|
|
# Delete the .sources file named after swarm_docker_repo_filename on every
# node.
- name: Remove legacy Docker deb822 source definition
  become: true
  delegate_to: "{{ item }}"
  loop: "{{ swarm_all_nodes }}"
  ansible.builtin.file:
    state: absent
    path: "/etc/apt/sources.list.d/{{ swarm_docker_repo_filename }}.sources"
|
|
|
|
# Delete whatever exists at swarm_docker_legacy_keyring_path on every node.
- name: Remove legacy Docker apt signing key path
  become: true
  delegate_to: "{{ item }}"
  loop: "{{ swarm_all_nodes }}"
  ansible.builtin.file:
    state: absent
    path: "{{ swarm_docker_legacy_keyring_path }}"
|
|
|
|
# Download the repository signing key from swarm_docker_repo_gpg_url to
# swarm_docker_keyring_path (mode 0644) on every node.
- name: Install Docker apt signing key
  become: true
  delegate_to: "{{ item }}"
  loop: "{{ swarm_all_nodes }}"
  ansible.builtin.get_url:
    dest: "{{ swarm_docker_keyring_path }}"
    url: "{{ swarm_docker_repo_gpg_url }}"
    mode: "0644"
|
|
|
|
# Register the Docker "stable" apt repository on every node, pinned to the
# node's architecture and signed by the key at swarm_docker_keyring_path.
- name: Add Docker apt repository
  become: true
  delegate_to: "{{ item }}"
  loop: "{{ swarm_all_nodes }}"
  vars:
    # Per-node Debian architecture: aarch64 maps to arm64, anything else
    # (including a missing fact, treated as x86_64) maps to amd64.
    _node_apt_arch: "{{ 'arm64' if hostvars[item].ansible_architecture | default('x86_64') == 'aarch64' else 'amd64' }}"
  ansible.builtin.apt_repository:
    state: present
    filename: "{{ swarm_docker_repo_filename }}"
    # Folded scalar: the lines below are joined with single spaces.
    repo: >-
      deb [arch={{ _node_apt_arch }} signed-by={{ swarm_docker_keyring_path }}]
      {{ swarm_docker_repo_url }}
      {{ hostvars[item].ansible_distribution_release | default('noble') }}
      stable
|
|
|
|
# Refresh the apt cache and install the supporting packages on every node.
- name: Ensure Docker dependencies are installed
  become: true
  delegate_to: "{{ item }}"
  loop: "{{ swarm_all_nodes }}"
  ansible.builtin.apt:
    update_cache: true
    state: present
    name: [ca-certificates, curl, gnupg, python3-docker]
|
|
|
|
# Refresh the apt cache and install the packages listed in
# swarm_docker_packages on every node.
- name: Install Docker engine packages
  become: true
  delegate_to: "{{ item }}"
  loop: "{{ swarm_all_nodes }}"
  ansible.builtin.apt:
    update_cache: true
    state: present
    name: "{{ swarm_docker_packages }}"
|
|
|
|
# Enable the docker unit at boot and make sure it is started on every node.
- name: Ensure Docker service is enabled and running
  become: true
  delegate_to: "{{ item }}"
  loop: "{{ swarm_all_nodes }}"
  ansible.builtin.systemd:
    state: started
    enabled: true
    name: docker
|
|
|
|
# Add the connection user of each node to the docker group without touching
# that user's other group memberships (append: true).
- name: Ensure ansible_user is in the docker group
  become: true
  delegate_to: "{{ item }}"
  loop: "{{ swarm_all_nodes }}"
  ansible.builtin.user:
    append: true
    groups: docker
    # Take ansible_user from the node's own hostvars so inventories with
    # different users per host work; otherwise use the play-level ansible_user.
    name: "{{ hostvars[item].ansible_user | default(ansible_user) }}"
|
|
|
|
# Read .Swarm.LocalNodeState from the primary manager's daemon; the init task
# below only runs when the value is not 'active'.
- name: Read primary manager swarm state
  tags:
    - swarm-join
  become: true
  delegate_to: "{{ swarm_primary_manager }}"
  environment: "{{ swarm_docker_cli_env }}"
  ansible.builtin.command:
    cmd: docker info --format '{{"{{"}} .Swarm.LocalNodeState {{"}}"}}'
  register: swarm_primary_state
  # Read-only query: never "changed", and executed even in check mode.
  check_mode: false
  changed_when: false
|
|
|
|
# Create the swarm on the primary manager unless its daemon already reports
# the 'active' state. Advertise/listen addresses use the detected runtime IP.
- name: Initialize swarm on primary manager when inactive
  tags:
    - swarm-join
  when: swarm_primary_state.stdout != 'active'
  become: true
  delegate_to: "{{ swarm_primary_manager }}"
  environment: "{{ swarm_docker_cli_env }}"
  ansible.builtin.command:
    # Folded scalar: rendered as one command line.
    cmd: >-
      docker swarm init
      --advertise-addr {{ swarm_primary_advertise_addr }}
      --listen-addr {{ swarm_primary_advertise_addr }}:{{ swarm_api_port }}
      --default-addr-pool {{ swarm_default_addr_pool }}
      --default-addr-pool-mask-length {{ swarm_default_addr_pool_mask_length }}
  register: swarm_init_result
  # Both conditions must hold for a failure: a non-zero exit that is NOT the
  # "already part of a swarm" case.
  failed_when:
    - swarm_init_result.rc != 0
    - "'This node is already part of a swarm' not in swarm_init_result.stderr"
  changed_when: swarm_init_result.rc == 0
|
|
|
|
# Fetch the manager join token from the primary manager and register it for
# the secondary-manager join task.
#
# no_log keeps the token (the task's entire stdout) out of console output and
# logs; the registered variable remains usable by later tasks.
- name: Get manager join token
  ansible.builtin.command: docker swarm join-token -q manager
  # Read-only query: never "changed", and executed even in check mode.
  changed_when: false
  check_mode: false
  no_log: true
  register: swarm_manager_token
  become: true
  delegate_to: "{{ swarm_primary_manager }}"
  environment: "{{ swarm_docker_cli_env }}"
  tags: [swarm-join]
|
|
|
|
# Fetch the worker join token from the primary manager and register it for
# the worker join task.
#
# no_log keeps the token (the task's entire stdout) out of console output and
# logs; the registered variable remains usable by later tasks.
- name: Get worker join token
  ansible.builtin.command: docker swarm join-token -q worker
  # Read-only query: never "changed", and executed even in check mode.
  changed_when: false
  check_mode: false
  no_log: true
  register: swarm_worker_token
  become: true
  delegate_to: "{{ swarm_primary_manager }}"
  environment: "{{ swarm_docker_cli_env }}"
  tags: [swarm-join]
|
|
|
|
# Read .Swarm.LocalNodeState on each secondary manager; results are registered
# per node for the join task that follows.
- name: Check secondary manager swarm state
  tags:
    - swarm-join
  become: true
  delegate_to: "{{ item }}"
  loop: "{{ swarm_secondary_managers }}"
  environment: "{{ swarm_docker_cli_env }}"
  ansible.builtin.command:
    cmd: docker info --format '{{"{{"}} .Swarm.LocalNodeState {{"}}"}}'
  register: swarm_secondary_manager_states
  # Read-only query: never "changed", and executed even in check mode.
  check_mode: false
  changed_when: false
|
|
|
|
# Join every secondary manager whose daemon did not report 'active', using
# the manager token and the primary manager's advertise address.
- name: Join secondary managers to swarm when not active
  ansible.builtin.command: >-
    docker swarm join
    --token {{ swarm_manager_token.stdout }}
    {{ swarm_primary_advertise_addr }}:{{ swarm_api_port }}
  register: swarm_join_manager_results
  changed_when: swarm_join_manager_results.rc == 0
  # Both conditions must hold for a failure: a non-zero exit that is NOT the
  # "already part of a swarm" case.
  failed_when:
    - swarm_join_manager_results.rc != 0
    - "'This node is already part of a swarm' not in swarm_join_manager_results.stderr"
  become: true
  # item is a result of the state check; item.item is the node it ran on.
  delegate_to: "{{ item.item }}"
  loop: "{{ swarm_secondary_manager_states.results }}"
  loop_control:
    # Show only the node name per iteration instead of the whole result dict.
    label: "{{ item.item }}"
  when: item.stdout != 'active'
  environment: "{{ swarm_docker_cli_env }}"
  tags: [swarm-join]
|
|
|
|
# Read .Swarm.LocalNodeState on each worker; results are registered per node
# for the join task that follows.
- name: Check worker swarm state
  tags:
    - swarm-join
  become: true
  delegate_to: "{{ item }}"
  loop: "{{ groups['swarm_workers'] }}"
  environment: "{{ swarm_docker_cli_env }}"
  ansible.builtin.command:
    cmd: docker info --format '{{"{{"}} .Swarm.LocalNodeState {{"}}"}}'
  register: swarm_worker_states
  # Read-only query: never "changed", and executed even in check mode.
  check_mode: false
  changed_when: false
|
|
|
|
# Join every worker whose daemon did not report 'active', using the worker
# token and the primary manager's advertise address.
- name: Join workers to swarm when not active
  ansible.builtin.command: >-
    docker swarm join
    --token {{ swarm_worker_token.stdout }}
    {{ swarm_primary_advertise_addr }}:{{ swarm_api_port }}
  register: swarm_join_worker_results
  changed_when: swarm_join_worker_results.rc == 0
  # Both conditions must hold for a failure: a non-zero exit that is NOT the
  # "already part of a swarm" case.
  failed_when:
    - swarm_join_worker_results.rc != 0
    - "'This node is already part of a swarm' not in swarm_join_worker_results.stderr"
  become: true
  # item is a result of the state check; item.item is the node it ran on.
  delegate_to: "{{ item.item }}"
  loop: "{{ swarm_worker_states.results }}"
  loop_control:
    # Show only the node name per iteration instead of the whole result dict.
    label: "{{ item.item }}"
  when: item.stdout != 'active'
  environment: "{{ swarm_docker_cli_env }}"
  tags: [swarm-join]
|
|
|
|
# List the hostname of every node known to the swarm, one per output line,
# as seen by the primary manager.
- name: Collect swarm node hostnames from primary manager
  tags:
    - swarm-join
  become: true
  delegate_to: "{{ swarm_primary_manager }}"
  environment: "{{ swarm_docker_cli_env }}"
  ansible.builtin.command:
    cmd: docker node ls --format '{{"{{"}} .Hostname {{"}}"}}'
  register: swarm_node_hostnames
  # Read-only query: never "changed", and executed even in check mode.
  check_mode: false
  changed_when: false
|
|
|
|
# The number of hostnames reported by "docker node ls" must equal the number
# of nodes in swarm_all_nodes.
- name: Assert expected swarm node count is present on primary manager
  tags:
    - swarm-join
  ansible.builtin.assert:
    success_msg: "Primary manager sees expected swarm node count."
    fail_msg: >-
      Primary manager sees {{ swarm_node_hostnames.stdout_lines | length }} node(s),
      expected {{ swarm_all_nodes | length }}. Check Docker context and swarm membership.
    that:
      - (swarm_node_hostnames.stdout_lines | length) == (swarm_all_nodes | length)
|
|
|
|
# For each manager, print the value of its "node.role" label (empty output
# when the label is absent). Errors are tolerated here; the follow-up task
# decides what to do based on stdout.
- name: Read existing manager label values by hostname
  become: true
  delegate_to: "{{ swarm_primary_manager }}"
  loop: "{{ groups['swarm_managers'] }}"
  environment: "{{ swarm_docker_cli_env }}"
  ansible.builtin.command:
    # Folded scalar: rendered as one command line; the quoted brace pairs are
    # Jinja escapes producing a literal Go template.
    cmd: >-
      docker node inspect
      --format '{{"{{"}} with index .Spec.Labels "node.role" {{"}}"}}{{"{{"}} . {{"}}"}}{{"{{"}} end {{"}}"}}'
      {{ item }}
  register: manager_node_labels
  failed_when: false
  changed_when: false
|
|
|
|
# Set node.role=manager on every manager whose inspected label value is not
# already 'manager'. item is an inspect result; item.item is the node name.
- name: Ensure manager role labels are present by hostname
  ansible.builtin.command: docker node update --label-add node.role=manager {{ item.item }}
  register: manager_label_update
  changed_when: manager_label_update.rc == 0
  become: true
  delegate_to: "{{ swarm_primary_manager }}"
  loop: "{{ manager_node_labels.results }}"
  loop_control:
    # Show only the node name per iteration instead of the whole result dict.
    label: "{{ item.item }}"
  when: item.stdout != 'manager'
  environment: "{{ swarm_docker_cli_env }}"
|
|
|
|
# For each worker, print the value of its "node.role" label (empty output
# when the label is absent). Errors are tolerated here; the follow-up task
# decides what to do based on stdout.
- name: Read existing worker label values by hostname
  become: true
  delegate_to: "{{ swarm_primary_manager }}"
  loop: "{{ groups['swarm_workers'] }}"
  environment: "{{ swarm_docker_cli_env }}"
  ansible.builtin.command:
    # Folded scalar: rendered as one command line; the quoted brace pairs are
    # Jinja escapes producing a literal Go template.
    cmd: >-
      docker node inspect
      --format '{{"{{"}} with index .Spec.Labels "node.role" {{"}}"}}{{"{{"}} . {{"}}"}}{{"{{"}} end {{"}}"}}'
      {{ item }}
  register: worker_node_labels
  failed_when: false
  changed_when: false
|
|
|
|
# Set node.role=worker on every worker whose inspected label value is not
# already 'worker'. item is an inspect result; item.item is the node name.
- name: Ensure worker role labels are present by hostname
  ansible.builtin.command: docker node update --label-add node.role=worker {{ item.item }}
  register: worker_label_update
  changed_when: worker_label_update.rc == 0
  become: true
  delegate_to: "{{ swarm_primary_manager }}"
  loop: "{{ worker_node_labels.results }}"
  loop_control:
    # Show only the node name per iteration instead of the whole result dict.
    label: "{{ item.item }}"
  when: item.stdout != 'worker'
  environment: "{{ swarm_docker_cli_env }}"
|
|
|
|
# Capture the "docker node ls" table from the primary manager so the next
# task can print it.
- name: Show final swarm node table
  become: true
  delegate_to: "{{ swarm_primary_manager }}"
  environment: "{{ swarm_docker_cli_env }}"
  ansible.builtin.command:
    cmd: docker node ls
  register: swarm_node_ls
  changed_when: false
|
|
|
|
# Print the captured node table line by line.
- name: Print swarm verification output
  ansible.builtin.debug:
    var: 'swarm_node_ls.stdout_lines'
|