# traefik-kop Swarm stack
# Managed by Ansible — manual edits will be overwritten on the next deploy.
# Source vars: group_vars/all.yml (edge_routing.swarm.*, edge_routing.integration.*)
# Deploy via: ansible-playbook playbooks/docker/deploy_traefik_kop.yml
#
# WHAT THIS DOES:
# Runs as a Swarm service on a manager node. Reads Docker service labels
# (traefik.enable=true etc.) from Swarm services and publishes routing
# rules into the Redis instance on Heimdall ({{ edge_routing.integration.redis_addr }}).
# Traefik then picks up these routes from Redis automatically.
#
# NETWORK NOTE:
# proxy-net here is a Swarm overlay network — distinct from the bridge
# network of the same name on Heimdall. The overlay allows future Swarm
# services to declare `networks: [proxy-net]` and be discoverable by kop.
# Compose file format level this stack is written against. Quoted so YAML
# keeps it a string rather than the float 3.9.
# NOTE(review): deploy.rollback_config in this file needs format 3.7 or newer;
# confirm the target Docker version's `docker stack deploy` still expects this
# key before removing it.
version: "3.9"

services:
  # traefik-kop: single-replica agent that reads Swarm service labels through
  # the Docker socket and publishes the resulting routes to Redis.
  traefik-kop:
    image: "{{ edge_routing.integration.agent_image }}"

    # The Docker socket is how kop reads Swarm service state.
    # NOTE: `:ro` only prevents the container from replacing or removing the
    # socket file. It does NOT make the Docker API read-only: any process that
    # can open the socket can still issue write calls, so this mount is not a
    # security boundary on its own. kop is only expected to read service state.
    # NOTE(review): front the socket with a filtering proxy if stricter
    # isolation is required.
    volumes:
      - "/var/run/docker.sock:/var/run/docker.sock:ro"

    # Entries are quoted so the rendered values can never be re-interpreted by
    # YAML (for example a value containing " #" or ": ").
    environment:
      - "REDIS_ADDR={{ edge_routing.integration.redis_addr }}"
      # WHY BIND_IP is a Swarm node IP (not Heimdall):
      # kop writes "route traffic for <service> to BIND_IP:<published-port>".
      # The Swarm routing mesh makes published ports available on ALL nodes,
      # so Traefik sends the request here and the mesh handles the rest.
      - "BIND_IP={{ edge_routing.swarm.bind_ip }}"

    networks:
      - proxy-net

    deploy:
      replicas: 1

      placement:
        constraints:
          # WHY manager only: only manager nodes hold full Swarm Raft state.
          # A worker node has an incomplete view of all services and their
          # labels.
          - node.role == manager

      # WHY on-failure (not `any`, Swarm's equivalent of "always"): restarts
      # happen only after a non-zero exit, and delay/max_attempts/window bound
      # the retries, which avoids rapid reconnect storms against Redis during
      # a network partition.
      restart_policy:
        condition: on-failure
        delay: 5s
        max_attempts: 3
        window: 30s

      # WHY start-first: the new task starts before the old one stops, giving
      # zero downtime. Rollback triggers if the monitor period detects failure.
      update_config:
        parallelism: 1
        order: start-first
        failure_action: rollback
        delay: 10s
        monitor: 30s

      rollback_config:
        parallelism: 1
        order: stop-first

networks:
  # WHY external: this overlay network is pre-created in the deploy playbook
  # so future Swarm service stacks can also join it without stack coupling.
  # `name` is the real network name on the Swarm; `proxy-net` is only the
  # alias used inside this file.
  proxy-net:
    external: true
    name: "{{ edge_routing.swarm.proxy_network }}"