added swarm focused ansible files to use as inspo for new environment
This commit is contained in:
parent
fe3a33968e
commit
0fcf627e6c
49
ansible/ansible-old/.ansible-lint
Normal file
49
ansible/ansible-old/.ansible-lint
Normal file
@ -0,0 +1,49 @@
|
||||
---
|
||||
# Ansible Lint Configuration
|
||||
# Enforces quality standards for playbooks and roles
|
||||
# Documentation: https://ansible-lint.readthedocs.io/
|
||||
|
||||
# Exclude paths from linting
|
||||
exclude_paths:
|
||||
- .cache/
|
||||
- .git/
|
||||
- outputs/
|
||||
- scripts/
|
||||
|
||||
# Enable offline mode (skip installing requirements.yml dependencies and refreshing schemas)
|
||||
offline: true
|
||||
|
||||
# Skip specific rules (with justification)
|
||||
skip_list:
|
||||
- 'yaml[line-length]' # Advisory: Many legitimate cases exceed 160 chars
|
||||
- 'name[casing]' # Advisory: Emoji and stylistic choices in task names
|
||||
# NOTE: no-changed-when removed from skip_list — now enforced as a warning
|
||||
# (warn_list below). Stack playbooks and the swarm_stack_deploy role MUST
|
||||
# be fully compliant. Bootstrap playbooks with legitimate raw/command use
|
||||
# may suppress per-task with: # noqa: no-changed-when
|
||||
- 'command-instead-of-module' # Advisory: Some Proxmox/specialized commands lack modules
|
||||
- 'var-naming[no-role-prefix]' # Advisory: swarm_stack_deploy intentionally exposes a
|
||||
# short 'stack_*' public API namespace. Renaming to 'swarm_stack_deploy_*' would be a
|
||||
# breaking change for all callers. Suppress globally; revisit in Phase 3 refactor.
|
||||
|
||||
# Warn on specific rules (advisory, not blocking)
|
||||
warn_list:
|
||||
- 'experimental' # Rules tagged as experimental are reported as warnings, not errors
|
||||
- 'jinja[spacing]' # Encourage spacing in templates
|
||||
- 'risky-file-permissions' # Flag overly permissive file modes
|
||||
- 'no-changed-when' # Promoted from skip: visible on all command/shell tasks missing changed_when
|
||||
# NEXT PHASE: move to blocking by removing from warn_list entirely
|
||||
|
||||
# Map file path patterns to the ansible-lint file kind used when linting them
|
||||
kinds:
|
||||
- playbook: "playbooks/**/*.yml"
|
||||
- tasks: "roles/*/tasks/**/*.yml"
|
||||
- vars: "group_vars/**/*.yml"
|
||||
- defaults: "roles/*/defaults/**/*.yml"
|
||||
- handlers: "roles/*/handlers/**/*.yml"
|
||||
|
||||
# Profile to use (min, basic, moderate, safety, shared, production)
|
||||
profile: moderate
|
||||
|
||||
# Treat warnings as errors (disable initially until baseline is clean)
|
||||
# strict: false
|
||||
23
ansible/ansible-old/.gitignore
vendored
Normal file
23
ansible/ansible-old/.gitignore
vendored
Normal file
@ -0,0 +1,23 @@
|
||||
# Python Virtual Environment
|
||||
.venv/
|
||||
venv/
|
||||
__pycache__/
|
||||
*.pyc
|
||||
|
||||
# Ansible Runtime
|
||||
*.retry
|
||||
.ansible/
|
||||
|
||||
# IDE
|
||||
.vscode/
|
||||
.idea/
|
||||
|
||||
# Secrets (never commit!)
|
||||
group_vars/*/vault.yml
|
||||
host_vars/*/vault.yml
|
||||
*.vault
.vault_pass
|
||||
|
||||
# Temporary Files
|
||||
*.log
|
||||
*.tmp
|
||||
.DS_Store
|
||||
1
ansible/ansible-old/.vault_pass
Normal file
1
ansible/ansible-old/.vault_pass
Normal file
@ -0,0 +1 @@
|
||||
Promci*1
|
||||
37
ansible/ansible-old/.yamllint
Normal file
37
ansible/ansible-old/.yamllint
Normal file
@ -0,0 +1,37 @@
|
||||
---
|
||||
# yamllint configuration for Ansible project
|
||||
# Aligned with .ansible-lint skip-list rationale.
|
||||
# 'yaml[line-length]' is advisory: Jinja2 templates and Traefik labels
|
||||
# routinely exceed 80 chars and wrapping them reduces readability.
|
||||
#
|
||||
# Rules below also satisfy ansible-lint's required yamllint constraints:
|
||||
# comments.min-spaces-from-content: 1
|
||||
# comments-indentation: false
|
||||
# braces.max-spaces-inside: 1
|
||||
# octal-values.forbid-implicit-octal: true
|
||||
# octal-values.forbid-explicit-octal: true
|
||||
|
||||
extends: default
|
||||
|
||||
rules:
|
||||
# Allow up to 160 chars — matches the rationale in .ansible-lint:
|
||||
# "Many legitimate cases exceed 160 chars" (Traefik labels, Jinja2 expressions)
|
||||
line-length:
|
||||
max: 160
|
||||
level: warning
|
||||
|
||||
# Docker Compose / Swarm stack files do not use YAML document start markers.
|
||||
# Ansible playbooks do. Make this a warning rather than an error so stack
|
||||
# templates are not penalised while playbooks are still encouraged to use ---.
|
||||
document-start:
|
||||
level: warning
|
||||
|
||||
# Required by ansible-lint compatibility rules:
|
||||
comments:
|
||||
min-spaces-from-content: 1
|
||||
comments-indentation: false
|
||||
braces:
|
||||
max-spaces-inside: 1
|
||||
octal-values:
|
||||
forbid-implicit-octal: true
|
||||
forbid-explicit-octal: true
|
||||
12
ansible/ansible-old/ansible.cfg
Normal file
12
ansible/ansible-old/ansible.cfg
Normal file
@ -0,0 +1,12 @@
|
||||
[defaults]
|
||||
inventory = inventory/hosts.ini
|
||||
host_key_checking = True
|
||||
deprecation_warnings = False
|
||||
interpreter_python = auto_silent
|
||||
vault_password_file = .vault_pass
|
||||
|
||||
# Paths (relative to this ansible/ directory)
|
||||
roles_path = ./roles
|
||||
|
||||
# Show task timing and profiling
|
||||
callbacks_enabled = timer, profile_tasks
|
||||
70
ansible/ansible-old/documentation/README.md
Normal file
70
ansible/ansible-old/documentation/README.md
Normal file
@ -0,0 +1,70 @@
|
||||
# Ansible Documentation
|
||||
|
||||
This folder contains **Ansible-specific** technical documentation for the homelab automation framework.
|
||||
|
||||
## Documentation Organization
|
||||
|
||||
The homelab uses a **domain-based separation** for documentation:
|
||||
|
||||
### Ansible-Specific Documentation (This Folder)
|
||||
|
||||
Documentation about **how Ansible works** in this homelab:
|
||||
|
||||
- **[ansible-knowledge/](ansible-knowledge/)** — Ansible syntax, YAML/Jinja2 reference, technical constraints
|
||||
- **[playbooks/](playbooks/)** — Operational guides for running specific playbooks
|
||||
- **[playbooks/README.md](playbooks/README.md)** — Playbook runbook index, including Watchtower monitoring onboarding and self-healing
|
||||
- **[standards/ansible-quality-gates.md](standards/ansible-quality-gates.md)** — Ansible linting rules, security checklist, review workflow
|
||||
|
||||
### Homelab-Wide Documentation (Root `/documentation/`)
|
||||
|
||||
Documentation about **what the homelab allows** and architectural decisions:
|
||||
|
||||
- **[/documentation/architecture/](../../documentation/architecture/)** — Architectural contracts (control-plane, compute-plane, networking, storage, access-identity)
|
||||
- **[/documentation/standards/](../../documentation/standards/)** — Homelab-wide standards (naming conventions, environment constraints, architecture decisions)
|
||||
- **[/documentation/policies/](../../documentation/policies/)** — Operational policies (networking policy, etc.)
|
||||
- **[/documentation/handover.md](../../documentation/handover.md)** — Primary project handover document
|
||||
|
||||
## Quick Reference
|
||||
|
||||
### When Troubleshooting Ansible Issues
|
||||
|
||||
1. **Syntax errors?** → [ansible-knowledge/ansible-syntax.md](ansible-knowledge/ansible-syntax.md)
|
||||
2. **Playbook not working?** → [playbooks/README.md](playbooks/README.md) for operational guides
|
||||
3. **Monitoring stack onboarding?** → [playbooks/watchtower-monitoring-onboarding.md](playbooks/watchtower-monitoring-onboarding.md)
|
||||
4. **Linting failures?** → [standards/ansible-quality-gates.md](standards/ansible-quality-gates.md)
|
||||
|
||||
### When Designing Infrastructure
|
||||
|
||||
1. **What services can run where?** → [/documentation/architecture/compute-plane.md](../../documentation/architecture/compute-plane.md)
|
||||
2. **Network topology?** → [/documentation/architecture/networking.md](../../documentation/architecture/networking.md)
|
||||
3. **Storage architecture?** → [/documentation/architecture/storage.md](../../documentation/architecture/storage.md)
|
||||
4. **Naming conventions?** → [/documentation/standards/naming-conventions.md](../../documentation/standards/naming-conventions.md)
|
||||
|
||||
## Files in This Folder
|
||||
|
||||
```text
|
||||
ansible/documentation/
|
||||
├── README.md # You are here
|
||||
├── ansible-knowledge/ # Ansible syntax and technical reference
|
||||
│ └── ansible-syntax.md
|
||||
├── playbooks/ # Operational guides for playbooks
|
||||
│ ├── README.md
|
||||
│ ├── manage_docker_environment.md
|
||||
│ ├── mount_nfs_shares.md
|
||||
│ ├── onboard_new_host.md
|
||||
│ ├── onboard-ansible-secrets.md
|
||||
│ └── watchtower-monitoring-onboarding.md
|
||||
├── reports/ # Analysis and audit reports
|
||||
│ └── prompt-analysis-2026-01-09.md
|
||||
└── standards/ # Ansible-specific standards
|
||||
└── ansible-quality-gates.md
|
||||
```
|
||||
|
||||
## Contributing
|
||||
|
||||
When adding new documentation:
|
||||
|
||||
- **Ansible-specific content** (syntax, modules, playbook operations) → Add to this folder
|
||||
- **Homelab-wide content** (architecture, contracts, policies) → Add to `/documentation/` at the repository root
|
||||
|
||||
If unsure, ask: "Is this about how Ansible works, or about what the homelab architecture allows?"
|
||||
@ -0,0 +1,86 @@
|
||||
# Ansible Syntax Documentation
|
||||
|
||||
## 1. Overview
|
||||
|
||||
Ansible syntax defines the formal structure and permitted constructs for authoring Ansible playbooks, roles, tasks, and related configuration files. This document is the canonical reference for Ansible syntax. It supersedes all other interpretations and is immutable.
|
||||
|
||||
## 2. Syntax
|
||||
|
||||
### 2.1 Formal Rules
|
||||
|
||||
- Ansible configuration files are written in YAML format. Ansible parses YAML with PyYAML, which implements the YAML 1.1 specification, so all files must be valid YAML 1.1.
|
||||
- Indentation is strictly enforced. Only spaces are permitted; tabs are prohibited.
|
||||
- Key-value pairs must be separated by a colon and a space (`key: value`).
|
||||
- Lists are denoted by a hyphen followed by a space (`- item`).
|
||||
- Boolean values must be expressed as `true` or `false` (lowercase, unquoted).
|
||||
- Strings may be unquoted or quoted using single (`'`) or double (`"`) quotes. Quoting is required if the string contains special characters, leading/trailing whitespace, or YAML-reserved words.
|
||||
- Comments begin with a hash (`#`) and are ignored by the parser.
|
||||
- Playbooks must begin with a list of plays. Each play is a YAML dictionary.
|
||||
- Each play must define at minimum the `hosts` key.
|
||||
- Tasks within plays are defined under the `tasks` key as a list.
|
||||
- Modules are invoked as dictionary keys within a task, with module arguments as subkeys.
|
||||
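- Variable interpolation uses the Jinja2 syntax: `{{ variable_name }}`. A value that begins with `{{` must be quoted (`"{{ variable_name }}"`); otherwise YAML parses it as the start of a flow mapping.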
|
||||
- Block constructs (`block`, `rescue`, `always`) must be defined as lists under their respective keys.
|
||||
- Conditionals use the `when` key with a valid expression.
|
||||
- Loops use the `loop` or legacy `with_*` constructs.
|
||||
- Roles are included using the `roles` key as a list.
|
||||
- Handlers are defined under the `handlers` key as a list.
|
||||
- Tags are assigned using the `tags` key as a list.
|
||||
|
||||
### 2.2 Constraints
|
||||
|
||||
- All YAML files must be valid and parseable; syntax errors result in execution failure.
|
||||
- Indentation must be consistent throughout the file; mixing spaces and tabs is strictly prohibited.
|
||||
- Dictionary keys must be unique within their scope.
|
||||
- Reserved words (e.g., `hosts`, `tasks`, `vars`, `roles`, `handlers`, `tags`) must not be used as variable names.
|
||||
- Variable names must begin with a letter or an underscore and may contain letters, numbers, and underscores only.
|
||||
- Jinja2 expressions must be syntactically valid and properly closed.
|
||||
- Only supported modules and plugins may be invoked; unknown modules result in failure.
|
||||
- All constructs must be defined in the correct context (e.g., `tasks` only within plays or roles).
|
||||
- File extensions:
|
||||
- Playbooks: `.yml` or `.yaml`
|
||||
- Inventory: `.ini`, `.yml`, `.yaml`
|
||||
- Variable files: `.yml`, `.yaml`
|
||||
- All files must use UTF-8 encoding.
|
||||
|
||||
### 2.3 Valid and Invalid Constructs
|
||||
|
||||
- Valid:
|
||||
- Properly indented YAML with correct key-value structure.
|
||||
- Use of supported Ansible keywords and modules.
|
||||
- Jinja2 variable interpolation within strings.
|
||||
- Invalid:
|
||||
- Use of tabs for indentation.
|
||||
- Duplicate keys within the same dictionary.
|
||||
- Unclosed or malformed Jinja2 expressions.
|
||||
- Use of unsupported or misspelled modules.
|
||||
- Mixing YAML and JSON syntax within the same file.
|
||||
|
||||
## 3. Best Practices
|
||||
|
||||
### 3.1 Required Practices
|
||||
|
||||
- Use consistent two-space indentation for all YAML files.
|
||||
- Explicitly quote strings containing special characters or reserved words.
|
||||
- Define all variables in dedicated variable files or under the `vars` key.
|
||||
- Use descriptive names for plays, tasks, and variables.
|
||||
- Validate YAML syntax before execution.
|
||||
|
||||
### 3.2 Prohibited Practices
|
||||
|
||||
- Do not use tabs for indentation.
|
||||
- Do not use reserved Ansible keywords as variable names.
|
||||
- Do not mix YAML and JSON syntax.
|
||||
- Do not define duplicate keys within the same dictionary.
|
||||
|
||||
### 3.3 Rationale
|
||||
|
||||
- Consistent indentation and quoting prevent parsing errors and ensure predictable execution.
|
||||
- Reserved keywords are protected to avoid namespace collisions and undefined behavior.
|
||||
|
||||
## 4. Non-Goals / Explicit Exclusions
|
||||
|
||||
- This document does not cover Ansible module functionality, plugin development, or execution semantics.
|
||||
- This document does not provide tutorials, usage examples, or workflow guidance.
|
||||
- This document does not address inventory file structure beyond syntax constraints.
|
||||
- Any information not explicitly stated herein is undefined and not governed by this document.
|
||||
@ -0,0 +1,56 @@
|
||||
## ✅ **Point 5 – Access & Identity – FINAL**
|
||||
|
||||
### **Role**
|
||||
|
||||
* Defines how operators, admins, and services authenticate and access the homelab
|
||||
* Covers remote access, SSO/identity, password/MFA policy, and onboarding/offboarding
|
||||
|
||||
---
|
||||
|
||||
### **Remote access methods**
|
||||
|
||||
* Supported: Omada VPN, Tailscale, VS Code Tunnel, SSH (as needed)
|
||||
* All remote access methods are operator-only
|
||||
* End-user access: none (homelab is operator-managed only)
|
||||
* Public-facing services: must be authenticated and proxied; no direct management UI exposure
|
||||
|
||||
---
|
||||
|
||||
### **Identity & SSO**
|
||||
|
||||
* Authentik is deployed and serves as the centralized SSO/identity provider for the homelab
|
||||
* Operator/admin accounts are provisioned and managed via Authentik where possible; legacy per-service accounts should be migrated to SSO
|
||||
* All new services must integrate with Authentik for authentication if supported
|
||||
* Periodically review and update SSO integrations to ensure coverage and security
|
||||
|
||||
---
|
||||
|
||||
### **Passwords, MFA, and secrets**
|
||||
|
||||
* All admin/operator accounts must use strong, unique passwords
|
||||
* MFA is required wherever supported (VPN, SSO, cloud, etc.)
|
||||
* Credentials and secrets must be stored in a secure vault (e.g., Bitwarden, 1Password)
|
||||
|
||||
---
|
||||
|
||||
### **Operational constraints / "never do this"**
|
||||
|
||||
* Never expose management UIs (Proxmox, Watchtower, NAS, etc.) to the public internet
|
||||
* Never share admin/operator credentials
|
||||
* Never disable MFA on critical services
|
||||
* All access changes must be documented and reviewed
|
||||
|
||||
---
|
||||
|
||||
### **Onboarding/offboarding & change model**
|
||||
|
||||
* Onboarding: create accounts, set up VPN/Tailscale, grant secrets vault access
|
||||
* Offboarding: disable accounts, rotate credentials, audit access
|
||||
* Changes to access policy require contract update
|
||||
|
||||
---
|
||||
|
||||
### **Further considerations**
|
||||
|
||||
* Exact VPN/Tailscale/SSO setup details, onboarding checklists, and secrets management procedures will live in a separate, detailed access/identity doc (to be referenced here)
|
||||
* Access & identity contract should be reviewed at least annually or after major personnel/infra changes
|
||||
96
ansible/ansible-old/documentation/contracts/ComputePlane.md
Normal file
96
ansible/ansible-old/documentation/contracts/ComputePlane.md
Normal file
@ -0,0 +1,96 @@
|
||||
## ✅ **Point 2 – Compute Plane (OptiPlex Proxmox Cluster) – FINAL**
|
||||
|
||||
### **Role**
|
||||
|
||||
* Cluster that runs all Docker Swarm workloads
|
||||
* Separate from out-of-band control (Watchtower)
|
||||
* Designed to tolerate loss of one physical node without losing quorum
|
||||
|
||||
---
|
||||
|
||||
### **Physical hosts**
|
||||
|
||||
* 3× Dell OptiPlex Micro 7010: pve01-pve03
|
||||
* Local NVMe only; no shared storage dependency
|
||||
* Hosts sized with headroom; no aggressive CPU/RAM overcommit by default
|
||||
|
||||
---
|
||||
|
||||
### **Proxmox cluster**
|
||||
|
||||
* 3-node Proxmox VE cluster with Corosync over LAN
|
||||
* Static IPs on all hosts
|
||||
* vmbr0 = primary LAN bridge; VLAN-capable but unused initially
|
||||
* Proxmox HA: **off** by default (may be added later via separate design)
|
||||
|
||||
---
|
||||
|
||||
### **VM layout per host**
|
||||
|
||||
* Each OptiPlex runs exactly 2× Ubuntu Server LTS VMs:
|
||||
* 1× Swarm Manager VM
|
||||
* 1× Swarm Worker VM
|
||||
* No additional "misc" VMs on these hosts without an explicit architecture update
|
||||
|
||||
---
|
||||
|
||||
### **Swarm roles and placement**
|
||||
|
||||
* Total: 3 managers, 3 workers (one of each per host)
|
||||
* Managers hold Swarm Raft state and scheduling decisions
|
||||
* Workers run application workloads
|
||||
* Managers are schedulable only for light/infra tasks; no heavy or noisy apps
|
||||
* Node labels and placement constraints enforce "apps → workers" by default
|
||||
|
||||
---
|
||||
|
||||
### **Resource allocation (initial)**
|
||||
|
||||
* **Manager VM**
|
||||
* 2 vCPU
|
||||
* 4–6 GB RAM
|
||||
* ~40 GB disk
|
||||
* **Worker VM**
|
||||
* 4–6 vCPU
|
||||
* 16–24 GB RAM
|
||||
* ≥100 GB disk
|
||||
|
||||
---
|
||||
|
||||
### **Storage model**
|
||||
|
||||
* VM disks: local Proxmox storage (ZFS or LVM-thin), no shared VM disks
|
||||
* Container data: bind-mounts inside VMs
|
||||
* Swarm control plane and core workloads do **not** depend on shared storage
|
||||
* Production data path:
|
||||
* Primary: TerraMaster
|
||||
* Backup: TerraMaster → Synology via rsync
|
||||
* Offsite: Synology → cloud
|
||||
|
||||
---
|
||||
|
||||
### **Networking assumptions**
|
||||
|
||||
* All Proxmox hosts and VMs attach to primary LAN via vmbr0
|
||||
* Compute plane runs on a flat LAN at baseline
|
||||
* Detailed VLAN and IP design will live in a separate networking architecture document that this spec can reference
|
||||
|
||||
---
|
||||
|
||||
### **Operational constraints ("never do this")**
|
||||
|
||||
* Do **not** run Docker workloads or Swarm nodes directly on Proxmox hosts
|
||||
* Do **not** run heavy or stateful application stacks on manager VMs
|
||||
* Do **not** introduce shared storage as a hard dependency for Swarm or cluster boot
|
||||
* Do **not** use storage appliances (TerraMaster, Synology, etc.) as Swarm managers or workers
|
||||
|
||||
---
|
||||
|
||||
### **Expansion and change model**
|
||||
|
||||
* To add compute capacity:
|
||||
* Add a new OptiPlex node to the Proxmox cluster
|
||||
* Create at least one new Swarm Worker VM on that host
|
||||
* Join the VM to Swarm with standard labels and constraints
|
||||
* Gradually rebalance workloads; no redesign of existing nodes required
|
||||
* Any change that alters manager count, enables Proxmox HA, or significantly changes storage/networking models requires an explicit architecture review and doc update
|
||||
50
ansible/ansible-old/documentation/contracts/ControlPlane.md
Normal file
50
ansible/ansible-old/documentation/contracts/ControlPlane.md
Normal file
@ -0,0 +1,50 @@
|
||||
## ✅ **Point 1 – Control Plane (“Watchtower”) – FINAL**
|
||||
|
||||
### **Node**
|
||||
|
||||
* **Raspberry Pi 5**
|
||||
* OS: Raspberry Pi OS Lite (64-bit)
|
||||
|
||||
### **Purpose**
|
||||
|
||||
* Out-of-band control
|
||||
* Automation authority
|
||||
* Monitoring vantage point
|
||||
* Recovery access when everything else is down
|
||||
|
||||
---
|
||||
|
||||
### **Allowed services (explicit)**
|
||||
|
||||
* VS Code Tunnel
|
||||
* Ansible controller
|
||||
* Tailscale (always-on)
|
||||
* **Uptime Kuma**
|
||||
|
||||
* Single container
|
||||
* Bound to Tailscale IP only
|
||||
* No reverse proxy
|
||||
* No public ports
|
||||
* Outbound alerts only (email / Discord / etc.)
|
||||
|
||||
### **Explicit exclusions**
|
||||
|
||||
* No Traefik
|
||||
* No Authentik
|
||||
* No Swarm membership
|
||||
* No shared storage
|
||||
* No stateful apps beyond Kuma’s local data
|
||||
|
||||
### **Security posture**
|
||||
|
||||
* SSH key-only
|
||||
* Non-root admin
|
||||
* Firewall: SSH + Tailscale
|
||||
* Consider SD → NAS image backups
|
||||
|
||||
### **Operational contract**
|
||||
|
||||
* If this node is down: changes pause, nothing breaks
|
||||
* If everything else is down: this node is how you recover
|
||||
|
||||
---
|
||||
@ -0,0 +1,55 @@
|
||||
# Homelab Ansible Handover – v2 Architecture
|
||||
|
||||
## Purpose
|
||||
|
||||
This document summarizes the current homelab architecture and operational contracts. It is intended as a handover for an Ansible engineer to begin developing and maintaining infrastructure automation playbooks.
|
||||
|
||||
---
|
||||
|
||||
## Architecture Overview
|
||||
|
||||
- **Control Plane:** Raspberry Pi 5 (“Watchtower”) – out-of-band management node. Runs Ansible controller, VS Code Tunnel, Tailscale, and Uptime Kuma. No production workloads or reverse proxies.
|
||||
- **Compute Plane:** 3× Dell OptiPlex Micro 7010 running Proxmox. Each host runs:
|
||||
- 1× Swarm Manager VM (control, light infra only)
|
||||
- 1× Swarm Worker VM (all app workloads)
|
||||
- **Networking:** Flat LAN (`10.0.0.0/24`), static IPs for infra, IoT/guest VLANs segregated. Future VLAN segmentation planned.
|
||||
- **Storage:** TerraMaster (primary data), Synology (backup, cloud sync). Rsync and cloud sync jobs run daily.
|
||||
- **Access & Identity:** Authentik SSO for operator/admin accounts. Remote access via Omada VPN, Tailscale, VS Code Tunnel. MFA and password vault required.
|
||||
|
||||
---
|
||||
|
||||
## Playbook Priorities & Expectations
|
||||
|
||||
1. **Idempotency:** All playbooks must be safe to run repeatedly and should not cause drift or break contracts.
|
||||
2. **Contracts:** Reference the v2 contracts in `architecture/v2/contracts/` for allowed/forbidden services, node roles, and operational constraints.
|
||||
3. **Inventory:** Maintain a clear, up-to-date inventory (hosts, groups, roles) reflecting the contracts.
|
||||
4. **Separation of Concerns:**
|
||||
- Control plane (Watchtower) is for automation, monitoring, and recovery only.
|
||||
- Compute plane (Proxmox VMs) runs all application workloads.
|
||||
- Never deploy workloads or Swarm nodes directly on Proxmox hosts or NAS devices.
|
||||
5. **Access:** Use Authentik SSO for all supported services. Document and automate onboarding/offboarding where possible.
|
||||
6. **Backups:** Automate and verify backup flows (TerraMaster → Synology → cloud). Never skip scheduled backups.
|
||||
7. **Security:** Never expose management UIs to the public internet. Enforce MFA and strong password policies.
|
||||
|
||||
---
|
||||
|
||||
## Immediate Playbook Targets
|
||||
|
||||
- Proxmox host and VM provisioning (with static IPs, labels, and roles)
|
||||
- Docker Swarm cluster setup and node role enforcement
|
||||
- NAS configuration and backup job automation
|
||||
- Authentik SSO integration for new services
|
||||
- Monitoring/alerting setup (Uptime Kuma, notifications)
|
||||
- Access onboarding/offboarding automation
|
||||
|
||||
---
|
||||
|
||||
## Reference
|
||||
|
||||
- Full contracts: `architecture/v2/contracts/`
|
||||
- Planning docs: `architecture/v2/plans/`
|
||||
- README: `architecture/v2/README.md`
|
||||
|
||||
---
|
||||
|
||||
**Contact the homelab owner for clarifications or to propose contract updates before making architectural changes.**
|
||||
69
ansible/ansible-old/documentation/contracts/Networking.md
Normal file
69
ansible/ansible-old/documentation/contracts/Networking.md
Normal file
@ -0,0 +1,69 @@
|
||||
## ✅ **Point 3 – Networking – FINAL**
|
||||
|
||||
### **Role**
|
||||
|
||||
* Defines how all homelab components (control, compute, storage, users) connect and communicate
|
||||
* Baseline: single-site, flat LAN for all core infra, with best-practice VLANs and segmentation as future upgrades
|
||||
|
||||
---
|
||||
|
||||
### **Baseline LAN**
|
||||
|
||||
* Primary LAN: `10.0.0.0/24` (gateway: `10.0.0.2`)
|
||||
* DHCP range: `10.0.0.50–10.0.0.150`
|
||||
* Static infra: `.2–.9` (infra), `.10–.14` (Proxmox), `.200+` (homelab), `.249` (Synology), `.250` (TerraMaster)
|
||||
* Key static IPs:
|
||||
* Watchtower: `10.0.0.200`
|
||||
* Proxmox hosts: `10.0.0.10–.14`
|
||||
* Synology: `10.0.0.249`
|
||||
* TerraMaster: `10.0.0.250`
|
||||
* All core infra and homelab services live in the "main" VLAN
|
||||
* IoT is segregated; guest WiFi VLAN exists but is unused
|
||||
|
||||
---
|
||||
|
||||
### **Service exposure & remote access**
|
||||
|
||||
* Most services are reverse-proxied via Traefik and exposed to the internet
|
||||
* Tailscale is used for network ingress, not direct service exposure
|
||||
* Operator remote access: Omada VPN, Tailscale, VS Code Tunnel; SSH/terminal access can be added as needed
|
||||
* Management UIs (Proxmox, Watchtower, NAS) must not be exposed publicly; most other services are proxied via Traefik
|
||||
|
||||
---
|
||||
|
||||
### **Interconnection & segmentation**
|
||||
|
||||
* Watchtower can reach all Proxmox hosts, Synology, and TerraMaster directly (no firewall blocks)
|
||||
* Homelab is entirely in the "main" VLAN; IoT is isolated; guest VLAN is unused
|
||||
* Segmentation exists for IoT, but not for homelab/infra yet; setup should be reviewed periodically
|
||||
|
||||
---
|
||||
|
||||
### **Future VLAN model (intent)**
|
||||
|
||||
* Follow best practices for small networks:
|
||||
* mgmt: hypervisors, switches, Watchtower
|
||||
* workloads: Swarm worker VMs, app traffic
|
||||
* storage: NAS traffic
|
||||
* users/guests: client devices
|
||||
* All VLANs must be isolated except via explicit firewall rules
|
||||
* Review and update segmentation as needs evolve
|
||||
|
||||
---
|
||||
|
||||
### **Operational constraints / "never do this"**
|
||||
|
||||
* Never bridge production and lab VLANs
|
||||
* Never expose management VLAN or core infra directly to the internet
|
||||
* Never allow IoT VLAN to reach core infra or management
|
||||
* Never mix guest and production traffic without a firewall
|
||||
* All changes to VLANs, firewall, or router config must be deliberate and documented
|
||||
|
||||
---
|
||||
|
||||
### **Further considerations**
|
||||
|
||||
* Exact VLAN IDs, IP ranges, DHCP/DNS, and firewall rules will live in a separate, detailed networking doc (to be referenced here)
|
||||
* Networking is single-site only; future multi-site/remote backup will require explicit design
|
||||
* Router/firewall implementation details (e.g., Omada, OPNsense, UniFi) will be documented separately; this contract is vendor-neutral
|
||||
* Review this contract and underlying network setup at least annually or after major infra changes
|
||||
53
ansible/ansible-old/documentation/contracts/Storage.md
Normal file
53
ansible/ansible-old/documentation/contracts/Storage.md
Normal file
@ -0,0 +1,53 @@
|
||||
## ✅ **Point 4 – Storage – FINAL**
|
||||
|
||||
### **Role**
|
||||
|
||||
* Defines how production and backup data is stored, protected, and accessed in the homelab
|
||||
* Focuses on NAS devices (TerraMaster, Synology), backup flows, and operational rules
|
||||
|
||||
---
|
||||
|
||||
### **NAS device roles**
|
||||
|
||||
* **TerraMaster**: primary production data store
|
||||
* **Synology**: backup target for TerraMaster, staging for offsite/cloud
|
||||
* Both: never run compute workloads or join Swarm
|
||||
|
||||
---
|
||||
|
||||
### **Data flows**
|
||||
|
||||
* Production data written to TerraMaster
|
||||
* Rsync from TerraMaster to Synology runs multiple times daily (staged for noon, repeats until 11pm)
|
||||
* Synology uploads to cloud via daily cloud sync task
|
||||
* VM/container data: backed up via app-level exports or VM snapshots (optional/TBD)
|
||||
|
||||
---
|
||||
|
||||
### **Backup policy**
|
||||
|
||||
* Minimum: daily local backup (TerraMaster → Synology), daily offsite (Synology → cloud)
|
||||
* Retention: at least 30 days for critical data
|
||||
* Verification: periodic restore tests (cadence TBD)
|
||||
|
||||
---
|
||||
|
||||
### **Operational constraints / "never do this"**
|
||||
|
||||
* Never run Docker/Swarm workloads on NAS
|
||||
* Never use NAS as a dependency for Swarm control-plane health
|
||||
* Never skip scheduled backups without explicit, documented exception
|
||||
|
||||
---
|
||||
|
||||
### **Expansion and change model**
|
||||
|
||||
* Add new storage only by explicit design update
|
||||
* Changes to backup cadence, retention, or offsite policy require contract update
|
||||
|
||||
---
|
||||
|
||||
### **Further considerations**
|
||||
|
||||
* Exact backup scripts, schedules, and cloud provider details will live in a separate, detailed storage/backup doc (to be referenced here)
|
||||
* Storage contract should be reviewed at least annually or after major infra changes
|
||||
19
ansible/ansible-old/documentation/playbooks/README.md
Normal file
19
ansible/ansible-old/documentation/playbooks/README.md
Normal file
@ -0,0 +1,19 @@
|
||||
# Playbook operation guides
|
||||
|
||||
This folder contains operator-facing guides for playbook execution.
|
||||
|
||||
## Available runbooks
|
||||
|
||||
- [Authentik deployment checklist](deploy-authentik.md)
|
||||
- [Manage Docker environment](manage_docker_environment.md)
|
||||
- [Mount NFS shares](mount_nfs_shares.md)
|
||||
- [Onboard ansible secrets](onboard-ansible-secrets.md)
|
||||
- [Onboard non-Proxmox host (new + existing)](onboard_new_host.md)
|
||||
- [Watchtower monitoring onboarding and self-healing](watchtower-monitoring-onboarding.md)
|
||||
|
||||
## Usage pattern
|
||||
|
||||
1. Validate prerequisites in the runbook.
|
||||
2. Run playbook commands exactly as documented.
|
||||
3. Verify service health and access paths.
|
||||
4. Record outcomes and follow rollback steps when needed.
|
||||
@ -0,0 +1,137 @@
|
||||
# Deploy Ansible MCP server on Watchtower
|
||||
|
||||
## Purpose
|
||||
|
||||
Deploy a custom Ansible MCP server on Watchtower so AI tools can query inventory,
|
||||
validate syntax, and run allowlisted playbooks through guarded tool calls.
|
||||
|
||||
## Scope
|
||||
|
||||
- Host: `watchtower` inventory group
|
||||
- Playbook: `ansible/playbooks/ai/deploy_ansible_mcp_watchtower.yml`
|
||||
- Runtime path: `/opt/ansible-mcp`
|
||||
- Service name: `ansible-mcp`
|
||||
- State and logs: `/var/lib/ansible-mcp`
|
||||
|
||||
## Features delivered
|
||||
|
||||
- MCP tools:
|
||||
- `health`
|
||||
- `list_inventory`
|
||||
- `validate_syntax`
|
||||
- `run_playbook`
|
||||
- `get_job_status`
|
||||
- `cancel_job`
|
||||
- Path guardrails for playbook execution (allowlisted directories only)
|
||||
- Optional explicit playbook allowlist for high-trust execution scopes
|
||||
- Write-mode guardrails:
|
||||
- global write toggle
|
||||
- explicit confirm gate for write actions
|
||||
- Auth guardrail:
|
||||
- bearer token required when `ANSIBLE_MCP_API_TOKEN` is configured
|
||||
- Input guardrails:
|
||||
- max `extra_vars` payload size
|
||||
- blocked `extra_vars` key list
|
||||
- Background run tracking with per-run logs and status records
|
||||
- JSONL audit records at `/var/lib/ansible-mcp/audit/events.jsonl`
|
||||
|
||||
## Prerequisites
|
||||
|
||||
1. Watchtower host is reachable from control node.
|
||||
2. Python 3 is installed on Watchtower.
|
||||
3. Inventory contains a valid `watchtower` group.
|
||||
4. Ansible control node has access to this repository at `/home/chester/homelab`.
|
||||
|
||||
## Deploy
|
||||
|
||||
Run from `ansible/`:
|
||||
|
||||
```bash
|
||||
cd /home/chester/homelab/ansible
|
||||
export ANSIBLE_MCP_API_TOKEN='set-a-strong-token-before-deploy'
|
||||
ansible-playbook -i inventory/hosts.ini playbooks/ai/deploy_ansible_mcp_watchtower.yml
|
||||
```
|
||||
|
||||
Dry run (check mode; reports what would change without applying it):
|
||||
|
||||
```bash
|
||||
cd /home/chester/homelab/ansible
|
||||
ansible-playbook -i inventory/hosts.ini playbooks/ai/deploy_ansible_mcp_watchtower.yml --check
|
||||
```
|
||||
|
||||
## Runtime configuration
|
||||
|
||||
The playbook sets these environment variables in the systemd unit:
|
||||
|
||||
- `ANSIBLE_MCP_REPO_ROOT=/home/chester/homelab/ansible`
|
||||
- `ANSIBLE_MCP_INVENTORY=inventory/hosts.ini`
|
||||
- `ANSIBLE_MCP_ALLOWED_PLAYBOOK_DIRS=playbooks`
|
||||
- `ANSIBLE_MCP_ALLOWED_PLAYBOOKS=` (optional comma-separated explicit allowlist)
|
||||
- `ANSIBLE_MCP_API_TOKEN=<token>` (required for HTTP transport in current playbook)
|
||||
- `ANSIBLE_MCP_ALLOW_WRITE=true`
|
||||
- `ANSIBLE_MCP_REQUIRE_CONFIRM=true`
|
||||
- `ANSIBLE_MCP_DEFAULT_TIMEOUT=900`
|
||||
- `ANSIBLE_MCP_MAX_TIMEOUT=3600`
|
||||
- `ANSIBLE_MCP_MAX_EXTRA_VARS_BYTES=16384`
|
||||
- `ANSIBLE_MCP_BLOCKED_EXTRA_VARS_KEYS=ansible_password,ansible_become_password,vault_password`
|
||||
- `ANSIBLE_MCP_STATE_DIR=/var/lib/ansible-mcp`
|
||||
- `ANSIBLE_MCP_TRANSPORT=streamable-http`
|
||||
- `ANSIBLE_MCP_HOST=0.0.0.0`
|
||||
- `ANSIBLE_MCP_PORT=8449`
|
||||
|
||||
## Verify
|
||||
|
||||
```bash
|
||||
# Service state
|
||||
sudo systemctl status ansible-mcp --no-pager
|
||||
|
||||
# Recent logs
|
||||
sudo journalctl -u ansible-mcp -n 80 --no-pager
|
||||
|
||||
# Listening port
|
||||
ss -ltnp | grep 8449
|
||||
```
|
||||
|
||||
## Client connection example
|
||||
|
||||
For MCP clients that support HTTP transport:
|
||||
|
||||
```json
|
||||
{
|
||||
"mcpServers": {
|
||||
"ansible-watchtower": {
|
||||
"type": "http",
|
||||
"url": "http://10.0.0.200:8449/mcp",
|
||||
"headers": {
|
||||
"Authorization": "Bearer ${env:ANSIBLE_MCP_API_TOKEN}"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
If you terminate TLS upstream (recommended), expose this endpoint through your
|
||||
existing ingress and use an HTTPS URL.
|
||||
|
||||
## Operational safety notes
|
||||
|
||||
- Keep `ANSIBLE_MCP_REQUIRE_CONFIRM=true` in write mode.
|
||||
- Keep `ANSIBLE_MCP_API_TOKEN` set and rotate it regularly.
|
||||
- Prefer explicit `ANSIBLE_MCP_ALLOWED_PLAYBOOKS` over broad directory allowlists.
|
||||
- Restrict `ANSIBLE_MCP_ALLOWED_PLAYBOOK_DIRS` to known-safe playbook roots.
|
||||
- Do not grant broad filesystem access to the service user.
|
||||
- Treat background run logs in `/var/lib/ansible-mcp/logs` as audit artifacts.
|
||||
|
||||
## Rollback
|
||||
|
||||
```bash
|
||||
sudo systemctl disable --now ansible-mcp
|
||||
sudo rm -f /etc/systemd/system/ansible-mcp.service
|
||||
sudo systemctl daemon-reload
|
||||
```
|
||||
|
||||
Optional cleanup:
|
||||
|
||||
```bash
|
||||
sudo rm -rf /opt/ansible-mcp /var/lib/ansible-mcp
|
||||
```
|
||||
606
ansible/ansible-old/documentation/playbooks/deploy-authentik.md
Normal file
606
ansible/ansible-old/documentation/playbooks/deploy-authentik.md
Normal file
@ -0,0 +1,606 @@
|
||||
# Authentik deployment checklist
|
||||
|
||||
## Purpose
|
||||
|
||||
This runbook is the operator path for deploying, verifying, and handing off
|
||||
Authentik as the homelab identity provider.
|
||||
|
||||
It covers:
|
||||
|
||||
- Preflight checks: secrets, Swarm state, storage, and network readiness.
|
||||
- Deployment execution using the canonical Ansible playbook.
|
||||
- Service convergence and health verification.
|
||||
- Ingress and functional smoke tests against the live endpoint.
|
||||
- Post-deploy hardening, evidence capture, and rollback guidance.
|
||||
- Day-1 troubleshooting for common failure modes.
|
||||
|
||||
## Scope
|
||||
|
||||
- **Stack name:** `authentik`
|
||||
- **Canonical playbook:** `ansible/playbooks/docker/deploy_authentik.yml`
|
||||
- **Stack template:** `ansible/templates/stacks/authentik.stack.yml`
|
||||
- **Target manager:** `swarm-manager-1` (`10.0.0.211`)
|
||||
- **Public URL:** `https://sso.castaldifamily.com`
|
||||
- **Data root:** `/mnt/homelab/apps/authentik`
|
||||
- **Services deployed:** `authentik-postgres`, `authentik-redis`, `authentik-server`, `authentik-worker`
|
||||
|
||||
> [!IMPORTANT]
|
||||
> This stack uses **absolute bind mounts**. The deploy playbook requires all data
|
||||
> directories to exist before deployment. If any path is missing, the preflight
|
||||
> asserts will fail-safe and abort rather than bootstrap an empty installation
|
||||
> over existing data.
|
||||
|
||||
---
|
||||
|
||||
## Deployment flow
|
||||
|
||||
```mermaid
|
||||
flowchart LR
|
||||
preflight[Phase 1 — Preflight] --> validation[Phase 2 — Validation run]
|
||||
validation --> deploy[Phase 3 — Deploy]
|
||||
deploy --> convergence[Phase 4 — Convergence]
|
||||
convergence --> ingress[Phase 5 — Ingress checks]
|
||||
ingress --> handoff[Phase 6 — Handoff]
|
||||
|
||||
classDef phase fill:#dbeafe,stroke:#3b82f6;
|
||||
class preflight,validation,deploy,convergence,ingress,handoff phase
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Phase 1 — Preflight checklist
|
||||
|
||||
Complete all items in this phase before running any playbook command.
|
||||
|
||||
### 1.1 Change window and ownership
|
||||
|
||||
- [ ] Deployment owner is assigned.
|
||||
- [ ] Rollback owner is assigned.
|
||||
- [ ] Maintenance window is confirmed.
|
||||
- [ ] No active cluster incidents in the latest Swarm audit
|
||||
(`outputs/swarm_audit_*.md`).
|
||||
|
||||
### 1.2 Control node readiness
|
||||
|
||||
Run from the `ansible/` directory with the virtual environment active.
|
||||
|
||||
```bash
|
||||
# Confirm Python environment
|
||||
source /home/chester/homelab/.venv/bin/activate
|
||||
|
||||
# Confirm Ansible version (must be >= 2.18.0)
|
||||
ansible --version
|
||||
|
||||
# Confirm SSH access to all Swarm managers
|
||||
ansible swarm_managers -i inventory/hosts.ini -m ping
|
||||
```
|
||||
|
||||
- [ ] Ansible version is `2.18.0` or higher.
|
||||
- [ ] All Swarm managers return `pong`.
|
||||
- [ ] Vault password is available (`.vault_pass` file present or `ANSIBLE_VAULT_PASSWORD_FILE` set).
|
||||
|
||||
### 1.3 Secrets readiness
|
||||
|
||||
The deploy playbook asserts both values are defined, non-empty, and not
|
||||
placeholder strings. Verify them first:
|
||||
|
||||
```bash
|
||||
ansible -i inventory/hosts.ini localhost \
|
||||
-m ansible.builtin.debug \
|
||||
-a "msg={{ vault_authentik_secret_key | length }}" \
|
||||
-e "@group_vars/all.yml" \
|
||||
--vault-password-file .vault_pass
|
||||
```
|
||||
|
||||
Repeat for `vault_authentik_postgres_password`.
|
||||
|
||||
- [ ] `vault_authentik_secret_key` decrypts to a non-empty, non-placeholder value.
|
||||
- [ ] `vault_authentik_postgres_password` decrypts to a non-empty, non-placeholder value.
|
||||
- [ ] Neither value is any of: `change-me`, `changeme`, `your-random-secret`, `your-db-password`.
|
||||
|
||||
### 1.4 Swarm cluster state
|
||||
|
||||
```bash
|
||||
# Confirm target manager is active and is control-plane
|
||||
ssh chester@10.0.0.211 \
|
||||
"docker info --format '{{.Swarm.LocalNodeState}}|{{.Swarm.ControlAvailable}}'"
|
||||
# Expected output: active|true
|
||||
|
||||
# Confirm all managers are active
|
||||
ansible swarm_managers -i inventory/hosts.ini \
|
||||
-m ansible.builtin.command \
|
||||
-a "docker info --format '{% raw %}{{.Swarm.LocalNodeState}}{% endraw %}'"
|
||||
```
|
||||
|
||||
- [ ] `swarm-manager-1` returns `active|true`.
|
||||
- [ ] All three managers return `active`.
|
||||
- [ ] No node shows `inactive`, `pending`, or `error`.
|
||||
|
||||
### 1.5 External overlay network
|
||||
|
||||
Authentik requires `proxy-net` to exist before stack deploy.
|
||||
|
||||
```bash
|
||||
ssh chester@10.0.0.211 \
|
||||
"docker network ls --filter name=proxy-net --format '{{.Name}}|{{.Driver}}|{{.Scope}}'"
|
||||
# Expected: proxy-net|overlay|swarm
|
||||
```
|
||||
|
||||
- [ ] `proxy-net` exists with `overlay` driver and `swarm` scope.
|
||||
|
||||
> [!WARNING]
|
||||
> If `proxy-net` is missing, create it before continuing:
|
||||
> ```bash
|
||||
> ssh chester@10.0.0.211 \
|
||||
> "docker network create --driver overlay --attachable proxy-net"
|
||||
> ```
|
||||
|
||||
### 1.6 Persistent data paths
|
||||
|
||||
All bind-mount paths must exist on `swarm-manager-1` **before** deploying.
|
||||
The playbook will fail-safe if any are missing.
|
||||
|
||||
```bash
|
||||
ssh chester@10.0.0.211 "for d in \
|
||||
/mnt/homelab/apps/authentik \
|
||||
/mnt/homelab/apps/authentik/data \
|
||||
/mnt/homelab/apps/authentik/data/database \
|
||||
/mnt/homelab/apps/authentik/data/redis \
|
||||
/mnt/homelab/apps/authentik/data/media \
|
||||
/mnt/homelab/apps/authentik/data/config \
|
||||
/mnt/homelab/apps/authentik/data/blueprints; do
|
||||
[ -d \"\$d\" ] && echo \"OK \$d\" || echo \"MISSING \$d\"
|
||||
done"
|
||||
```
|
||||
|
||||
- [ ] All 7 paths return `OK`.
|
||||
- [ ] If any path is `MISSING`, create or restore from backup before proceeding.
|
||||
|
||||
To create paths for a **fresh install** (no existing data to protect):
|
||||
|
||||
```bash
|
||||
ssh chester@10.0.0.211 "sudo mkdir -p \
|
||||
/mnt/homelab/apps/authentik/data/database \
|
||||
/mnt/homelab/apps/authentik/data/redis \
|
||||
/mnt/homelab/apps/authentik/data/media \
|
||||
/mnt/homelab/apps/authentik/data/config \
|
||||
/mnt/homelab/apps/authentik/data/blueprints"
|
||||
```
|
||||
|
||||
> [!WARNING]
|
||||
> Do not create missing paths if you are restoring an existing Authentik install.
|
||||
> Restore from backup first to avoid initialising an empty database over
|
||||
> pre-existing data.
|
||||
|
||||
---
|
||||
|
||||
## Phase 2 — Validation-only run
|
||||
|
||||
Run the playbook in validation mode to confirm all asserts pass before
|
||||
changing anything on the cluster.
|
||||
|
||||
```bash
|
||||
cd /home/chester/homelab/ansible
|
||||
|
||||
ansible-playbook \
|
||||
-i inventory/hosts.ini \
|
||||
playbooks/docker/deploy_authentik.yml \
|
||||
-e "stack_validate_only=true" \
|
||||
--vault-password-file .vault_pass
|
||||
```
|
||||
|
||||
- [ ] Playbook completes with `0` failed tasks.
|
||||
- [ ] Secrets assertion tasks pass (no `FAILED` on assert blocks).
|
||||
- [ ] Swarm manager state assertion passes.
|
||||
- [ ] Data path assertions pass for all 7 required directories.
|
||||
|
||||
**Stop here if any assert fails.** Diagnose using the
|
||||
[Troubleshooting matrix](#troubleshooting-matrix) below, then re-run validation
|
||||
before proceeding.
|
||||
|
||||
---
|
||||
|
||||
## Phase 3 — Deployment execution
|
||||
|
||||
Run the standard deploy. All playbook output should be captured for the
|
||||
evidence record.
|
||||
|
||||
```bash
|
||||
cd /home/chester/homelab/ansible
|
||||
|
||||
ansible-playbook \
|
||||
-i inventory/hosts.ini \
|
||||
playbooks/docker/deploy_authentik.yml \
|
||||
--vault-password-file .vault_pass \
|
||||
2>&1 | tee ../outputs/authentik_deploy_$(date +%Y%m%dT%H%M%S).log
|
||||
```
|
||||
|
||||
- [ ] Playbook completes without `FAILED` tasks.
|
||||
- [ ] Deployment result block is printed confirming stack name, manager, and URL.
|
||||
- [ ] Log file is saved to `outputs/` with a timestamp.
|
||||
|
||||
**Expected deployment result output:**
|
||||
|
||||
```
|
||||
"Authentik deployment complete."
|
||||
"Stack : authentik"
|
||||
"Manager : swarm-manager-1 (10.0.0.211)"
|
||||
"URL : https://sso.castaldifamily.com"
|
||||
"Data root : /mnt/homelab/apps/authentik"
|
||||
"Services : authentik-postgres, authentik-redis, authentik-server, authentik-worker"
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Phase 4 — Service convergence and health
|
||||
|
||||
Verify that all four services are running, stable, and healthy.
|
||||
|
||||
### 4.1 Service replica status
|
||||
|
||||
```bash
|
||||
ssh chester@10.0.0.211 \
|
||||
"docker service ls --filter label=com.docker.stack.namespace=authentik"
|
||||
```
|
||||
|
||||
Expected replica counts:
|
||||
|
||||
| Service | Expected |
|
||||
| :--- | :---: |
|
||||
| `authentik_authentik-postgres` | `1/1` |
|
||||
| `authentik_authentik-redis` | `1/1` |
|
||||
| `authentik_authentik-server` | `1/1` |
|
||||
| `authentik_authentik-worker` | `1/1` |
|
||||
|
||||
- [ ] All four services show `1/1` replicas.
|
||||
- [ ] No service shows `0/1` or a failure count.
|
||||
|
||||
### 4.2 Service placement
|
||||
|
||||
All four services must be pinned to `swarm-manager-1`.
|
||||
|
||||
```bash
|
||||
ssh chester@10.0.0.211 \
|
||||
"docker service ps authentik_authentik-server --filter desired-state=running --format '{{.Node}} {{.CurrentState}}'"
|
||||
# Expected: swarm-manager-1 Running ...
|
||||
```
|
||||
|
||||
- [ ] `authentik-server` task is running on `swarm-manager-1`.
|
||||
- [ ] `authentik-worker` task is running on `swarm-manager-1`.
|
||||
|
||||
### 4.3 Container health checks
|
||||
|
||||
```bash
|
||||
# postgres health (pg_isready)
|
||||
ssh chester@10.0.0.211 \
|
||||
"docker ps --filter name=authentik_authentik-postgres --format '{{.Status}}'"
|
||||
# Expected: Up ... (healthy)
|
||||
|
||||
# redis health (redis-cli ping)
|
||||
ssh chester@10.0.0.211 \
|
||||
"docker ps --filter name=authentik_authentik-redis --format '{{.Status}}'"
|
||||
# Expected: Up ... (healthy)
|
||||
```
|
||||
|
||||
- [ ] `authentik-postgres` container shows `(healthy)`.
|
||||
- [ ] `authentik-redis` container shows `(healthy)`.
|
||||
|
||||
### 4.4 Critical startup log checks
|
||||
|
||||
```bash
|
||||
# Check server startup for migration and database connectivity
|
||||
ssh chester@10.0.0.211 \
|
||||
"docker service logs authentik_authentik-server --since 10m --no-task-ids 2>&1 | tail -40"
|
||||
|
||||
# Check worker for job queue connectivity
|
||||
ssh chester@10.0.0.211 \
|
||||
"docker service logs authentik_authentik-worker --since 10m --no-task-ids 2>&1 | tail -40"
|
||||
```
|
||||
|
||||
- [ ] No `FATAL` or `ERROR` messages relating to database connection in server logs.
|
||||
- [ ] No `FATAL` or `ERROR` messages relating to Redis connection in server or worker logs.
|
||||
- [ ] Database migration messages complete without errors.
|
||||
- [ ] No repeated container restart events (no `started 2+ times`).
|
||||
|
||||
### 4.5 Resource limits in effect
|
||||
|
||||
| Service | Memory limit | CPU limit |
|
||||
| :--- | :---: | :---: |
|
||||
| `authentik-postgres` | 1 G | 0.75 |
|
||||
| `authentik-redis` | 512 M | 0.50 |
|
||||
| `authentik-server` | 2 G | 1.0 |
|
||||
| `authentik-worker` | 1 G | 0.75 |
|
||||
|
||||
```bash
|
||||
ssh chester@10.0.0.211 \
|
||||
"docker service inspect authentik_authentik-server \
|
||||
--format '{{.Spec.TaskTemplate.Resources.Limits.MemoryBytes}}'"
|
||||
# Expected: 2147483648 (2 GiB)
|
||||
```
|
||||
|
||||
- [ ] Resource limits are present and match the table above.
|
||||
|
||||
---
|
||||
|
||||
## Phase 5 — Ingress and functional verification
|
||||
|
||||
### 5.1 Traefik route registration
|
||||
|
||||
Traefik routes are published via `traefik-kop`. Verify the route is active before
|
||||
testing the public endpoint.
|
||||
|
||||
```bash
|
||||
# Check Traefik router for the authentik rule
|
||||
curl -fsS http://10.0.0.151:8080/api/http/routers/authentik@docker \
|
||||
| python3 -m json.tool | grep -E '"rule"|"status"'
|
||||
# Expected: "rule": "Host(...sso.castaldifamily.com...)", "status": "enabled"
|
||||
```
|
||||
|
||||
- [ ] Traefik router `authentik@docker` exists and is `enabled`.
|
||||
- [ ] Router rule matches ``Host(`sso.castaldifamily.com`)``.
|
||||
- [ ] Middlewares include `security-headers@file` and `ratelimit-basic@file`.
|
||||
|
||||
### 5.2 HTTPS endpoint reachability
|
||||
|
||||
```bash
|
||||
# TLS handshake and HTTP 200/302 response
|
||||
curl -fsS -o /dev/null -w "%{http_code} %{ssl_verify_result}" \
|
||||
https://sso.castaldifamily.com
|
||||
# Expected: 200 0 (or 302 0 for a redirect to login)
|
||||
```
|
||||
|
||||
- [ ] curl returns HTTP `200` or `302`.
|
||||
- [ ] `ssl_verify_result` is `0` (certificate valid).
|
||||
- [ ] Response is not a Traefik 404 or 502.
|
||||
|
||||
### 5.3 Login page load
|
||||
|
||||
Open `https://sso.castaldifamily.com` in a browser.
|
||||
|
||||
- [ ] Authentik login page loads without JavaScript errors.
|
||||
- [ ] Page title includes "authentik" or "Sign in".
|
||||
- [ ] No TLS certificate warning from the browser.
|
||||
|
||||
### 5.4 Admin UI readiness (if initial deploy)
|
||||
|
||||
Navigate to `https://sso.castaldifamily.com/if/flow/initial-setup/`
|
||||
|
||||
- [ ] Initial setup flow is reachable on first-run bootstrap.
|
||||
- [ ] Skip this step if the instance already existed; do not re-run initial setup
|
||||
on an existing install.
|
||||
|
||||
---
|
||||
|
||||
## Phase 6 — Post-deploy handoff
|
||||
|
||||
### 6.1 Monitoring integration
|
||||
|
||||
Authentik is referenced as the SSO provider in `group_vars/all.yml`:
|
||||
|
||||
```yaml
|
||||
monitoring:
|
||||
authentik_host: "https://sso.castaldifamily.com"
|
||||
```
|
||||
|
||||
- [ ] Uptime Kuma has a monitor for `https://sso.castaldifamily.com`.
|
||||
- [ ] Prometheus or health check system is alerting on `authentik_authentik-server`
|
||||
replica count dropping below 1.
|
||||
|
||||
### 6.2 Backup verification
|
||||
|
||||
- [ ] `/mnt/homelab/apps/authentik/data/database` is included in backup scope.
|
||||
- [ ] A manual backup snapshot was taken before or immediately after deploy.
|
||||
- [ ] Restore procedure is documented and tested (or explicitly deferred).
|
||||
|
||||
### 6.3 Secret rotation awareness
|
||||
|
||||
| Secret | Rotation procedure |
|
||||
| :--- | :--- |
|
||||
| `vault_authentik_secret_key` | Update vault → redeploy stack → running sessions are invalidated |
|
||||
| `vault_authentik_postgres_password` | Update vault AND postgres user password → redeploy |
|
||||
|
||||
- [ ] Rotation procedure is known to the deployment owner.
|
||||
|
||||
### 6.4 Evidence capture
|
||||
|
||||
```bash
|
||||
# Save service state snapshot
|
||||
ssh chester@10.0.0.211 \
|
||||
"docker service ls --filter label=com.docker.stack.namespace=authentik" \
|
||||
> ../outputs/authentik_service_snapshot_$(date +%Y%m%dT%H%M%S).txt
|
||||
```
|
||||
|
||||
- [ ] Deploy log saved to `outputs/authentik_deploy_<timestamp>.log`.
|
||||
- [ ] Service state snapshot saved to `outputs/authentik_service_snapshot_<timestamp>.txt`.
|
||||
- [ ] Deployment timestamp and verification timestamp recorded in this checklist.
|
||||
|
||||
### 6.5 Deployment sign-off
|
||||
|
||||
| Field | Value |
|
||||
| :--- | :--- |
|
||||
| Deployment owner | |
|
||||
| Deployment timestamp | |
|
||||
| Verification timestamp | |
|
||||
| Endpoint verified | `https://sso.castaldifamily.com` |
|
||||
| Final status | ☐ GREEN — all phases passed |
|
||||
|
||||
---
|
||||
|
||||
## Rollback procedure
|
||||
|
||||
If deployment fails or causes instability, remove the stack and preserve data.
|
||||
|
||||
```bash
|
||||
cd /home/chester/homelab/ansible
|
||||
|
||||
ansible-playbook \
|
||||
-i inventory/hosts.ini \
|
||||
playbooks/docker/deploy_authentik.yml \
|
||||
-e "authentik_deploy_state=absent" \
|
||||
--vault-password-file .vault_pass
|
||||
```
|
||||
|
||||
> [!WARNING]
|
||||
> `authentik_deploy_state=absent` removes the **Swarm stack** (containers,
|
||||
> services, configs) but does **not** delete the bind-mount data directories.
|
||||
> Data at `/mnt/homelab/apps/authentik` is preserved for re-deploy or restore.
|
||||
|
||||
- [ ] Stack removed cleanly (`docker stack ls` shows no `authentik` entry).
|
||||
- [ ] Data directories still intact on `swarm-manager-1`.
|
||||
- [ ] Root cause identified before re-deploying.
|
||||
|
||||
---
|
||||
|
||||
## Troubleshooting matrix
|
||||
|
||||
### Validation assert fails: secrets not defined or placeholder
|
||||
|
||||
**Symptom:** Playbook fails on `Assert vault_authentik_secret_key is defined` or
|
||||
`Assert Authentik secrets are not placeholders`.
|
||||
|
||||
**Check:**
|
||||
|
||||
```bash
|
||||
ansible -i inventory/hosts.ini localhost \
|
||||
-m ansible.builtin.debug \
|
||||
-a "msg={{ vault_authentik_secret_key | length }}" \
|
||||
-e "@group_vars/all.yml" \
|
||||
--vault-password-file .vault_pass
|
||||
```
|
||||
|
||||
**Fix:** Encrypt and store the correct value:
|
||||
|
||||
```bash
|
||||
ansible-vault encrypt_string 'YOUR-KEY' \
|
||||
--name 'vault_authentik_secret_key' \
|
||||
--vault-password-file .vault_pass
|
||||
# Paste output into group_vars/vault/all.yml
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### Validation assert fails: data paths missing
|
||||
|
||||
**Symptom:** Playbook fails on `Assert required Authentik paths exist before deploy`.
|
||||
|
||||
**Check:**
|
||||
|
||||
```bash
|
||||
ssh chester@10.0.0.211 "ls -la /mnt/homelab/apps/authentik/"
|
||||
```
|
||||
|
||||
**Fix (fresh install only):**
|
||||
|
||||
```bash
|
||||
ssh chester@10.0.0.211 "sudo mkdir -p \
|
||||
/mnt/homelab/apps/authentik/data/{database,redis,media,config,blueprints}"
|
||||
```
|
||||
|
||||
**Fix (existing install):** Restore from backup before creating directories.
|
||||
|
||||
---
|
||||
|
||||
### Swarm assert fails: manager not active or not control plane
|
||||
|
||||
**Symptom:** Playbook fails on `Assert target is an active Swarm manager`.
|
||||
|
||||
**Check:**
|
||||
|
||||
```bash
|
||||
ssh chester@10.0.0.211 "docker info --format '{{.Swarm.LocalNodeState}}|{{.Swarm.ControlAvailable}}'"
|
||||
```
|
||||
|
||||
**Fix:** Investigate Swarm manager health. Do not proceed until a healthy quorum
|
||||
manager is the deploy target.
|
||||
|
||||
---
|
||||
|
||||
### Services not converging to 1/1
|
||||
|
||||
**Symptom:** `docker service ls` shows `0/1` or a service cycles through restarts.
|
||||
|
||||
**Check:**
|
||||
|
||||
```bash
|
||||
ssh chester@10.0.0.211 \
|
||||
"docker service ps authentik_authentik-server --no-trunc"
|
||||
```
|
||||
|
||||
Look for failure reasons in the `Error` column.
|
||||
|
||||
**Common causes:**
|
||||
|
||||
| Cause | Evidence in logs | Fix |
|
||||
| :--- | :--- | :--- |
|
||||
| Secret key mismatch | `cryptography error` or `key invalid` in server logs | Re-check vault value, redeploy |
|
||||
| Postgres not healthy yet | `connection refused` in server logs | Wait for postgres `(healthy)`, then check server |
|
||||
| Redis not reachable | `redis connection error` in server or worker logs | Confirm `authentik-redis` is `1/1` healthy first |
|
||||
| Missing bind-mount path | `no such file or directory` in container start | Create path, redeploy |
|
||||
| Insufficient memory | OOM kill in `docker service ps` error column | Check node resources, adjust limits if needed |
|
||||
|
||||
---
|
||||
|
||||
### Traefik route not registered or 502 response
|
||||
|
||||
**Symptom:** `curl https://sso.castaldifamily.com` returns `502 Bad Gateway` or
|
||||
connection refused.
|
||||
|
||||
**Check:**
|
||||
|
||||
```bash
|
||||
# Confirm traefik-kop is running (Swarm stack)
|
||||
ssh chester@10.0.0.211 \
|
||||
"docker service ls --filter name=traefik-kop"
|
||||
|
||||
# Confirm the authentik-server task is running (the service listens on port 9000)
|
||||
ssh chester@10.0.0.211 \
|
||||
"docker service ps authentik_authentik-server --filter desired-state=running"
|
||||
```
|
||||
|
||||
**Common causes:**
|
||||
|
||||
- `traefik-kop` is not running → deploy monitoring stack first.
|
||||
- `authentik-server` is not bound on port `9000` → check replica and restart.
|
||||
- `edge_routing.swarm.bind_ip` is incorrect in `group_vars/all.yml` → verify
|
||||
it resolves to an active Swarm node.
|
||||
- Cloudflare DNS is not pointing to `10.0.0.151` → verify DNS record for
|
||||
`sso.castaldifamily.com`.
|
||||
|
||||
---
|
||||
|
||||
### Database migration errors on first boot
|
||||
|
||||
**Symptom:** Server logs show migration errors or `relation does not exist`.
|
||||
|
||||
**Check:**
|
||||
|
||||
```bash
|
||||
ssh chester@10.0.0.211 \
|
||||
"docker service logs authentik_authentik-server --since 5m 2>&1 | grep -i 'migrat\|error\|fatal'"
|
||||
```
|
||||
|
||||
**Fix:** Migrations run automatically on startup. If they fail:
|
||||
|
||||
1. Check postgres is `(healthy)` and accepting connections.
|
||||
2. Check `vault_authentik_postgres_password` in vault matches the running
|
||||
postgres password.
|
||||
3. Restart the server service to trigger a re-run:
|
||||
|
||||
```bash
|
||||
ssh chester@10.0.0.211 \
|
||||
"docker service update --force authentik_authentik-server"
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Reference
|
||||
|
||||
| Resource | Location |
|
||||
| :--- | :--- |
|
||||
| Deploy playbook | `ansible/playbooks/docker/deploy_authentik.yml` |
|
||||
| Stack template | `ansible/templates/stacks/authentik.stack.yml` |
|
||||
| Shared variables | `ansible/group_vars/all.yml` |
|
||||
| Vault secrets | `ansible/group_vars/vault/all.yml` |
|
||||
| Authentik docs | <https://goauthentik.io/docs> |
|
||||
| Authentik changelog | <https://github.com/goauthentik/authentik/releases> |
|
||||
| Swarm cluster baseline | `outputs/swarm_audit_20260314T122134.md` |
|
||||
@ -0,0 +1,245 @@
|
||||
# Docker Environment Management Playbook
|
||||
|
||||
## Overview
|
||||
|
||||
The `manage_docker_environment.yml` playbook provides comprehensive Docker management capabilities for your homelab, including installation, configuration, container management, health monitoring, and maintenance tasks.
|
||||
|
||||
## Target Hosts
|
||||
|
||||
- **Primary:** `docker_hosts` group (includes docker-01 at 10.0.0.251)
|
||||
- Can be run against any host in the `ubuntu_lab` group
|
||||
|
||||
## Features
|
||||
|
||||
### 1. Docker Installation
|
||||
- Installs Docker CE with all required components
|
||||
- Includes Docker Compose plugin
|
||||
- Installs Docker BuildKit
|
||||
- Configures Docker service for auto-start
|
||||
|
||||
### 2. Configuration Management
|
||||
- Configures Docker daemon with logging limits
|
||||
- Adds specified users to the docker group
|
||||
- Sets up storage driver (overlay2)
|
||||
- Creates custom Docker networks
|
||||
|
||||
### 3. Container Management
|
||||
- Lists all running containers
|
||||
- Creates standard networks (backend, frontend)
|
||||
- Provides container inventory
|
||||
|
||||
### 4. Health Monitoring
|
||||
- Checks Docker disk usage
|
||||
- Identifies unhealthy containers
|
||||
- Reports system status
|
||||
|
||||
### 5. Maintenance & Cleanup
|
||||
- Removes stopped containers
|
||||
- Prunes unused images
|
||||
- Cleans up unused volumes
|
||||
- Removes orphaned networks
|
||||
|
||||
### 6. Configuration Backup
|
||||
- Backs up docker-compose files
|
||||
- Creates timestamped copies in `/opt/docker-backups`
|
||||
|
||||
## Usage
|
||||
|
||||
### Basic Execution
|
||||
|
||||
```bash
|
||||
# Run all tasks
|
||||
ansible-playbook -i inventory/hosts.ini playbooks/manage_docker_environment.yml
|
||||
|
||||
# Check mode (dry run)
|
||||
ansible-playbook -i inventory/hosts.ini playbooks/manage_docker_environment.yml --check
|
||||
|
||||
# Run with specific tags
|
||||
ansible-playbook -i inventory/hosts.ini playbooks/manage_docker_environment.yml --tags "health,monitoring"
|
||||
```
|
||||
|
||||
### Available Tags
|
||||
|
||||
| Tag | Description |
|
||||
| :--- | :--- |
|
||||
| `install` | Docker installation tasks |
|
||||
| `setup` | Installation + configuration |
|
||||
| `config` | Configuration management only |
|
||||
| `containers` | Container management tasks |
|
||||
| `management` | Container inventory and network setup |
|
||||
| `health` | Health checks and monitoring |
|
||||
| `monitoring` | Same as health |
|
||||
| `maintenance` | Cleanup and pruning tasks |
|
||||
| `cleanup` | Same as maintenance |
|
||||
| `backup` | Configuration backup tasks |
|
||||
|
||||
### Tag Combinations
|
||||
|
||||
```bash
|
||||
# Install and configure Docker (first run)
|
||||
ansible-playbook -i inventory/hosts.ini playbooks/manage_docker_environment.yml --tags "install,config"
|
||||
|
||||
# Daily health check
|
||||
ansible-playbook -i inventory/hosts.ini playbooks/manage_docker_environment.yml --tags "health"
|
||||
|
||||
# Weekly maintenance
|
||||
ansible-playbook -i inventory/hosts.ini playbooks/manage_docker_environment.yml --tags "maintenance" \
|
||||
-e "docker_cleanup_enabled=true"
|
||||
|
||||
# Full system audit
|
||||
ansible-playbook -i inventory/hosts.ini playbooks/manage_docker_environment.yml --tags "containers,health"
|
||||
```
|
||||
|
||||
## Configuration Variables
|
||||
|
||||
### Docker Users
|
||||
|
||||
```yaml
|
||||
docker_users:
|
||||
- chester
|
||||
- additional_user
|
||||
```
|
||||
|
||||
### Daemon Configuration
|
||||
|
||||
```yaml
|
||||
docker_daemon_options:
|
||||
log-driver: "json-file"
|
||||
log-opts:
|
||||
max-size: "10m"
|
||||
max-file: "3"
|
||||
storage-driver: "overlay2"
|
||||
insecure-registries:
|
||||
- "registry.local:5000"
|
||||
```
|
||||
|
||||
### Cleanup Settings
|
||||
|
||||
```yaml
|
||||
# Enable cleanup tasks (default: false for safety)
|
||||
docker_cleanup_enabled: true
|
||||
|
||||
# Remove images older than X days
|
||||
docker_cleanup_older_than_days: 30
|
||||
```
|
||||
|
||||
## Examples
|
||||
|
||||
### First-Time Setup
|
||||
|
||||
```bash
|
||||
# Install Docker on new host
|
||||
ansible-playbook -i inventory/hosts.ini playbooks/manage_docker_environment.yml \
|
||||
--limit docker-01 \
|
||||
--tags "install,config"
|
||||
```
|
||||
|
||||
### Regular Maintenance Workflow
|
||||
|
||||
```bash
|
||||
# 1. Check health status
|
||||
ansible-playbook -i inventory/hosts.ini playbooks/manage_docker_environment.yml \
|
||||
--tags "health"
|
||||
|
||||
# 2. Review disk usage, then run cleanup if needed
|
||||
ansible-playbook -i inventory/hosts.ini playbooks/manage_docker_environment.yml \
|
||||
--tags "maintenance" \
|
||||
-e "docker_cleanup_enabled=true"
|
||||
|
||||
# 3. Backup configurations
|
||||
ansible-playbook -i inventory/hosts.ini playbooks/manage_docker_environment.yml \
|
||||
--tags "backup"
|
||||
```
|
||||
|
||||
### Add Custom Networks
|
||||
|
||||
```yaml
|
||||
# In the playbook or as extra vars:
|
||||
docker_networks:
|
||||
- name: web_tier
|
||||
driver: bridge
|
||||
- name: database_tier
|
||||
driver: bridge
|
||||
internal: true
|
||||
```
|
||||
|
||||
## Safety Features
|
||||
|
||||
- **Cleanup Disabled by Default:** Cleanup tasks require explicit enabling via `docker_cleanup_enabled=true`
|
||||
- **Check Mode Compatible:** All tasks support `--check` for dry-run testing
|
||||
- **Idempotent:** Can be run multiple times safely
|
||||
- **Non-Destructive Monitoring:** Health checks don't modify system state
|
||||
|
||||
## Prerequisites
|
||||
|
||||
- Ubuntu/Debian-based system
|
||||
- SSH access with sudo privileges
|
||||
- Python 3 with pip available
|
||||
- Internet connection for package downloads
|
||||
|
||||
## Post-Execution
|
||||
|
||||
After running the playbook:
|
||||
|
||||
1. **Verify Docker installation:**
|
||||
```bash
|
||||
ssh chester@10.0.0.251 "docker --version && docker compose version"
|
||||
```
|
||||
|
||||
2. **Test Docker without sudo:**
|
||||
```bash
|
||||
ssh chester@10.0.0.251 "docker ps"
|
||||
```
|
||||
|
||||
> [!NOTE]
|
||||
> Users may need to log out and back in for group membership changes to take effect.
|
||||
|
||||
3. **Check Docker status:**
|
||||
```bash
|
||||
ssh chester@10.0.0.251 "sudo systemctl status docker"
|
||||
```
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
### Docker service won't start
|
||||
|
||||
```bash
|
||||
# Check Docker daemon logs
|
||||
ssh chester@10.0.0.251 "sudo journalctl -u docker -n 50"
|
||||
|
||||
# Validate daemon.json syntax
|
||||
ssh chester@10.0.0.251 "sudo cat /etc/docker/daemon.json | jq ."
|
||||
```
|
||||
|
||||
### Permission denied errors
|
||||
|
||||
```bash
|
||||
# Verify group membership
|
||||
ssh chester@10.0.0.251 "groups"
|
||||
|
||||
# Force group update (requires re-login)
|
||||
ssh -t chester@10.0.0.251 "newgrp docker"
|
||||
```
|
||||
|
||||
### High disk usage
|
||||
|
||||
```bash
|
||||
# Run cleanup manually
|
||||
ansible-playbook -i inventory/hosts.ini playbooks/manage_docker_environment.yml \
|
||||
--tags "maintenance" \
|
||||
-e "docker_cleanup_enabled=true"
|
||||
```
|
||||
|
||||
## Integration with Other Playbooks
|
||||
|
||||
This playbook works alongside:
|
||||
|
||||
- [init_swarm_cluster.yml](../../playbooks/init_swarm_cluster.yml) - Run Docker setup first
|
||||
- [bootstrap_ai_workstation.yml](../../playbooks/bootstrap_ai_workstation.yml) - Can install Docker as dependency
|
||||
|
||||
## Next Steps
|
||||
|
||||
1. **Deploy Applications:** Create docker-compose files in `/opt/docker/`
|
||||
2. **Set Up Monitoring:** Integrate with Prometheus/Grafana
|
||||
3. **Automate Backups:** Schedule regular configuration backups
|
||||
4. **Container Orchestration:** Consider Swarm or K3s for multi-host deployments
|
||||
347
ansible/ansible-old/documentation/playbooks/mount_nfs_shares.md
Normal file
347
ansible/ansible-old/documentation/playbooks/mount_nfs_shares.md
Normal file
@ -0,0 +1,347 @@
|
||||
# Mount NFS Shares
|
||||
|
||||
**Playbook:** `playbooks/storage/mount_nfs_shares.yml`
|
||||
**Purpose:** Configure NFS client mounts on Docker Swarm nodes for persistent storage
|
||||
**Target:** All Swarm nodes (managers + workers)
|
||||
|
||||
---
|
||||
|
||||
## Overview
|
||||
|
||||
This playbook configures NFS mounts from the TerraMaster NAS to Docker Swarm nodes, providing shared storage for application data and media files. It ensures all nodes have consistent access to centralized storage while maintaining the storage contract principle that the NAS is not a dependency for Swarm control-plane operations.
|
||||
|
||||
---
|
||||
|
||||
## Prerequisites
|
||||
|
||||
### On TerraMaster NAS (10.0.0.250)
|
||||
|
||||
* NFS service enabled
|
||||
* Two NFS exports configured:
|
||||
* `/Volume1/appdata` — Application data, configs, persistent volumes
|
||||
* `/Volume2/media` — Media files (Plex, etc.)
|
||||
* NFS permissions allow access from Swarm subnet (10.0.0.0/24)
|
||||
|
||||
### On Swarm Nodes
|
||||
|
||||
* Ubuntu 24.04 LTS (Noble)
|
||||
* SSH access as `chester` user with sudo privileges
|
||||
* Network connectivity to TerraMaster on port 2049 (NFS)
|
||||
|
||||
---
|
||||
|
||||
## What It Does
|
||||
|
||||
1. **Installs NFS client** — `nfs-common` package
|
||||
2. **Creates mount points** — `/mnt/homelab` and `/mnt/media`
|
||||
3. **Configures fstab** — Persistent mounts survive reboots
|
||||
4. **Mounts shares immediately** — Makes storage available without reboot
|
||||
5. **Verifies accessibility** — Tests that mounts are readable
|
||||
|
||||
---
|
||||
|
||||
## Usage
|
||||
|
||||
### Run on all Swarm nodes
|
||||
|
||||
```bash
|
||||
cd /home/chester/homelab/ansible
|
||||
ansible-playbook playbooks/storage/mount_nfs_shares.yml
|
||||
```
|
||||
|
||||
### Run with specific tags
|
||||
|
||||
```bash
|
||||
# Only install packages and create directories
|
||||
ansible-playbook playbooks/storage/mount_nfs_shares.yml --tags setup
|
||||
|
||||
# Only update fstab (no mount action)
|
||||
ansible-playbook playbooks/storage/mount_nfs_shares.yml --tags config
|
||||
|
||||
# Mount without fstab changes (testing)
|
||||
ansible-playbook playbooks/storage/mount_nfs_shares.yml --tags mount
|
||||
|
||||
# Verify existing mounts
|
||||
ansible-playbook playbooks/storage/mount_nfs_shares.yml --tags verify
|
||||
```
|
||||
|
||||
### Limit to specific nodes
|
||||
|
||||
```bash
|
||||
# Only managers
|
||||
ansible-playbook playbooks/storage/mount_nfs_shares.yml --limit swarm_managers
|
||||
|
||||
# Only workers
|
||||
ansible-playbook playbooks/storage/mount_nfs_shares.yml --limit swarm_workers
|
||||
|
||||
# Single node
|
||||
ansible-playbook playbooks/storage/mount_nfs_shares.yml --limit swarm-worker-1
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Configuration
|
||||
|
||||
### Variables
|
||||
|
||||
Defined in the playbook (`vars` section):
|
||||
|
||||
| Variable | Value | Description |
|
||||
|----------|-------|-------------|
|
||||
| `nfs_server` | `10.0.0.250` | TerraMaster NAS IP address |
|
||||
| `nfs_mounts[0].src` | `/Volume1/appdata` | NFS export path for application data |
|
||||
| `nfs_mounts[0].dest` | `/mnt/homelab` | Local mount point for app data |
|
||||
| `nfs_mounts[1].src` | `/Volume2/media` | NFS export path for media |
|
||||
| `nfs_mounts[1].dest` | `/mnt/media` | Local mount point for media |
|
||||
| `nfs_mounts[*].opts` | `defaults` | Mount options |
|
||||
|
||||
### Customizing Mount Options
|
||||
|
||||
To change mount options (e.g., add `noatime` for performance):
|
||||
|
||||
```yaml
|
||||
nfs_mounts:
|
||||
- src: "/Volume1/appdata"
|
||||
dest: "/mnt/homelab"
|
||||
opts: "defaults,noatime,rw"
|
||||
```
|
||||
|
||||
Common NFS options:
|
||||
- `noatime` — Don't update access times (performance)
|
||||
- `hard` — Retry indefinitely if NFS server unavailable (default)
|
||||
- `soft` — Fail after timeout (risky for data integrity)
|
||||
- `rsize=8192,wsize=8192` — Adjust read/write buffer sizes
|
||||
- `nfsvers=4` — Force NFSv4 (recommended)
|
||||
|
||||
---
|
||||
|
||||
## Using NFS Mounts in Docker
|
||||
|
||||
### Method 1: Bind Mounts (Current Approach)
|
||||
|
||||
**Docker Compose:**
|
||||
```yaml
|
||||
services:
|
||||
app:
|
||||
image: myapp:latest
|
||||
volumes:
|
||||
- /mnt/homelab/appdata/myapp:/data
|
||||
- /mnt/media:/media:ro # Read-only for safety
|
||||
```
|
||||
|
||||
**Pros:**
|
||||
- Simple and transparent
|
||||
- Easy to debug with standard Linux tools
|
||||
- One mount serves all containers
|
||||
|
||||
**Cons:**
|
||||
- Services coupled to host filesystem paths
|
||||
- Must ensure mount exists before container starts
|
||||
|
||||
---
|
||||
|
||||
### Method 2: Docker NFS Volumes (Alternative)
|
||||
|
||||
**Docker Compose:**
|
||||
```yaml
|
||||
volumes:
|
||||
homelab_data:
|
||||
driver: local
|
||||
driver_opts:
|
||||
type: nfs
|
||||
o: addr=10.0.0.250,rw,nfsvers=4
|
||||
device: ":/Volume1/appdata"
|
||||
|
||||
media:
|
||||
driver: local
|
||||
driver_opts:
|
||||
type: nfs
|
||||
o: addr=10.0.0.250,ro,nfsvers=4
|
||||
device: ":/Volume2/media"
|
||||
|
||||
services:
|
||||
app:
|
||||
image: myapp:latest
|
||||
volumes:
|
||||
- homelab_data:/data
|
||||
- media:/media:ro
|
||||
```
|
||||
|
||||
**Pros:**
|
||||
- Portable volume names (no hardcoded paths)
|
||||
- Docker manages mount lifecycle
|
||||
- Per-service isolation possible
|
||||
- Automatic retry on NFS failure
|
||||
|
||||
**Cons:**
|
||||
- More complex configuration
|
||||
- Harder to inspect with standard tools
|
||||
- Must define volumes in every compose file
|
||||
|
||||
---
|
||||
|
||||
### Recommendation
|
||||
|
||||
**Use bind mounts (Method 1)** for now:
|
||||
- You already have working fstab configuration
|
||||
- Simpler to manage across 6 nodes
|
||||
- Better visibility for troubleshooting
|
||||
- Can switch to Docker volumes later if needed
|
||||
|
||||
---
|
||||
|
||||
## Verification
|
||||
|
||||
### Check mount status
|
||||
|
||||
```bash
|
||||
# On any Swarm node
|
||||
df -h | grep mnt
|
||||
|
||||
# Expected output:
|
||||
# 10.0.0.250:/Volume1/appdata 500G 100G 400G 20% /mnt/homelab
|
||||
# 10.0.0.250:/Volume2/media 2.0T 500G 1.5T 25% /mnt/media
|
||||
```
|
||||
|
||||
### Test write access
|
||||
|
||||
```bash
|
||||
# On a Swarm node
|
||||
sudo touch /mnt/homelab/test-write
|
||||
ls -l /mnt/homelab/test-write
|
||||
sudo rm /mnt/homelab/test-write
|
||||
```
|
||||
|
||||
### Check fstab persistence
|
||||
|
||||
```bash
|
||||
cat /etc/fstab | grep mnt
|
||||
# Should show both NFS entries
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
### Mount fails with "Connection refused"
|
||||
|
||||
**Cause:** NFS service not running or firewall blocking port 2049
|
||||
|
||||
**Solution:**
|
||||
```bash
|
||||
# Test NFS connectivity
|
||||
showmount -e 10.0.0.250
|
||||
|
||||
# If fails, check TerraMaster NFS settings
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### Mount fails with "Permission denied"
|
||||
|
||||
**Cause:** NFS export permissions don't allow Swarm node IPs
|
||||
|
||||
**Solution:** Update TerraMaster NFS export to allow `10.0.0.0/24` subnet
|
||||
|
||||
---
|
||||
|
||||
### Mount succeeds but directory is empty
|
||||
|
||||
**Cause:** Mounted wrong export path or path doesn't exist on NAS
|
||||
|
||||
**Solution:**
|
||||
```bash
|
||||
# List available exports
|
||||
showmount -e 10.0.0.250
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### Mount exists but containers can't write
|
||||
|
||||
**Cause:** NFS mounted read-only or wrong permissions
|
||||
|
||||
**Solution:**
|
||||
```bash
|
||||
# Check mount options
|
||||
mount | grep "/mnt/homelab"
|
||||
|
||||
# Remount with write permissions if needed
|
||||
sudo mount -o remount,rw /mnt/homelab
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### Stale NFS file handle errors
|
||||
|
||||
**Cause:** NFS server restarted or export changed
|
||||
|
||||
**Solution:**
|
||||
```bash
|
||||
# Unmount and remount
|
||||
sudo umount -f /mnt/homelab
|
||||
sudo mount -a
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Safety Considerations
|
||||
|
||||
### Storage Contract Compliance
|
||||
|
||||
✅ **Compliant:**
|
||||
- Mounting NFS on all nodes for data access
|
||||
- Using NAS for application data (not control-plane state)
|
||||
- Swarm can operate if NFS is temporarily unavailable
|
||||
|
||||
❌ **Violations to avoid:**
|
||||
- Don't store Swarm raft data on NFS
|
||||
- Don't run manager services that require NFS to stay healthy
|
||||
- Don't use NFS for `/var/lib/docker` or other system paths
|
||||
|
||||
---
|
||||
|
||||
### Backup Verification
|
||||
|
||||
Per storage contract:
|
||||
- Data on `/mnt/homelab` is backed up via TerraMaster → Synology rsync
|
||||
- Verify backup jobs are running: Check Synology logs
|
||||
- Test restores periodically
|
||||
|
||||
---
|
||||
|
||||
## Maintenance
|
||||
|
||||
### Adding new NFS shares
|
||||
|
||||
1. Configure export on TerraMaster
|
||||
2. Add entry to `nfs_mounts` list in playbook
|
||||
3. Run playbook with `--tags setup,config,mount`
|
||||
|
||||
### Removing NFS shares
|
||||
|
||||
1. Unmount: `sudo umount /mnt/someshare`
|
||||
2. Remove from `/etc/fstab`
|
||||
3. Remove directory: `sudo rmdir /mnt/someshare`
|
||||
|
||||
---
|
||||
|
||||
## Related Documentation
|
||||
|
||||
- [Storage Contract](../contracts/storage.md) — NAS roles and backup policy
|
||||
- [Environment Constraints](../standards/environment-constraints.md) — Network and hardware specs
|
||||
- [Architecture Decisions](../standards/architecture-decisions.md) — ADR-003 (Watchtower role)
|
||||
|
||||
---
|
||||
|
||||
## Tags Reference
|
||||
|
||||
| Tag | Purpose |
|
||||
|-----|---------|
|
||||
| `setup` | Install packages, create directories |
|
||||
| `packages` | Install NFS client only |
|
||||
| `filesystem` | Create mount point directories only |
|
||||
| `config` | Update fstab only |
|
||||
| `fstab` | Alias for `config` |
|
||||
| `mount` | Execute mount operations |
|
||||
| `verify` | Test mounts and display status |
|
||||
@ -0,0 +1,153 @@
|
||||
# Ansible secrets onboarding playbook
|
||||
|
||||
## Overview
|
||||
|
||||
This guide onboards secret management for passwords, API keys, and tokens using
|
||||
Ansible Vault. It defines a repeatable workflow for creating encrypted variable
|
||||
files, loading them safely in playbooks, and consuming secrets with idempotent
|
||||
Ansible modules.
|
||||
|
||||
## What this establishes
|
||||
|
||||
### 1. Standard secret file layout
|
||||
|
||||
- `group_vars/<group>/vault.yml` for group-level secrets
|
||||
- `host_vars/<host>/vault.yml` for host-level secrets
|
||||
- Secret variable names with `_pass` or `_secret` suffixes
|
||||
|
||||
### 2. Encrypted-at-rest secret storage
|
||||
|
||||
- Secrets are created and edited with `ansible-vault`
|
||||
- Plaintext secrets are not committed to Git
|
||||
- Existing ignore rules in [ansible/.gitignore](../../.gitignore) protect vault
|
||||
files from accidental commits
|
||||
|
||||
### 3. Safe secret consumption patterns
|
||||
|
||||
- Use `ansible.builtin.template`, `ansible.builtin.copy`, and
|
||||
`ansible.builtin.lineinfile` instead of ad-hoc shell commands
|
||||
- Mark sensitive tasks with `no_log: true`
|
||||
- Set explicit file ownership and mode for rendered secret files
|
||||
|
||||
## Prerequisites
|
||||
|
||||
- Ansible installed on the control node
|
||||
- Access to [ansible.cfg](../../ansible.cfg) and your inventory
|
||||
- A vault password strategy:
|
||||
- Interactive prompt (`--ask-vault-pass`) for manual runs
|
||||
- Password file (`--vault-password-file`) for controlled automation
|
||||
|
||||
> [!IMPORTANT]
|
||||
> Do not store vault passwords in repository files or plaintext notes.
|
||||
|
||||
## Step-by-step onboarding
|
||||
|
||||
### Step 1: Create vault files
|
||||
|
||||
```bash
|
||||
# Group-level secrets (the directory name must match the inventory group the play targets)
|
||||
ansible-vault create group_vars/docker/vault.yml
|
||||
|
||||
# Host-level secrets
|
||||
ansible-vault create host_vars/docker-01/vault.yml
|
||||
```
|
||||
|
||||
### Step 2: Add secrets with naming conventions
|
||||
|
||||
```yaml
|
||||
# group_vars/docker/vault.yml
|
||||
grafana_admin_pass: "replace-me"
|
||||
watchtower_api_key_secret: "replace-me"
|
||||
```
|
||||
|
||||
### Step 3: Reference secrets in playbooks or roles
|
||||
|
||||
```yaml
|
||||
# playbooks/example.yml
|
||||
- name: Configure app secrets
|
||||
hosts: docker_hosts
|
||||
become: true
|
||||
tasks:
|
||||
- name: Render application environment file
|
||||
ansible.builtin.template:
|
||||
src: templates/app.env.j2
|
||||
dest: /opt/app/.env
|
||||
owner: root
|
||||
group: root
|
||||
mode: "0600"
|
||||
no_log: true
|
||||
```
|
||||
|
||||
```jinja2
|
||||
# templates/app.env.j2
|
||||
GRAFANA_ADMIN_PASSWORD={{ grafana_admin_pass }}
|
||||
WATCHTOWER_API_KEY={{ watchtower_api_key_secret }}
|
||||
```
|
||||
|
||||
### Step 4: Run with vault decryption
|
||||
|
||||
```bash
|
||||
# Interactive
|
||||
ansible-playbook -i inventory/hosts.ini playbooks/example.yml --ask-vault-pass
|
||||
|
||||
# Automated (secured local file)
|
||||
ansible-playbook -i inventory/hosts.ini playbooks/example.yml \
|
||||
--vault-password-file ~/.ansible/.vault-pass
|
||||
```
|
||||
|
||||
### Step 5: Verify idempotency and secrecy
|
||||
|
||||
```bash
|
||||
# Syntax check
|
||||
ansible-playbook -i inventory/hosts.ini playbooks/example.yml --syntax-check
|
||||
|
||||
# Idempotency check (run twice; the second run should report no changed tasks)
|
||||
ansible-playbook -i inventory/hosts.ini playbooks/example.yml --ask-vault-pass
|
||||
ansible-playbook -i inventory/hosts.ini playbooks/example.yml --ask-vault-pass
|
||||
```
|
||||
|
||||
## Why module-first instead of shell
|
||||
|
||||
- `ansible.builtin.template` and `ansible.builtin.copy` are idempotent and track
|
||||
file diffs
|
||||
- Explicit `owner`, `group`, and `mode` improve auditability
|
||||
- `shell` can leak secrets into command history and logs if not handled
|
||||
carefully
|
||||
- Module output is safer to control with `no_log: true`
|
||||
|
||||
## Security guardrails
|
||||
|
||||
- Keep `no_log: true` on any task that reads, writes, or otherwise handles secret values
|
||||
- Never print secret variables with `ansible.builtin.debug`
|
||||
- Scope secrets to the narrowest level possible (host before group when needed)
|
||||
- Rotate credentials by updating vault values and re-running playbooks
|
||||
- Prefer separate vault files per scope to limit blast radius
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
### Decryption failed
|
||||
|
||||
```bash
|
||||
ansible-vault view group_vars/docker/vault.yml
|
||||
```
|
||||
|
||||
Use the same vault password source used during file creation.
|
||||
|
||||
### Variable is undefined
|
||||
|
||||
- Confirm secret file path matches inventory group/host names
|
||||
- Confirm variable names match exactly in templates and tasks
|
||||
- Run with `-vv` and inspect which variable files loaded
|
||||
|
||||
### Secret file committed by mistake
|
||||
|
||||
1. Rotate affected credentials immediately
|
||||
2. Remove file from tracking
|
||||
3. Rewrite Git history if secrets were pushed to remote
|
||||
|
||||
## Integration notes
|
||||
|
||||
- Follow the quality checklist in
|
||||
[Ansible quality gates](../standards/ansible-quality-gates.md)
|
||||
- Keep naming aligned with
|
||||
[Naming conventions](../standards/naming-conventions.md)
|
||||
363
ansible/ansible-old/documentation/playbooks/onboard_new_host.md
Normal file
363
ansible/ansible-old/documentation/playbooks/onboard_new_host.md
Normal file
@ -0,0 +1,363 @@
|
||||
# Non-Proxmox Host Onboarding Playbook
|
||||
|
||||
## Overview
|
||||
|
||||
The `playbooks/onboarding/generic_host.yml` playbook automates bootstrap for non-Proxmox hosts and supports two profiles:
|
||||
|
||||
- `new`: full onboarding with security hardening.
|
||||
- `existing`: safe onboarding for pre-existing production hosts (key setup, Python, sudo, packages; skips SSH hardening).
|
||||
|
||||
Use `existing` for live systems like `10.0.0.151` (Traefik) and `10.0.0.251`.
|
||||
|
||||
## What It Does
|
||||
|
||||
### 1. Connectivity Test
|
||||
- Verifies SSH connection to target host
|
||||
- Uses raw commands (no Python required initially)
|
||||
- Provides clear error messages if connection fails
|
||||
|
||||
### 2. SSH Key Authentication
|
||||
- Creates `.ssh` directory with correct permissions
|
||||
- Copies your public SSH key to `authorized_keys`
|
||||
- Validates passwordless SSH authentication
|
||||
|
||||
### 3. Python & Prerequisites
|
||||
- Installs Python3 if not present
|
||||
- Installs `python3-apt` for Ansible module support
|
||||
- Gathers system facts
|
||||
|
||||
### 4. Passwordless Sudo
|
||||
- Creates sudoers configuration for your user
|
||||
- Validates sudo configuration syntax
|
||||
- Tests passwordless sudo access
|
||||
|
||||
### 5. Essential Packages
|
||||
- Updates apt cache
|
||||
- Installs essential tools (git, vim, curl, htop, etc.)
|
||||
|
||||
### 6. Basic Security
|
||||
- Disables root SSH login
|
||||
- Disables password authentication (SSH keys only)
|
||||
- Configures UFW firewall (allows SSH)
|
||||
|
||||
### 7. Final Validation
|
||||
- Tests complete passwordless authentication
|
||||
- Displays comprehensive onboarding summary
|
||||
|
||||
## Usage
|
||||
|
||||
### Method 1: Existing production hosts (safe profile)
|
||||
|
||||
```bash
|
||||
ansible-playbook -i inventory/hosts.ini playbooks/onboarding/generic_host.yml \
|
||||
-e "target_host=docker_hosts" \
|
||||
-e "onboard_user=chester" \
|
||||
-e "onboarding_profile=existing" \
|
||||
-k -K
|
||||
```
|
||||
|
||||
This is the recommended process for hosts that already run production workloads.
|
||||
|
||||
### Method 2: Net-new host onboarding (full hardening)
|
||||
|
||||
```bash
|
||||
# Onboard a single host
|
||||
ansible-playbook -i inventory/hosts.ini playbooks/onboarding/generic_host.yml \
|
||||
-e "target_host=docker-01" \
|
||||
-e "onboard_user=chester" \
|
||||
-e "onboarding_profile=new" \
|
||||
-k -K
|
||||
|
||||
# -k: Prompt for SSH password
|
||||
# -K: Prompt for sudo password
|
||||
```
|
||||
|
||||
### Method 3: Using Environment Variables
|
||||
|
||||
```bash
|
||||
# Set credentials via environment
|
||||
export ANSIBLE_SSH_PASS='your_password'
|
||||
export ANSIBLE_BECOME_PASS='your_password'
|
||||
|
||||
# Run playbook
|
||||
ansible-playbook -i inventory/hosts.ini playbooks/onboarding/generic_host.yml \
|
||||
-e "target_host=docker-01"
|
||||
```
|
||||
|
||||
### Method 4: Onboard Multiple Hosts
|
||||
|
||||
```bash
|
||||
# Add new hosts to inventory first
|
||||
# Then onboard them all at once
|
||||
ansible-playbook -i inventory/hosts.ini playbooks/onboarding/generic_host.yml \
|
||||
-e "target_host=new_servers" \
|
||||
-e "onboarding_profile=existing" \
|
||||
-k -K
|
||||
|
||||
# Where 'new_servers' is a group in your inventory
|
||||
```
|
||||
|
||||
## Required Variables
|
||||
|
||||
| Variable | Description | Default |
|
||||
| :--- | :--- | :--- |
|
||||
| `target_host` | Host or group to onboard | `all` |
|
||||
| `onboard_user` | Username for SSH/sudo | `chester` |
|
||||
| `onboarding_profile` | `new` (harden) or `existing` (safe) | `new` |
|
||||
| `onboard_password` | SSH and sudo password | From env or prompt |
|
||||
|
||||
## Prerequisites
|
||||
|
||||
### On Your Control Machine (jumpbox)
|
||||
- SSH key pair exists (`~/.ssh/id_ed25519`)
|
||||
- Ansible installed
|
||||
- Network connectivity to target host
|
||||
|
||||
### On Target Host
|
||||
- SSH server running
|
||||
- User account with sudo privileges
|
||||
- Network connectivity from control machine
|
||||
|
||||
## Step-by-Step First-Time Onboarding
|
||||
|
||||
### Step 1: Add Host to Inventory
|
||||
|
||||
```ini
|
||||
# inventory/hosts.ini
|
||||
[new_hosts]
|
||||
new-server ansible_host=10.0.0.252
|
||||
```
|
||||
|
||||
### Step 2: Test Connectivity
|
||||
|
||||
```bash
|
||||
# Verify SSH access manually first
|
||||
ssh chester@10.0.0.252
|
||||
```
|
||||
|
||||
### Step 3: Run Onboarding Playbook
|
||||
|
||||
```bash
|
||||
ansible-playbook -i inventory/hosts.ini playbooks/onboarding/generic_host.yml \
|
||||
-e "target_host=new-server" \
|
||||
-e "onboard_user=chester" \
|
||||
-e "onboarding_profile=new" \
|
||||
-k -K
|
||||
```
|
||||
|
||||
### Step 4: Verify Passwordless Access
|
||||
|
||||
```bash
|
||||
# Test Ansible ping without password
|
||||
ansible -i inventory/hosts.ini new-server -m ping
|
||||
|
||||
# Test SSH without password
|
||||
ssh chester@10.0.0.252 'sudo whoami'
|
||||
```
|
||||
|
||||
## Tag-Based Execution
|
||||
|
||||
Run specific sections only:
|
||||
|
||||
```bash
|
||||
# Test connectivity only
|
||||
ansible-playbook -i inventory/hosts.ini playbooks/onboarding/generic_host.yml \
|
||||
-e "target_host=docker-01" \
|
||||
--tags "connectivity" \
|
||||
-k
|
||||
|
||||
# Setup SSH keys only
|
||||
ansible-playbook -i inventory/hosts.ini playbooks/onboarding/generic_host.yml \
|
||||
-e "target_host=docker-01" \
|
||||
--tags "ssh" \
|
||||
-k -K
|
||||
|
||||
# Skip security hardening
|
||||
ansible-playbook -i inventory/hosts.ini playbooks/onboarding/generic_host.yml \
|
||||
-e "target_host=docker-01" \
|
||||
--skip-tags "security" \
|
||||
-k -K
|
||||
```
|
||||
|
||||
### Available Tags
|
||||
|
||||
| Tag | Section |
|
||||
| :--- | :--- |
|
||||
| `connectivity` | Connection test |
|
||||
| `test` | Connection test |
|
||||
| `ssh` | SSH key setup |
|
||||
| `setup` | All setup tasks |
|
||||
| `python` | Python installation |
|
||||
| `prerequisites` | Package prerequisites |
|
||||
| `sudo` | Passwordless sudo |
|
||||
| `packages` | Essential packages |
|
||||
| `security` | Security hardening |
|
||||
| `hardening` | Security hardening |
|
||||
| `validate` | Final validation |
|
||||
| `summary` | Onboarding summary |
|
||||
|
||||
## Expected Output
|
||||
|
||||
```
|
||||
PLAY [Onboard New Host to Ansible Management] ************************************
|
||||
|
||||
TASK [Test raw connection (no Python required)] **********************************
|
||||
ok: [docker-01]
|
||||
|
||||
TASK [Display connection status] *************************************************
|
||||
ok: [docker-01] => {
|
||||
"msg": "✅ Successfully connected to docker-01"
|
||||
}
|
||||
|
||||
...
|
||||
|
||||
TASK [Display onboarding summary] ************************************************
|
||||
ok: [docker-01] => {
|
||||
"msg": [
|
||||
"════════════════════════════════════════════════",
|
||||
"✅ HOST ONBOARDING COMPLETE",
|
||||
"════════════════════════════════════════════════",
|
||||
"Host: docker-01 (waldorf)",
|
||||
"IP: 10.0.0.251",
|
||||
"OS: Ubuntu 24.04",
|
||||
"Python: 3.12.3",
|
||||
"SSH Key Auth: ✅ Enabled",
|
||||
"Passwordless Sudo: ✅ Enabled",
|
||||
"Ansible User: chester",
|
||||
"════════════════════════════════════════════════"
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
### SSH Connection Failed
|
||||
|
||||
```bash
|
||||
# Test manual SSH first
|
||||
ssh chester@10.0.0.252
|
||||
|
||||
# Check SSH service on target
|
||||
ssh chester@10.0.0.252 'sudo systemctl status ssh'
|
||||
|
||||
# Verify firewall allows SSH
|
||||
ssh chester@10.0.0.252 'sudo ufw status'
|
||||
```
|
||||
|
||||
### Python Installation Failed
|
||||
|
||||
```bash
|
||||
# Manually install Python
|
||||
ssh chester@10.0.0.252 'sudo apt-get update && sudo apt-get install -y python3'
|
||||
```
|
||||
|
||||
### Sudo Password Prompt Still Appears
|
||||
|
||||
```bash
|
||||
# Check sudoers configuration
|
||||
ssh chester@10.0.0.252 'sudo cat /etc/sudoers.d/chester'
|
||||
|
||||
# Verify syntax
|
||||
ssh chester@10.0.0.252 'sudo visudo -c'
|
||||
```
|
||||
|
||||
### SSH Key Not Working After Setup
|
||||
|
||||
```bash
|
||||
# Check authorized_keys permissions
|
||||
ssh chester@10.0.0.252 'ls -la ~/.ssh/authorized_keys'
|
||||
|
||||
# Should be: -rw------- (600)
|
||||
|
||||
# Check SSH config on target
|
||||
ssh chester@10.0.0.252 'sudo grep -E "PubkeyAuthentication|PasswordAuthentication" /etc/ssh/sshd_config'
|
||||
```
|
||||
|
||||
## Security Considerations
|
||||
|
||||
### SSH Hardening Applied
|
||||
|
||||
- ✅ Root login disabled
|
||||
- ✅ Password authentication disabled (after key setup)
|
||||
- ✅ SSH keys required for all access
|
||||
|
||||
### Post-Onboarding Recommendations
|
||||
|
||||
1. **Review SSH Configuration**
|
||||
```bash
|
||||
ssh chester@host 'sudo sshd -T | grep -E "permit|password|pubkey"'
|
||||
```
|
||||
|
||||
2. **Configure Firewall Rules**
|
||||
```bash
|
||||
# Allow only required services
|
||||
ssh chester@host 'sudo ufw allow 22/tcp && sudo ufw enable'
|
||||
```
|
||||
|
||||
3. **Enable Automatic Security Updates**
|
||||
```bash
|
||||
ssh chester@host 'sudo apt-get install -y unattended-upgrades'
|
||||
```
|
||||
|
||||
4. **Set Up Fail2Ban**
|
||||
```bash
|
||||
ssh chester@host 'sudo apt-get install -y fail2ban'
|
||||
```
|
||||
|
||||
## Integration with Other Playbooks
|
||||
|
||||
After onboarding, you can run any playbook without passwords:
|
||||
|
||||
```bash
|
||||
# Install Docker
|
||||
ansible-playbook -i inventory/hosts.ini playbooks/manage_docker_environment.yml \
|
||||
--limit new-server \
|
||||
--tags "install"
|
||||
|
||||
# Configure networking
|
||||
ansible-playbook -i inventory/hosts.ini playbooks/baseline_network_config.yml \
|
||||
--limit new-server
|
||||
```
|
||||
|
||||
## Bulk Onboarding Workflow
|
||||
|
||||
For onboarding multiple hosts at once:
|
||||
|
||||
### 1. Create Temporary Inventory
|
||||
|
||||
```ini
|
||||
# inventory/new-hosts.ini
|
||||
[pending_onboard]
|
||||
server-01 ansible_host=10.0.0.101
|
||||
server-02 ansible_host=10.0.0.102
|
||||
server-03 ansible_host=10.0.0.103
|
||||
|
||||
[pending_onboard:vars]
|
||||
ansible_user=chester
|
||||
```
|
||||
|
||||
### 2. Run Onboarding
|
||||
|
||||
```bash
|
||||
ansible-playbook -i inventory/new-hosts.ini playbooks/onboarding/generic_host.yml \
|
||||
-e "target_host=pending_onboard" \
|
||||
-e "onboarding_profile=existing" \
|
||||
-k -K
|
||||
```
|
||||
|
||||
### 3. Merge into Main Inventory
|
||||
|
||||
After successful onboarding, add hosts to your main [inventory/hosts.ini](../../inventory/hosts.ini) file.
|
||||
|
||||
## Next Steps
|
||||
|
||||
After successful onboarding:
|
||||
|
||||
1. **Assign to appropriate groups** in [inventory/hosts.ini](../../inventory/hosts.ini)
|
||||
2. **Configure group_vars** for role-specific settings
|
||||
3. **Run role-specific playbooks** (Docker, networking, etc.)
|
||||
4. **Deploy monitoring exporter for standalone hosts**
|
||||
```bash
|
||||
ansible-playbook -i inventory/hosts.ini playbooks/monitoring/deploy_swarm_monitoring.yml --tags docker-hosts
|
||||
```
|
||||
5. **Document host purpose** in your infrastructure documentation
|
||||
@ -0,0 +1,236 @@
|
||||
# Watchtower monitoring onboarding and self-healing runbook
|
||||
|
||||
## Purpose
|
||||
|
||||
This runbook is the operator path for deploying, validating, and maintaining the full
|
||||
Watchtower monitoring stack.
|
||||
|
||||
It covers:
|
||||
|
||||
- Monitoring stack onboarding (all services).
|
||||
- Integration points between services and external Traefik.
|
||||
- Day-1 troubleshooting, including Authentik outpost restart loops.
|
||||
- Self-healing execution with safe, repeatable reconciliation.
|
||||
|
||||
## Scope
|
||||
|
||||
The canonical Watchtower monitoring scope is:
|
||||
|
||||
- traefik-kop
|
||||
- Prometheus
|
||||
- Grafana
|
||||
- Uptime Kuma
|
||||
- node-exporter
|
||||
- watchtower-cadvisor
|
||||
- Dozzle
|
||||
- Authentik outpost for Dozzle
|
||||
- Loki
|
||||
- Promtail
|
||||
- blackbox-exporter
|
||||
|
||||
## Architecture summary
|
||||
|
||||
- External Traefik ingress runs on `10.0.0.151` and is not migrated into Swarm.
|
||||
- Swarm exporters run on Swarm nodes.
|
||||
- Watchtower hosts aggregation, storage, visualization, and logging services.
|
||||
- Traefik labels are used for HTTPS-routed UIs (Grafana, Dozzle, Uptime Kuma).
|
||||
|
||||
## Prerequisites
|
||||
|
||||
1. Inventory groups are defined and reachable: `swarm_managers`, `swarm_workers`,
|
||||
`swarm_hosts`, and `watchtower`.
|
||||
2. Docker is installed on all target nodes.
|
||||
3. Overlay network `proxy-net` exists for Swarm workloads.
|
||||
4. Vault file exists at `ansible/group_vars/vault/all.yml` or equivalent secrets are
|
||||
provided through secure environment variables.
|
||||
5. Required secrets are present:
|
||||
- `vault_grafana_admin_password`
|
||||
- `vault_authentik_outpost_dozzle_token`
|
||||
|
||||
If the Authentik token is not available yet, set `monitoring_enable_authentik_outpost=false`
|
||||
for bootstrap deployment and keep Dozzle private until token onboarding is complete.
|
||||
|
||||
> [!WARNING]
|
||||
> Never hardcode tokens or passwords in compose files, playbooks, or helper scripts.
|
||||
> Use Vault variables and rotate credentials if any plaintext secret was committed.
|
||||
|
||||
## Deployment order
|
||||
|
||||
1. Exporters on Swarm nodes (`node-exporter`, `cAdvisor`).
|
||||
2. Dozzle agent on Swarm managers.
|
||||
3. Watchtower stack (`traefik-kop`, Prometheus, Grafana, Uptime Kuma, Dozzle,
|
||||
Authentik outpost, Loki, Promtail, blackbox-exporter).
|
||||
4. Post-deploy verification and dashboard bootstrap.
|
||||
|
||||
## Deploy commands
|
||||
|
||||
Run from `ansible/`:
|
||||
|
||||
```bash
|
||||
ansible-playbook -i inventory/hosts.ini playbooks/monitoring/deploy_swarm_monitoring.yml
|
||||
```
|
||||
|
||||
Target only Swarm exporters:
|
||||
|
||||
```bash
|
||||
ansible-playbook -i inventory/hosts.ini playbooks/monitoring/deploy_swarm_monitoring.yml --tags swarm
|
||||
```
|
||||
|
||||
Target only Watchtower stack:
|
||||
|
||||
```bash
|
||||
ansible-playbook -i inventory/hosts.ini playbooks/monitoring/deploy_swarm_monitoring.yml --tags watchtower
|
||||
```
|
||||
|
||||
## Service-by-service onboarding checks
|
||||
|
||||
### traefik-kop
|
||||
|
||||
- Verify service starts and can reach Redis endpoint `10.0.0.151:6379`.
|
||||
- Verify that route updates published by traefik-kop are picked up by the external Traefik instance.
|
||||
|
||||
### Prometheus
|
||||
|
||||
- Verify readiness endpoint:
|
||||
|
||||
```bash
|
||||
curl -fsS http://10.0.0.200:9091/-/ready
|
||||
```
|
||||
|
||||
- Verify targets include expected managers, workers, and Watchtower node-exporter.
|
||||
|
||||
### Grafana
|
||||
|
||||
- Verify HTTPS route at configured domain.
|
||||
- Confirm login with admin user and vault-provided password.
|
||||
- Add data sources:
|
||||
- Prometheus: `http://prometheus:9090`
|
||||
- Loki: `http://loki:3100`
|
||||
|
||||
### Uptime Kuma
|
||||
|
||||
- Verify HTTPS route and UI load.
|
||||
- Add core checks for:
|
||||
- External Traefik endpoint
|
||||
- Watchtower host health
|
||||
- Swarm manager API reachability
|
||||
|
||||
### node-exporter and cAdvisor
|
||||
|
||||
- Verify metrics endpoints are reachable from each node.
|
||||
- Confirm Prometheus scrape status is `up` for all exporters.
|
||||
- Verify local Watchtower cAdvisor endpoint:
|
||||
|
||||
```bash
|
||||
curl -fsS http://10.0.0.200:18080/metrics | head
|
||||
```
|
||||
|
||||
### Dozzle and Authentik outpost
|
||||
|
||||
- Verify Dozzle HTTPS route.
|
||||
- Verify Authentik outpost endpoint routing under `/outpost.goauthentik.io/`.
|
||||
- Verify forward-auth middleware is attached and blocking unauthenticated access.
|
||||
|
||||
### Loki and Promtail
|
||||
|
||||
- Verify Loki API health via container logs and ingestion behavior.
|
||||
- Verify Promtail discovers Docker logs and labels streams by project/service.
|
||||
|
||||
### blackbox-exporter (network and endpoint probes)
|
||||
|
||||
- Verify Blackbox exporter is reachable:
|
||||
|
||||
```bash
|
||||
curl -fsS http://10.0.0.200:9115/metrics | head
|
||||
```
|
||||
|
||||
- Verify Prometheus shows probe targets in `blackbox-probes` job.
|
||||
- Add probe targets through `monitoring_probe_targets` in group vars.
|
||||
|
||||
## Day-1 troubleshooting
|
||||
|
||||
### Authentik outpost restart loop
|
||||
|
||||
1. Verify token presence in rendered `.env` for stack directory.
|
||||
1. Confirm token matches active Authentik outpost token in Authentik admin.
|
||||
1. Confirm Traefik middleware label references the same outpost service.
|
||||
1. Check container logs:
|
||||
|
||||
```bash
|
||||
docker logs authentik-outpost-dozzle --tail 200
|
||||
```
|
||||
|
||||
1. Reconcile stack after token correction:
|
||||
|
||||
```bash
|
||||
ansible-playbook -i inventory/hosts.ini playbooks/monitoring/deploy_swarm_monitoring.yml --tags watchtower
|
||||
```
|
||||
|
||||
### Backlog item: Authentik token pending
|
||||
|
||||
1. Keep `monitoring_enable_authentik_outpost=false` while token is unavailable.
|
||||
1. Do not expose Dozzle publicly without Authentik forward-auth.
|
||||
1. Re-enable outpost after token handoff and re-run watchtower tag.
|
||||
|
||||
### Prometheus missing targets
|
||||
|
||||
1. Confirm inventory contains correct node IPs and groups.
|
||||
2. Re-run deployment to re-render scrape config.
|
||||
3. Query target API and inspect dropped targets.
|
||||
|
||||
### Blackbox probes failing
|
||||
|
||||
1. Confirm target is reachable from Watchtower network path.
|
||||
1. Confirm probe module matches target protocol (`icmp`, `tcp_connect`, `http_2xx`).
|
||||
1. Confirm Prometheus relabeling routes probes to `watchtower_ip:9115`.
|
||||
|
||||
### Dozzle cannot see remote logs
|
||||
|
||||
1. Confirm `dozzle-agent` service is healthy on manager nodes.
|
||||
2. Confirm remote agent endpoints and ports are reachable.
|
||||
3. Confirm Docker socket mount is present and read-only where expected.
|
||||
|
||||
## Self-healing model
|
||||
|
||||
Self-healing is implemented as scheduled reconciliation, not ad-hoc manual edits.
|
||||
|
||||
### Current helper script status
|
||||
|
||||
- `ansible/scripts/pi_pull_updates.sh` is retained as a helper and now expects
|
||||
configurable environment variables instead of embedded credentials.
|
||||
- `ansible/scripts/pi_init.sh` is optional for operator bootstrap and is not
|
||||
required for monitoring stack reconciliation.
|
||||
|
||||
### Recommended execution pattern
|
||||
|
||||
1. Use `ansible-pull` to sync and apply `ansible/playbooks/self-heal/watchtower.yml`.
|
||||
2. Run through a scheduler (prefer `systemd` timer for reliability and observability).
|
||||
3. Keep logs in a persistent path and alert on repeated failures.
|
||||
|
||||
Example manual run:
|
||||
|
||||
```bash
|
||||
REPO_URL=git@git.castaldifamily.com:nathan/homelab.git \
|
||||
PLAYBOOK_PATH=ansible/playbooks/self-heal/watchtower.yml \
|
||||
/home/chester/homelab/ansible/scripts/pi_pull_updates.sh
|
||||
```
|
||||
|
||||
> [!IMPORTANT]
|
||||
> If your repository is private, use SSH deploy keys or vault-backed secret injection.
|
||||
> Do not place long-lived personal access tokens in script files.
|
||||
|
||||
## Idempotency and rollback
|
||||
|
||||
- Re-running deployment playbooks is expected and safe; desired state is reconciled.
|
||||
- Keep stack definitions in Git and avoid manual edits in `/opt/stacks`.
|
||||
- Rollback method:
|
||||
1. Revert the offending commit in Git.
|
||||
2. Re-run deployment playbook.
|
||||
3. Validate endpoints and target health.
|
||||
|
||||
## Operational safety rules
|
||||
|
||||
- Do not run services as root unless technically required and documented.
|
||||
- Avoid broad host mounts unless required for telemetry collection.
|
||||
- Keep exposed admin ports behind Traefik and authentication middleware.
|
||||
- Validate health and auth behavior before declaring changes complete.
|
||||
@ -0,0 +1,648 @@
|
||||
---
|
||||
title: "Prompt Repository Analysis Report"
|
||||
date: "2026-01-09"
|
||||
author: "FrankGPT v4"
|
||||
type: "Analysis"
|
||||
---
|
||||
|
||||
# Prompt Repository Analysis Report
|
||||
|
||||
## Executive Summary
|
||||
|
||||
Analyzed **26 prompt files** across the `.github/prompts/` directory. The repository contains a mix of production-ready, draft, and deprecated prompts with varying levels of sophistication.
|
||||
|
||||
**Key Findings:**
|
||||
- **Overlap Issues:** 7 prompts have significant overlap and can be converged
|
||||
- **Deprecated Content:** 3 "OLD.*" prompts should be archived or removed
|
||||
- **Draft Quality:** 4 draft prompts lack implementation detail
|
||||
- **Top 5 Adjustments Needed:** See Section 4 for detailed recommendations
|
||||
|
||||
---
|
||||
|
||||
## 1. Overlap Analysis: Convergence Opportunities
|
||||
|
||||
### 1.1 Service Management Workflows (High Overlap)
|
||||
|
||||
**Affected Prompts:**
|
||||
- `service-new.prompt.md`
|
||||
- `service-review.prompt.md`
|
||||
- `service-standardize.prompt.md`
|
||||
- `service-troubleshoot.prompt.md`
|
||||
- `service-decommission.prompt.md`
|
||||
- `service-migration.prompt.md`
|
||||
|
||||
**Analysis:**
|
||||
All six prompts share a common structure:
|
||||
- Gated, step-by-step workflow
|
||||
- Service-focused (Docker/Compose)
|
||||
- Inventory integration (`.github/knowledge/inventory.md`)
|
||||
- Explicit confirmation phrases
|
||||
- Upstream documentation validation
|
||||
|
||||
**Current Duplication:**
|
||||
- **Pre-flight checks:** SSH validation, service discovery logic repeated 6 times
|
||||
- **Inventory lookups:** Same RAG pattern in `service-new`, `service-review`, `service-standardize`
|
||||
- **Gate structure:** Nearly identical gate format across all service prompts
|
||||
- **Output format:** All produce Markdown reports with similar sections
|
||||
|
||||
**Convergence Recommendation:**
|
||||
|
||||
**Option A: Meta-Prompt Architecture (Recommended)**
|
||||
|
||||
Create a single `service-workflow.meta.prompt.md` that defines:
|
||||
|
||||
```yaml
|
||||
# service-workflow.meta.prompt.md
|
||||
workflows:
|
||||
- name: new
|
||||
gates: [0, 1, 2, 3, 4, 5]
|
||||
phases: [validate_sources, plan, analyze, patch, verify]
|
||||
- name: review
|
||||
gates: [0, 1, 2, 3, 4]
|
||||
phases: [discover, compare, report, patch, verify]
|
||||
- name: standardize
|
||||
gates: [0, 1, 2, 3, 4]
|
||||
phases: [locate, assess_risk, propose, apply, bounce]
|
||||
```
|
||||
|
||||
Then reduce individual prompts to:
|
||||
|
||||
```markdown
|
||||
# service-new.prompt.md
|
||||
---
|
||||
extends: service-workflow.meta
|
||||
workflow: new
|
||||
---
|
||||
[Workflow-specific customizations only]
|
||||
```
|
||||
|
||||
**Option B: Consolidate to Single File with Modes**
|
||||
|
||||
Create `service-management.prompt.md` with mode flags:
|
||||
|
||||
```markdown
|
||||
# Usage
|
||||
/service-management mode=new app=traefik
|
||||
/service-management mode=review app=immich
|
||||
```
|
||||
|
||||
**Impact:**
|
||||
- **Reduction:** 6 files → 1 meta-prompt + 6 lightweight configs (or 1 unified file)
|
||||
- **Maintenance:** Single source of truth for gates, inventory logic, security checks
|
||||
- **Risk:** Low if phased migration
|
||||
|
||||
---
|
||||
|
||||
### 1.2 Session Management (Medium Overlap)
|
||||
|
||||
**Affected Prompts:**
|
||||
- `session-start.prompt.md`
|
||||
- `session-end.prompt.md`
|
||||
- `session-status.prompt.md`
|
||||
- `OLD.session-start.prompt.md`
|
||||
- `OLD.session-end.prompt.md`
|
||||
- `OLD.session-status.prompt.md`
|
||||
|
||||
**Analysis:**
|
||||
- **`OLD.*` versions:** Clearly deprecated (no frontmatter, less structured)
|
||||
- **Current versions:** All reference `SESSION_SNAPSHOT*.md` and perform RAG searches
|
||||
- **Overlap:** All three prompts perform git status checks and snapshot retrieval
|
||||
|
||||
**Convergence Recommendation:**
|
||||
|
||||
**Create:** `session-lifecycle.prompt.md`
|
||||
|
||||
```markdown
|
||||
# session-lifecycle.prompt.md
|
||||
modes:
|
||||
- start: Load snapshot, check drift, present menu
|
||||
- status: Quick realignment without full context
|
||||
- end: Generate snapshot, git operations
|
||||
```
|
||||
|
||||
**Impact:**
|
||||
- **Reduction:** 6 files → 1 unified prompt
|
||||
- **Archive:** Move OLD.* to `.github/prompts/archive/`
|
||||
- **Risk:** Very low, well-defined workflows
|
||||
|
||||
---
|
||||
|
||||
### 1.3 Markdown Conversion (Low Overlap but Redundant)
|
||||
|
||||
**Affected Prompts:**
|
||||
- `md2htmlDARK.prompt.md`
|
||||
- `md2htmlLIGHT.prompt.md`
|
||||
|
||||
**Analysis:**
|
||||
Both prompts are 90% identical, differing only in CSS color schemes.
|
||||
|
||||
**Convergence Recommendation:**
|
||||
|
||||
**Single Prompt with Parameter:**
|
||||
|
||||
```markdown
|
||||
# md2html.prompt.md
|
||||
theme: ${input:theme} # Options: dark, light
|
||||
```
|
||||
|
||||
**Impact:**
|
||||
- **Reduction:** 2 files → 1 file
|
||||
- **Risk:** None
|
||||
|
||||
---
|
||||
|
||||
### 1.4 Draft Prompts (Should Be Eliminated or Completed)
|
||||
|
||||
**Affected Prompts:**
|
||||
- `service-decommission.prompt.md` (draft)
|
||||
- `service-migration.prompt.md` (draft)
|
||||
- `security-hardening.prompt.md` (draft)
|
||||
- `performance-tuning.prompt.md` (draft)
|
||||
|
||||
**Analysis:**
|
||||
All four are labeled "Draft" with generic checklists. They lack:
|
||||
- Gate structure used in other prompts
|
||||
- RAG integration
|
||||
- Specific commands or validation steps
|
||||
- Safety guardrails
|
||||
|
||||
**Recommendation:**
|
||||
Either:
|
||||
1. **Complete them** using the pattern from `service-new.prompt.md` (gated workflow)
|
||||
2. **Archive them** to `.github/prompts/drafts/` until needed
|
||||
3. **Eliminate them** if not actively used
|
||||
|
||||
**Impact:**
|
||||
- Reduces "prompt noise" in main directory
|
||||
- Sets quality bar for production prompts
|
||||
|
||||
---
|
||||
|
||||
## 2. Summary of Convergence Opportunities
|
||||
|
||||
| Prompt Group | Current Count | Proposed Count | Reduction |
|
||||
| :--- | :---: | :---: | :---: |
|
||||
| Service Management | 6 | 1 (+ 6 configs) | 83% code duplication |
|
||||
| Session Lifecycle | 6 | 1 | 83% |
|
||||
| Markdown HTML | 2 | 1 | 50% |
|
||||
| Drafts | 4 | 0 (archived) | 100% |
|
||||
| **Total Prompts** | **26** | **15–17** | **35–42% reduction** |
|
||||
|
||||
---
|
||||
|
||||
## 3. Quality Tiers
|
||||
|
||||
### Tier 1: Production-Ready (8 prompts)
|
||||
These prompts have complete implementation, gate structure, and clear success criteria:
|
||||
|
||||
1. ✅ `service-new.prompt.md` - Best-in-class structure
|
||||
2. ✅ `service-review.prompt.md` - Comprehensive validation
|
||||
3. ✅ `service-standardize.prompt.md` - Clear versioning logic
|
||||
4. ✅ `service-troubleshoot.prompt.md` - OODA loop methodology
|
||||
5. ✅ `sso-onboarding.prompt.md` - Authentik integration
|
||||
6. ✅ `create-commit.msg.prompt.md` - RAG + Conventional Commits
|
||||
7. ✅ `clean-git.prompt.md` - ReAct protocol, security checks
|
||||
8. ✅ `generateVulnerabilitiesReport.prompt.md` - Structured output
|
||||
|
||||
### Tier 2: Functional but Needs Polish (5 prompts)
|
||||
|
||||
9. 🟡 `session-start.prompt.md` - Missing detailed menu structure
|
||||
10. 🟡 `session-end.prompt.md` - Template fallback not defined
|
||||
11. 🟡 `session-status.prompt.md` - Drift detection logic vague
|
||||
12. 🟡 `reviewDockerCompose.prompt.md` - Good but lacks gates
|
||||
13. 🟡 `ansible-tutor.prompt.md` - Too brief, needs examples
|
||||
|
||||
### Tier 3: Draft/Incomplete (9 prompts)
|
||||
|
||||
14. 🔴 `service-decommission.prompt.md` - Generic checklist only
|
||||
15. 🔴 `service-migration.prompt.md` - Generic checklist only
|
||||
16. 🔴 `security-hardening.prompt.md` - Generic checklist only
|
||||
17. 🔴 `performance-tuning.prompt.md` - Generic checklist only
|
||||
18. 🔴 `create-readme.prompt.md` - Incomplete template
|
||||
19. 🔴 `doc-lint.prompt.md` - Phase 3 cut off mid-section
|
||||
20. 🔴 `md2htmlDARK.prompt.md` - Functional but unmaintained
|
||||
21. 🔴 `md2htmlLIGHT.prompt.md` - Duplicate
|
||||
22. 🔴 `README.md` - Outdated references
|
||||
|
||||
### Tier 4: Deprecated (3 prompts)
|
||||
|
||||
23. ⚫ `OLD.session-start.prompt.md` - Archive
|
||||
24. ⚫ `OLD.session-end.prompt.md` - Archive
|
||||
25. ⚫ `OLD.create-commit-msg.prompt.md` - Archive
|
||||
|
||||
---
|
||||
|
||||
## 4. Top 5 Prompts Needing Adjustments
|
||||
|
||||
### 🥇 Rank 1: `reviewDockerCompose.prompt.md`
|
||||
|
||||
**Current State:** Functional mentor-led review prompt but lacks the safety gates present in newer prompts.
|
||||
|
||||
**Issues:**
|
||||
- No explicit confirmation gates (user can't stop workflow)
|
||||
- No RAG integration with inventory or upstream docs
|
||||
- Security audit logic not DRY (duplicates `generateVulnerabilitiesReport.prompt.md`)
|
||||
- Missing rollback/recovery procedures
|
||||
|
||||
**Impact Score:** 9/10 (Used for critical security audits)
|
||||
|
||||
**Recommended Improvements:**
|
||||
|
||||
1. **Add Gate Structure:**
|
||||
```markdown
|
||||
## Gate 0 — confirm target file
|
||||
User must reply exactly: `REVIEW: <compose-file>`
|
||||
|
||||
## Gate 1 — confirm findings
|
||||
User must reply exactly: `CONFIRM FINDINGS: <file>`
|
||||
|
||||
## Gate 2 — apply patches (if requested)
|
||||
User must reply exactly: `APPLY PATCHES: <file>`
|
||||
```
|
||||
|
||||
2. **Integrate with Vulnerability Report:**
|
||||
```markdown
|
||||
## Step 1 — Run Security Scan First
|
||||
Before manual review, execute:
|
||||
`/generateVulnerabilitiesReport` on the target file.
|
||||
Reference its output to avoid duplicating security checks.
|
||||
```
|
||||
|
||||
3. **Add Inventory Cross-Check:**
|
||||
```markdown
|
||||
## Step 2 — Validate Against Inventory
|
||||
Search `.github/knowledge/inventory.md` for the service.
|
||||
Compare declared image version vs. upstream latest.
|
||||
```
|
||||
|
||||
4. **Define Rollback:**
|
||||
```markdown
|
||||
## Recovery Procedure
|
||||
If changes break the service:
|
||||
1. `git checkout HEAD -- docker-compose.yml`
|
||||
2. `docker compose up -d`
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### 🥈 Rank 2: `ansible-tutor.prompt.md`
|
||||
|
||||
**Current State:** Minimal prompt with good intent but lacks examples and structure.
|
||||
|
||||
**Issues:**
|
||||
- Only ~15 lines (vs. 150+ in mature prompts)
|
||||
- No gate structure for safety
|
||||
- No examples of "good" vs. "bad" Ansible patterns
|
||||
- Missing integration with existing playbooks in the repo
|
||||
- No validation steps
|
||||
|
||||
**Impact Score:** 8/10 (Critical for teaching correct Ansible patterns)
|
||||
|
||||
**Recommended Improvements:**
|
||||
|
||||
1. **Add Real-World Examples:**
|
||||
```markdown
|
||||
## Anti-Pattern Detection
|
||||
|
||||
### ❌ Bad: Shell Command Overuse
|
||||
```yaml
|
||||
- name: Install Docker
|
||||
shell: curl -fsSL get.docker.com | bash
|
||||
```
|
||||
|
||||
### ✅ Good: Idempotent Module Use
|
||||
```yaml
|
||||
- name: Install Docker
|
||||
apt:
|
||||
name: docker-ce
|
||||
state: present
|
||||
```
|
||||
|
||||
2. **Integrate with Existing Repo:**
|
||||
```markdown
|
||||
## Step 1 — Scan Existing Playbooks
|
||||
Before generating new code:
|
||||
1. Search workspace for `playbooks/*.yml`
|
||||
2. Extract patterns from `roles/*/tasks/main.yml`
|
||||
3. Align new code with existing style
|
||||
```
|
||||
|
||||
3. **Add Safety Gates:**
|
||||
```markdown
|
||||
## Gate 1 — Destructive Action Check
|
||||
If the proposed task includes any of these modules:
|
||||
- `shell` with `rm`, `dd`, `mkfs`
|
||||
- `file` with `state: absent` on system paths
|
||||
|
||||
STOP and require explicit confirmation:
|
||||
User must reply: `I UNDERSTAND THE RISK: <task-name>`
|
||||
```
|
||||
|
||||
4. **Add Validation Workflow:**
|
||||
```markdown
|
||||
## Step 4 — Validation (Required)
|
||||
1. Run `ansible-playbook --syntax-check playbook.yml`
|
||||
2. Run `ansible-playbook --check playbook.yml` (dry-run)
|
||||
3. Provide copy/paste commands for user verification
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### 🥉 Rank 3: `session-status.prompt.md`
|
||||
|
||||
**Current State:** Cognitive realignment prompt with vague drift detection logic.
|
||||
|
||||
**Issues:**
|
||||
- "Drift Check" criteria poorly defined
|
||||
- No quantifiable metrics (how far off-track is "drift"?)
|
||||
- Missing actionable output (no clear commands)
|
||||
- Phase 3 output format not standardized
|
||||
|
||||
**Impact Score:** 7/10 (Used frequently but output inconsistent)
|
||||
|
||||
**Recommended Improvements:**
|
||||
|
||||
1. **Define Drift Quantitatively:**
|
||||
```markdown
|
||||
## Phase 2: Drift Calculation
|
||||
|
||||
Compute drift score:
|
||||
- Active file NOT in snapshot "Files Changed": +2 drift
|
||||
- Terminal command NOT in snapshot "Next Steps": +1 drift
|
||||
- Open files > 5 and none in snapshot: +3 drift
|
||||
|
||||
Drift Levels:
|
||||
- 0-1: ✅ On track
|
||||
- 2-3: ⚠️ Minor drift
|
||||
- 4+: 🚨 Major drift (pruning required)
|
||||
```
|
||||
|
||||
2. **Standardize HUD Output:**
|
||||
````markdown
|
||||
## Phase 3: Heads-Up Display (HUD)
|
||||
|
||||
### Status Report
|
||||
| Metric | Status | Action |
|
||||
|:---|:---|:---|
|
||||
| Drift Score | 4 🚨 | Pruning recommended |
|
||||
| Last Snapshot | 2h ago | Recent |
|
||||
| Active Task | Fix traefik labels | ⚠️ Not in snapshot |
|
||||
| Blockers | None | - |
|
||||
|
||||
### Recommended Command
|
||||
To realign, run:
|
||||
```bash
|
||||
git checkout main
|
||||
cd _thelab/core/web/traefik
|
||||
```
|
||||
````
|
||||
|
||||
3. **Add Memory Compression:**
|
||||
```markdown
|
||||
## Phase 4: Context Compression (If Drift > 5)
|
||||
Summarize current conversation in 3 bullets:
|
||||
- What we tried
|
||||
- What failed
|
||||
- What's next
|
||||
|
||||
Then clear terminal history to reduce cognitive load.
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### 🏅 Rank 4: Service Draft Prompts (Group)
|
||||
|
||||
**Affected:** `service-decommission`, `service-migration`, `security-hardening`, `performance-tuning`
|
||||
|
||||
**Current State:** All are generic checklists with no implementation logic.
|
||||
|
||||
**Issues:**
|
||||
- No gate structure
|
||||
- No integration with existing tooling
|
||||
- No validation steps
|
||||
- No examples or commands
|
||||
|
||||
**Impact Score:** 6/10 (Blocking future workflows)
|
||||
|
||||
**Recommended Improvements:**
|
||||
|
||||
**Template to Follow:** Use `service-new.prompt.md` as the gold standard.
|
||||
|
||||
**Example: Complete `service-decommission.prompt.md`**
|
||||
|
||||
```markdown
|
||||
---
|
||||
description: "Guided, gated workflow for safely decommissioning a service."
|
||||
---
|
||||
|
||||
# [ROLE]
|
||||
You are a **DevOps SRE** acting as a **decomm specialist**.
|
||||
|
||||
# [GOAL]
|
||||
Safely retire a service by:
|
||||
- Backing up all data and configs
|
||||
- Validating no dependencies
|
||||
- Removing from production
|
||||
- Updating documentation
|
||||
|
||||
# [INPUTS]
|
||||
- Target service name: `${input:serviceName}`
|
||||
- Backup destination: `${input:backupPath}`
|
||||
- Inventory file path: `${input:inventoryFile}`
|
||||
|
||||
# [WORKFLOW]
|
||||
|
||||
## Gate 0 — select service for decommission
|
||||
User must reply exactly: `DECOMMISSION: <service-name>`
|
||||
|
||||
## Step 1 — dependency scan
|
||||
Search all `docker-compose.yml` files for:
|
||||
- Services with `depends_on: <service-name>`
|
||||
- Networks shared with this service
|
||||
- Volumes referenced by other services
|
||||
|
||||
If dependencies found, STOP and list them.
|
||||
|
||||
## Gate 1 — confirm no dependencies
|
||||
User must reply exactly: `CONFIRM NO DEPS: <service-name>`
|
||||
|
||||
## Step 2 — backup execution
|
||||
1. Export service data: `docker compose cp <service>:/data ./backup/`
|
||||
2. Export configs: `docker compose config > backup/compose.yml`
|
||||
3. Verify backup integrity
|
||||
|
||||
## Gate 2 — confirm backup complete
|
||||
User must reply exactly: `BACKUP VERIFIED: <service-name>`
|
||||
|
||||
## Step 3 — removal
|
||||
1. Stop service: `docker compose stop <service>`
|
||||
2. Remove container: `docker compose rm <service>`
|
||||
3. Remove from compose file
|
||||
4. Remove from inventory
|
||||
|
||||
## Step 4 — validation
|
||||
1. `docker compose config` (syntax check)
|
||||
2. `docker compose ps` (ensure service gone)
|
||||
3. Check logs for errors in dependent services
|
||||
|
||||
## Gate 3 — confirm clean removal
|
||||
User must reply exactly: `REMOVAL CONFIRMED: <service-name>`
|
||||
|
||||
## Step 5 — documentation update
|
||||
Update:
|
||||
- `.github/knowledge/inventory.md` (mark as decommissioned)
|
||||
- `documentation/architecture/` (remove service from diagrams)
|
||||
- `README.md` (if listed)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### 🏅 Rank 5: `doc-lint.prompt.md`
|
||||
|
||||
**Current State:** Incomplete - Phase 3 report section is cut off.
|
||||
|
||||
**Issues:**
|
||||
- Output section truncated at line 50 (file continues to 61)
|
||||
- Missing "Recommended Fixes" and "Low Priority" sections
|
||||
- No auto-fix capability
|
||||
- No integration with `style.markdown.md` validation
|
||||
|
||||
**Impact Score:** 5/10 (Useful but incomplete)
|
||||
|
||||
**Recommended Improvements:**
|
||||
|
||||
1. **Complete the Report Structure:**
|
||||
```markdown
|
||||
### Phase 3: The Report
|
||||
|
||||
#### 🔴 Critical Errors (Must Fix)
|
||||
- [Line 42] Missing language tag in code block
|
||||
- [Line 105] Broken internal link: `./missing-file.md`
|
||||
|
||||
#### 🟡 Recommended Improvements
|
||||
- [Line 12] Use Sentence Case for heading
|
||||
- [Line 67] Replace "e.g." with "for example"
|
||||
|
||||
#### 🔵 Low Priority / Style
|
||||
- [Line 89] Consider adding more whitespace between sections
|
||||
```
|
||||
|
||||
2. **Add Auto-Fix Mode:**
|
||||
```markdown
|
||||
## Phase 4: Auto-Fix (Optional)
|
||||
|
||||
If user replies exactly: `AUTO-FIX: <filename>`
|
||||
|
||||
Then apply these corrections:
|
||||
- Add language tags to code blocks
|
||||
- Convert headers to Sentence Case
|
||||
- Remove trailing whitespace
|
||||
- Fix relative links
|
||||
```
|
||||
|
||||
3. **Add Validation:**
|
||||
```markdown
|
||||
## Phase 5: Validation
|
||||
|
||||
After fixes:
|
||||
1. Re-run lint
|
||||
2. Confirm 0 Critical Errors
|
||||
3. Generate pass/fail badge for README
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 5. Implementation Roadmap
|
||||
|
||||
### Phase 1: Immediate Cleanup (Week 1)
|
||||
- [ ] Archive OLD.* prompts to `.github/prompts/archive/`
|
||||
- [ ] Move draft prompts to `.github/prompts/drafts/`
|
||||
- [ ] Converge `md2html` into single parameterized prompt
|
||||
- [ ] Update `README.md` with accurate inventory
|
||||
|
||||
### Phase 2: High-Impact Improvements (Weeks 2-3)
|
||||
- [ ] Enhance `reviewDockerCompose.prompt.md` (Rank 1)
|
||||
- [ ] Expand `ansible-tutor.prompt.md` (Rank 2)
|
||||
- [ ] Fix `session-status.prompt.md` drift logic (Rank 3)
|
||||
- [ ] Complete `doc-lint.prompt.md` (Rank 5)
|
||||
|
||||
### Phase 3: Service Prompt Convergence (Week 4)
|
||||
- [ ] Create `service-workflow.meta.prompt.md`
|
||||
- [ ] Refactor 6 service prompts to use meta-prompt
|
||||
- [ ] Test all workflows with real use cases
|
||||
|
||||
### Phase 4: Draft Completion (Weeks 5-6)
|
||||
- [ ] Complete `service-decommission.prompt.md`
|
||||
- [ ] Complete `service-migration.prompt.md`
|
||||
- [ ] Complete `security-hardening.prompt.md`
|
||||
- [ ] Complete `performance-tuning.prompt.md`
|
||||
|
||||
---
|
||||
|
||||
## 6. Metrics & Success Criteria
|
||||
|
||||
### Baseline (Current State)
|
||||
- **Total Prompts:** 26
|
||||
- **Production-Ready:** 8 (31%)
|
||||
- **Code Duplication:** ~60% across service prompts
|
||||
- **Deprecated Content:** 3 prompts
|
||||
|
||||
### Target State (Post-Implementation)
|
||||
- **Total Prompts:** 15-17 (35-42% reduction)
|
||||
- **Production-Ready:** 15 (88%)
|
||||
- **Code Duplication:** <20%
|
||||
- **Deprecated Content:** 0 (archived)
|
||||
|
||||
### Quality Gates
|
||||
- ✅ All production prompts have gate structure
|
||||
- ✅ All prompts have YAML frontmatter
|
||||
- ✅ All prompts reference methodology (ReAct, CoT, etc.)
|
||||
- ✅ All prompts include validation steps
|
||||
- ✅ All prompts have rollback procedures
|
||||
|
||||
---
|
||||
|
||||
## 7. Recommendations Summary
|
||||
|
||||
### Critical Actions
|
||||
1. **Converge service prompts** → Single meta-prompt pattern (saves ~800 lines of duplicate code)
|
||||
2. **Fix `reviewDockerCompose.prompt.md`** → Add gates and integrate with vulnerability scanning
|
||||
3. **Expand `ansible-tutor.prompt.md`** → Add examples, safety checks, and validation
|
||||
|
||||
### High Priority
|
||||
4. **Archive deprecated prompts** → Clean up OLD.* files
|
||||
5. **Complete `doc-lint.prompt.md`** → Finish truncated output section
|
||||
6. **Standardize `session-status.prompt.md`** → Quantify drift detection
|
||||
|
||||
### Medium Priority
|
||||
7. **Converge `md2html` prompts** → Single parameterized version
|
||||
8. **Complete draft prompts** → Follow `service-new.prompt.md` pattern
|
||||
|
||||
### Low Priority
|
||||
9. **Update README.md** → Reflect actual prompt inventory
|
||||
10. **Add testing framework** → Validate prompts before deployment
|
||||
|
||||
---
|
||||
|
||||
## 8. Conclusion
|
||||
|
||||
The prompt repository has strong foundational patterns (gated workflows, RAG integration, safety guardrails) but suffers from:
|
||||
- **Duplication:** 60% code overlap in service management prompts
|
||||
- **Inconsistency:** 4 quality tiers with 9 incomplete drafts
|
||||
- **Maintenance Burden:** 26 prompts to update when patterns evolve
|
||||
|
||||
**Recommended Strategy:** Phased convergence using meta-prompt architecture, starting with service management workflows (highest ROI). This reduces maintenance burden while preserving flexibility for specialized workflows.
|
||||
|
||||
**Estimated Effort:**
|
||||
- Phase 1 (Cleanup): 2-4 hours
|
||||
- Phase 2 (High-Impact): 8-12 hours
|
||||
- Phase 3 (Convergence): 16-20 hours
|
||||
- Phase 4 (Draft Completion): 12-16 hours
|
||||
- **Total:** 38-52 hours over 6 weeks
|
||||
|
||||
---
|
||||
|
||||
**Report Generated:** 2026-01-09
|
||||
**Methodology:** Static analysis + pattern detection + quality scoring
|
||||
**Scope:** 26 prompt files in `.github/prompts/`
|
||||
**Next Review:** 2026-02-09 (post-Phase 2 completion)
|
||||
@ -0,0 +1,240 @@
|
||||
# Ansible quality gates
|
||||
|
||||
This document defines the quality standards, review checklist, and validation workflow for all Ansible code in this repository.
|
||||
|
||||
## Philosophy
|
||||
|
||||
Quality gates progress through three enforcement tiers:
|
||||
|
||||
- **Tier 1 (Advisory):** Visible via lint warnings; not blocking. Baseline cleanup phase.
|
||||
- **Tier 2 (Mandatory — current):** Must pass for swarm-impacting changes. CI enforces.
|
||||
- **Tier 3 (Fully blocking):** All rules enforced on every commit. Target: Phase 4 of the roadmap.
|
||||
|
||||
**Idempotency controls are Tier 2 (mandatory now) for all stack-impacting changes.**
|
||||
This means: changed_when, manager-state assertions, secret preflight asserts,
|
||||
bind-mount path asserts, and validate-only mode support are required, not advisory.
|
||||
|
||||
## Linting
|
||||
|
||||
### Configuration
|
||||
|
||||
The repository includes [.ansible-lint](../../.ansible-lint) configuration that enforces:
|
||||
|
||||
* **Moderate profile** — Balanced between permissive and strict
|
||||
* **Advisory rules** — No blocking on known patterns (e.g., raw commands in bootstrap playbooks)
|
||||
* **Warnings** — Experimental syntax and risky permissions are flagged but not blocked
|
||||
|
||||
### Running lint checks
|
||||
|
||||
```bash
|
||||
# Lint all playbooks and roles
|
||||
cd /home/chester/homelab/ansible
|
||||
ansible-lint
|
||||
|
||||
# Lint specific playbook
|
||||
ansible-lint playbooks/onboarding/generic_host.yml
|
||||
|
||||
# Lint entire role
|
||||
ansible-lint roles/monitoring_stack/
|
||||
```
|
||||
|
||||
### Installing ansible-lint
|
||||
|
||||
```bash
|
||||
# On control node (Ubuntu/Debian)
|
||||
sudo apt-get update
|
||||
sudo apt-get install -y python3-pip
|
||||
pip3 install ansible-lint
|
||||
|
||||
# Verify installation
|
||||
ansible-lint --version
|
||||
```
|
||||
|
||||
## Quality checklist
|
||||
|
||||
Use this checklist when creating or reviewing playbooks and roles:
|
||||
|
||||
### Security
|
||||
|
||||
* [ ] **No SSH bypasses** — `StrictHostKeyChecking=no` is forbidden
|
||||
* [ ] **Host key checking enabled** — `ansible.cfg` must have `host_key_checking = True`
|
||||
* [ ] **Secrets vaulted** — No plaintext passwords in defaults, vars, or playbooks
|
||||
* [ ] **Secrets validated** — Roles requiring secrets include `assert` tasks to fail fast
|
||||
* [ ] **File permissions explicit** — All `file`, `copy`, `template` tasks specify `mode`
|
||||
* [ ] **No root by default** — Use `become: true` only when necessary
|
||||
|
||||
### Idempotency
|
||||
|
||||
* [x] **Changed semantics** — All `command`/`shell` tasks include `changed_when` (**mandatory**)
|
||||
* [x] **Error handling** — All `command`/`shell` tasks include `failed_when` or `ignore_errors` (**mandatory**)
|
||||
* [x] **Check mode safe** — Playbooks can run with `--check` without errors (**mandatory**)
|
||||
* [x] **Replay safe** — Running twice produces no changes on second run (**mandatory**; PR evidence required)
|
||||
* [x] **Manager assertion** — Swarm manager checks use exact equality (`== 'active|true'`), not substring search (**mandatory**)
|
||||
* [x] **Absent idempotency** — Stack removal checks existence first; no false `changed` when already absent (**mandatory**)
|
||||
* [x] **Validate-only mode** — All stack deploy playbooks support `stack_validate_only=true` (**mandatory**)
|
||||
|
||||
### Modularity
|
||||
|
||||
* [ ] **Roles over monoliths** — Multi-task logic belongs in roles, not massive playbooks
|
||||
* [ ] **Builtin modules first** — Prefer `ansible.builtin.*` over `command`/`shell`/`raw`
|
||||
* [ ] **Bootstrap exception** — `raw` commands are acceptable only for pre-Python tasks
|
||||
* [ ] **Variables separated** — Environment-specific values live in `group_vars`, not role defaults
|
||||
|
||||
### Maintainability
|
||||
|
||||
* [ ] **Task names descriptive** — Each task has a clear, action-oriented name
|
||||
* [ ] **Tags applied** — Logical grouping with tags (e.g., `setup`, `security`, `monitoring`)
|
||||
* [ ] **Documentation inline** — Complex logic includes comments explaining "why"
|
||||
* [ ] **Handlers for services** — Service restarts use handlers, not inline tasks
|
||||
|
||||
## Mandatory pre-deploy gate (effective now — blocking for all stack changes)
|
||||
|
||||
> [!IMPORTANT]
|
||||
> All steps below MUST pass before merging any pull request that touches
|
||||
> `ansible/templates/stacks/`, `ansible/playbooks/docker/deploy_*.yml`,
|
||||
> or `ansible/roles/swarm_stack_deploy/`.
|
||||
> The Gitea CI workflow (`.gitea/workflows/stack-idempotency.yml`) runs
|
||||
> stages 1–3 automatically on every PR. The two-run idempotency proof
|
||||
> (step 7 below) must be performed manually and included as PR evidence.
|
||||
|
||||
For any swarm-impacting change, all checks below must pass before deployment:
|
||||
|
||||
```bash
|
||||
cd /home/chester/homelab/ansible
|
||||
|
||||
# 1) Inventory parse gate
|
||||
ansible-inventory -i inventory/hosts.ini --graph
|
||||
|
||||
# 2) Connectivity gate
|
||||
ansible -i inventory/hosts.ini swarm_hosts -m ping
|
||||
|
||||
# 3) Swarm control-plane gate
|
||||
ansible -i inventory/hosts.ini swarm_managers -m shell -a "docker info 2>/dev/null | grep -E 'Swarm:|Is Manager:'"
|
||||
|
||||
# 4) Playbook syntax gate
|
||||
ansible-playbook -i inventory/hosts.ini playbooks/your-playbook.yml --syntax-check
|
||||
|
||||
# 5) Control node sanity gate
|
||||
ansible-playbook -i inventory/hosts.ini playbooks/preflight/validate_control_node.yml
|
||||
|
||||
# 6) Validate-only preflight (no Swarm mutations — mandatory for stack changes)
|
||||
ansible-playbook -i inventory/hosts.ini playbooks/docker/deploy_<service>.yml \
|
||||
-e "stack_validate_only=true" \
|
||||
--vault-password-file .vault_pass
|
||||
|
||||
# 7) TWO-RUN IDEMPOTENCY PROOF (required for stack PRs — attach output as evidence)
|
||||
# Run 1: apply desired state
|
||||
ansible-playbook -i inventory/hosts.ini playbooks/docker/deploy_<service>.yml \
|
||||
--vault-password-file .vault_pass \
|
||||
2>&1 | tee /tmp/run1.log
|
||||
|
||||
# Run 2: replay — MUST report changed=0 for stack tasks
|
||||
ansible-playbook -i inventory/hosts.ini playbooks/docker/deploy_<service>.yml \
|
||||
--vault-password-file .vault_pass \
|
||||
2>&1 | tee /tmp/run2.log
|
||||
|
||||
# Verify: second run must show changed=0 for deploy/reconcile tasks
|
||||
grep -E 'changed=[^0]' /tmp/run2.log && echo 'IDEMPOTENCY FAIL' || echo 'IDEMPOTENCY PASS'
|
||||
```
|
||||
|
||||
## PR evidence pack (required for stack-impacting changes)
|
||||
|
||||
For any PR that modifies a stack template, deploy playbook, or the
|
||||
`swarm_stack_deploy` role, attach the following to the PR description:
|
||||
|
||||
```
|
||||
### Idempotency evidence
|
||||
|
||||
**Stack:** <service>
|
||||
**Date:** YYYY-MM-DD
|
||||
**Operator:** @username
|
||||
|
||||
**Run 1 summary:**
|
||||
```
|
||||
PLAY RECAP ***
|
||||
swarm-manager-1 : ok=N changed=N ...
|
||||
```
|
||||
|
||||
**Run 2 summary (must show changed=0 for stack tasks):**
|
||||
```
|
||||
PLAY RECAP ***
|
||||
swarm-manager-1 : ok=N changed=0 ...
|
||||
```
|
||||
|
||||
**Validate-only passed:** yes/no
|
||||
**Lint passed:** yes/no (CI enforced)
|
||||
**Syntax check passed:** yes/no (CI enforced)
|
||||
```
|
||||
|
||||
> [!IMPORTANT]
|
||||
> A PR that cannot demonstrate changed=0 on the second run MUST NOT be merged.
|
||||
|
||||
|
||||
|
||||
Before committing changes, always run syntax checks:
|
||||
|
||||
```bash
|
||||
cd /home/chester/homelab/ansible
|
||||
|
||||
# Check specific playbook
|
||||
ansible-playbook -i inventory/hosts.ini playbooks/your-playbook.yml --syntax-check
|
||||
|
||||
# Preflight validation (control node sanity)
|
||||
ansible-playbook -i inventory/hosts.ini playbooks/preflight/validate_control_node.yml
|
||||
```
|
||||
|
||||
## Idempotency testing
|
||||
|
||||
High-risk playbooks (those modifying system state) should be tested for idempotency:
|
||||
|
||||
```bash
|
||||
# Run playbook twice; second run should report "changed=0"
|
||||
ansible-playbook -i inventory/hosts.ini playbooks/your-playbook.yml
|
||||
ansible-playbook -i inventory/hosts.ini playbooks/your-playbook.yml
|
||||
```
|
||||
|
||||
## Review process
|
||||
|
||||
### Pre-commit (developer)
|
||||
|
||||
1. Run inventory parse gate and connectivity gate
|
||||
2. Run syntax check on modified playbooks
|
||||
3. Run ansible-lint on modified playbooks/roles (**Tier 2: mandatory for stack files**)
|
||||
4. For stack changes, run validate-only preflight
|
||||
5. For stack changes, run idempotency proof (two-run) and collect evidence
|
||||
6. Ensure required secrets are provided via vault (no plaintext defaults)
|
||||
|
||||
### Pre-merge (reviewer)
|
||||
|
||||
1. Verify security checklist items are addressed
|
||||
2. Spot-check modularity (no 500+ line playbooks)
|
||||
3. Confirm environment-specific values are in inventory, not defaults
|
||||
4. Confirm no root-level duplicate Ansible directories were introduced
|
||||
5. **For stack changes: verify PR evidence pack is attached and shows changed=0 on second run**
|
||||
6. For critical changes (security, networking), require idempotency proof
|
||||
|
||||
* **Weekly:** Triage Critical/High findings from drift reports
|
||||
* **Biweekly:** Run preflight validation suite
|
||||
* **Monthly:** Generate fresh standards-drift audit and review trends
|
||||
|
||||
## Roadmap
|
||||
|
||||
As baseline quality improves, the repository will:
|
||||
|
||||
1. **Phase 1 (current):** Mandatory idempotency gate for stack changes. Lint advisory for
|
||||
non-stack playbooks. Gitea CI blocks stack PRs on lint + syntax + preflight failures.
|
||||
`no-changed-when` promoted from skip to warn (visible everywhere).
|
||||
2. **Phase 2 (3 months):** Mandatory lint for all new/modified playbooks.
|
||||
`no-changed-when` moved to blocking; bootstrap exceptions suppressed inline with
|
||||
`# noqa: no-changed-when` on specific tasks.
|
||||
3. **Phase 3 (6 months):** Full baseline coverage, stricter profile. All remaining
|
||||
idempotency violations resolved. Two-run check automated in CI for eligible stacks.
|
||||
4. **Phase 4 (12 months):** Fully blocking CI on every commit. Molecule/integration
|
||||
tests for multi-node Swarm scenarios.
|
||||
|
||||
## References
|
||||
|
||||
* [Ansible Best Practices](https://docs.ansible.com/ansible/latest/tips_tricks/ansible_tips_tricks.html)
|
||||
* [ansible-lint documentation](https://ansible-lint.readthedocs.io/)
|
||||
* [environment-constraints.md](./environment-constraints.md) — Infrastructure-specific rules
|
||||
* [naming-conventions.md](./naming-conventions.md) — File and variable naming standards
|
||||
@ -0,0 +1,151 @@
|
||||
# Environment constraints
|
||||
|
||||
**Date:** 2026-01-10
|
||||
**Status:** Living document
|
||||
**Author:** Chester + FrankGPT
|
||||
|
||||
## Purpose
|
||||
|
||||
This document defines the hardware, software, and network constraints of the homelab environment. All playbooks and roles must respect these constraints.
|
||||
|
||||
---
|
||||
|
||||
## Network topology
|
||||
|
||||
> [!IMPORTANT]
|
||||
> Current operational state is still a flat network on `10.0.0.0/24`.
|
||||
> VLAN segmentation and target zone allocations in this document are migration targets,
|
||||
> not fully applied runtime state.
|
||||
|
||||
| Parameter | Value |
|
||||
| :--- | :--- |
|
||||
| Subnet | `10.0.0.0/24` |
|
||||
| Gateway | `10.0.0.2` |
|
||||
| Primary DNS | `10.0.0.2` |
|
||||
| Secondary DNS | `8.8.8.8` |
|
||||
| Domain | `local` (optional) |
|
||||
|
||||
### IP allocation scheme
|
||||
|
||||
| Range | Purpose |
|
||||
| :--- | :--- |
|
||||
| `10.0.0.1` | Reserved |
|
||||
| `10.0.0.2` | Gateway / Primary DNS |
|
||||
| `10.0.0.3 - 10.0.0.199` | DHCP / General devices |
|
||||
| `10.0.0.200 - 10.0.0.209` | Proxmox hosts (physical) |
|
||||
| `10.0.0.210 - 10.0.0.219` | Swarm managers (VMs) |
|
||||
| `10.0.0.220 - 10.0.0.229` | Swarm workers (VMs) / legacy AI nodes during migration |
|
||||
| `10.0.0.230 - 10.0.0.239` | AI workstations |
|
||||
| `10.0.0.240 - 10.0.0.248` | Reserved / Future |
|
||||
| `10.0.0.249 - 10.0.0.250` | NAS devices |
|
||||
| `10.0.0.251 - 10.0.0.254` | Docker hosts / Misc |
|
||||
|
||||
---
|
||||
|
||||
## Host categories
|
||||
|
||||
### Proxmox cluster (physical)
|
||||
|
||||
| Hostname | IP | Hardware | Notes |
|
||||
| :--- | :---: | :--- | :--- |
|
||||
| `pve01` | `10.0.0.201` | Lenovo SFF, 16 GB RAM, 512 GB NVMe | First node, 2× NICs |
|
||||
| `pve02` | `10.0.0.202` | (future) | |
|
||||
| `pve03` | `10.0.0.203` | (future) | |
|
||||
| `pve04` | `10.0.0.204` | (future) | |
|
||||
| `pve05` | `10.0.0.205` | (future) | |
|
||||
|
||||
**Constraints:**
|
||||
- Proxmox VE 8.x or 9.x
|
||||
- `ansible_user=root` for provisioning
|
||||
- Python 3 available at `/usr/bin/python3`
|
||||
|
||||
### Swarm nodes (VMs on Proxmox)
|
||||
|
||||
| Role | Hostname pattern | IP range | Specs |
|
||||
| :--- | :--- | :--- | :--- |
|
||||
| Manager | `swarm-manager-X` | `.211 - .215` | 4 GB RAM, 2 vCPU, 32 GB disk |
|
||||
| Worker | `swarm-worker-X` | `.221 - .225` | 4 GB RAM, 2 vCPU, 32 GB disk |
|
||||
|
||||
**Constraints:**
|
||||
- Ubuntu 24.04 LTS (Noble)
|
||||
- Docker CE installed via official repo
|
||||
- `ansible_user=chester`
|
||||
|
||||
### AI workstations (physical)
|
||||
|
||||
| Hostname | IP | Hardware | Notes |
|
||||
| :--- | :---: | :--- | :--- |
|
||||
| `ai-lenovo` | `10.0.0.220` | Laptop, 12 GB GPU | Ubuntu Server |
|
||||
|
||||
**Constraints:**
|
||||
- Ubuntu Server (not Desktop)
|
||||
- GPU drivers managed separately
|
||||
- `ansible_user=chester`
|
||||
|
||||
### Storage / NAS (appliances)
|
||||
|
||||
| Hostname | IP | Product | Notes |
|
||||
| :--- | :---: | :--- | :--- |
|
||||
| `synology` | `10.0.0.249` | Synology NAS | Proprietary Linux, limited shell |
|
||||
| `terramaster` | `10.0.0.250` | TerraMaster NAS | Proprietary Linux, limited shell |
|
||||
|
||||
**Constraints:**
|
||||
- **Caution required** — proprietary OS, not standard Ubuntu
|
||||
- Use `ansible_scp_if_ssh=True` for Synology
|
||||
- Avoid destructive commands; test in check mode first
|
||||
- Limited Python support; prefer `raw` module when needed
|
||||
|
||||
### Controller (watchtower)
|
||||
|
||||
| Hostname | IP | Hardware | Notes |
|
||||
| :--- | :---: | :--- | :--- |
|
||||
| `localhost` | N/A | Raspberry Pi 5 | Ansible controller |
|
||||
|
||||
**Constraints:**
|
||||
- `ansible_connection=local`
|
||||
- Runs all playbooks from this host
|
||||
- ARM64 architecture (consider when building containers)
|
||||
|
||||
---
|
||||
|
||||
## Software standards
|
||||
|
||||
| Component | Version | Notes |
|
||||
| :--- | :--- | :--- |
|
||||
| Ansible | 2.15+ | Core automation |
|
||||
| Python | 3.10+ | Required on all managed hosts |
|
||||
| Docker CE | Latest stable | Swarm mode |
|
||||
| Proxmox VE | 8.x or 9.x | Hypervisor |
|
||||
| Ubuntu | 24.04 LTS | Guest OS for VMs |
|
||||
|
||||
---
|
||||
|
||||
## Firewall / ports
|
||||
|
||||
| Port | Protocol | Purpose | Required on |
|
||||
| :---: | :---: | :--- | :--- |
|
||||
| 22 | TCP | SSH | All hosts |
|
||||
| 8006 | TCP | Proxmox GUI | Proxmox hosts |
|
||||
| 2377 | TCP | Swarm cluster mgmt | Swarm nodes |
|
||||
| 7946 | TCP/UDP | Swarm node comm | Swarm nodes |
|
||||
| 4789 | UDP | Swarm overlay network | Swarm nodes |
|
||||
|
||||
---
|
||||
|
||||
## Documentation mandate
|
||||
|
||||
> [!IMPORTANT]
|
||||
> **FrankGPT core principle:** Documentation is not optional.
|
||||
>
|
||||
> - Every decision must be recorded in `documentation/standards/`
|
||||
> - Every playbook must have a header comment explaining usage
|
||||
> - Every variable must be documented in defaults or group_vars
|
||||
> - When in doubt, write it down
|
||||
|
||||
---
|
||||
|
||||
## Change log
|
||||
|
||||
| Date | Change | Author |
|
||||
| :--- | :--- | :--- |
|
||||
| 2026-01-10 | Initial creation | Chester + FrankGPT |
|
||||
@ -0,0 +1,178 @@
|
||||
# Naming conventions
|
||||
|
||||
**Date:** 2026-01-10
|
||||
**Status:** Approved
|
||||
**Author:** Chester + FrankGPT
|
||||
|
||||
## Purpose
|
||||
|
||||
Consistent naming reduces cognitive load, prevents errors, and makes the codebase navigable for future contributors (including future-you).
|
||||
|
||||
---
|
||||
|
||||
## General principles
|
||||
|
||||
1. **Be descriptive:** Names should explain *what* something is or *what* it does.
|
||||
2. **Be consistent:** Once you pick a pattern, stick to it everywhere.
|
||||
3. **Avoid abbreviations:** Write `network` not `net`, `manager` not `mgr` — unless the abbreviation is industry-standard (e.g., `vm`, `ip`, `ssh`).
|
||||
4. **Use English:** All identifiers, comments, and documentation in English.
|
||||
|
||||
---
|
||||
## Files and folders
|
||||
|
||||
| Element | Convention | Example |
|
||||
| :--- | :--- | :--- |
|
||||
| Folders | lowercase, singular noun | `docker/`, `proxmox/`, `onboarding/` |
|
||||
| Playbooks | `snake_case.yml` | `provision_swarm_vms.yml` |
|
||||
| Roles | `snake_case` | `proxmox_post_install` |
|
||||
| Templates | `filename.ext.j2` | `docker-compose.yml.j2` |
|
||||
| Variable files | `snake_case.yml` | `swarm_defaults.yml` |
|
||||
|
||||
### Playbook naming pattern
|
||||
|
||||
Use **verb + object** format:
|
||||
|
||||
| Verb | Use when | Example |
|
||||
| :--- | :--- | :--- |
|
||||
| `provision_` | Creating infrastructure | `provision_swarm_vms.yml` |
|
||||
| `configure_` | Modifying settings | `configure_nas.yml` |
|
||||
| `deploy_` | Pushing applications | `deploy_portainer.yml` |
|
||||
| `init_` | First-time setup | `init_cluster.yml` |
|
||||
| `update_` | Applying updates | `update_containers.yml` |
|
||||
| `validate_` | Checking correctness | `validate_karakeep.yml` |
|
||||
| `test_` | Running tests | `test_ollama.yml` |
|
||||
| `enforce_` | Ensuring compliance | `enforce_access.yml` |
|
||||
| `remove_` | Deleting resources | `remove_old_images.yml` |
|
||||
|
||||
**Exceptions:** Master/orchestrator playbooks may be named after their target scope:
|
||||
- `proxmox_host.yml` — orchestrates full PVE onboarding
|
||||
- `ai_workstation.yml` — orchestrates AI host setup
|
||||
|
||||
---
|
||||
|
||||
## Inventory
|
||||
|
||||
| Element | Convention | Example |
|
||||
| :--- | :--- | :--- |
|
||||
| Group names | `snake_case` | `proxmox_cluster`, `swarm_managers` |
|
||||
| Hostnames | `kebab-case` | `pve-01`, `swarm-manager-1` |
|
||||
| Child groups | `parent:children` syntax | `ubuntu_lab:children` |
|
||||
|
||||
### Hostname pattern
|
||||
|
||||
```
|
||||
<role>-<index>
|
||||
```
|
||||
|
||||
| Role | Pattern | Examples |
|
||||
| :--- | :--- | :--- |
|
||||
| Proxmox hosts | `pve-0X` | `pve-01`, `pve-02` |
|
||||
| Swarm managers | `swarm-manager-X` | `swarm-manager-1` |
|
||||
| Swarm workers | `swarm-worker-X` | `swarm-worker-1` |
|
||||
| AI workstations | `ai-<name>` | `ai-lenovo`, `ai-surface1` |
|
||||
| Docker hosts | `<name>` or `docker-0X` | `waldorf`, `docker-01` |
|
||||
| Storage | `<product>` | `synology`, `terramaster` |
|
||||
|
||||
---
|
||||
|
||||
## Variables
|
||||
|
||||
| Element | Convention | Example |
|
||||
| :--- | :--- | :--- |
|
||||
| All variables | `snake_case` | `vm_disk_size` |
|
||||
| Role defaults | Prefix with role name | `proxmox_post_install_enabled` |
|
||||
| Boolean vars | Use positive names | `enable_ha` (not `disable_ha`) |
|
||||
| List vars | Plural nouns | `required_packages`, `allowed_users` |
|
||||
| Dict vars | Singular noun | `vm_config`, `network_settings` |
|
||||
|
||||
### Variable prefixes by scope
|
||||
|
||||
| Scope | Prefix | Example |
|
||||
| :--- | :--- | :--- |
|
||||
| Role-specific | `<role>_` | `proxmox_post_install_enabled` |
|
||||
| Playbook-local | `_` (single underscore) | `_temp_file` |
|
||||
| Global/shared | none | `ansible_user`, `ssh_key_path` |
|
||||
|
||||
### Reserved variable names
|
||||
|
||||
Never override these Ansible built-ins:
|
||||
- `inventory_hostname`, `ansible_host`, `ansible_user`
|
||||
- `ansible_become`, `ansible_become_pass`
|
||||
- `hostvars`, `groups`, `group_names`
|
||||
|
||||
---
|
||||
|
||||
## Tasks and handlers
|
||||
|
||||
| Element | Convention | Example |
|
||||
| :--- | :--- | :--- |
|
||||
| Task names | Sentence case, descriptive | `Install required packages` |
|
||||
| Handler names | `Restart <service>` or `Reload <service>` | `Restart docker` |
|
||||
| Block names | `<Action> <scope>` | `Configure SSH access` |
|
||||
| Tags | `snake_case`, short | `install`, `configure`, `test` |
|
||||
|
||||
### Task naming rules
|
||||
|
||||
1. **Start with a verb:** `Install`, `Configure`, `Create`, `Remove`, `Ensure`, `Check`
|
||||
2. **Be specific:** `Install Docker CE` not `Install Docker`
|
||||
3. **No trailing punctuation:** `Install packages` not `Install packages.`
|
||||
4. **Use present tense:** `Create user` not `Created user`
|
||||
|
||||
---
|
||||
|
||||
## Tags
|
||||
|
||||
Use tags to allow selective execution:
|
||||
|
||||
| Tag | Purpose | Example usage |
|
||||
| :--- | :--- | :--- |
|
||||
| `install` | Package installation | `--tags install` |
|
||||
| `configure` | Configuration changes | `--tags configure` |
|
||||
| `test` | Validation/testing | `--tags test` |
|
||||
| `cleanup` | Removal/pruning | `--tags cleanup` |
|
||||
| `never` | Skip unless explicit | `--tags never,dangerous_task` |
|
||||
|
||||
---
|
||||
|
||||
## Secrets and sensitive data
|
||||
|
||||
| Element | Convention | Example |
|
||||
| :--- | :--- | :--- |
|
||||
| Vault files | `vault_<scope>.yml` | `vault_production.yml` |
|
||||
| Secret vars | Suffix with `_secret` or `_pass` | `db_pass`, `api_key_secret` |
|
||||
| Encrypted strings | Use `!vault` tag | `password: !vault \|...` |
|
||||
|
||||
---
|
||||
|
||||
## Git branches (if applicable)
|
||||
|
||||
| Branch | Purpose |
|
||||
| :--- | :--- |
|
||||
| `main` | Production-ready playbooks |
|
||||
| `develop` | Integration branch |
|
||||
| `feature/<name>` | New features |
|
||||
| `fix/<name>` | Bug fixes |
|
||||
| `docs/<name>` | Documentation updates |
|
||||
|
||||
---
|
||||
|
||||
## Quick reference card
|
||||
|
||||
```
|
||||
Files: snake_case.yml
|
||||
Folders: lowercase/
|
||||
Roles: snake_case
|
||||
Hostnames: kebab-case
|
||||
Groups: snake_case
|
||||
Variables: snake_case
|
||||
Tasks: Sentence case, verb first
|
||||
Tags: snake_case
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## References
|
||||
|
||||
- [Ansible Best Practices — Variable Naming](https://docs.ansible.com/ansible/latest/tips_tricks/ansible_tips_tricks.html)
|
||||
- [Ansible Lint — Naming Rules](https://ansible.readthedocs.io/projects/lint/rules/name/)
|
||||
- [Google Shell Style Guide](https://google.github.io/styleguide/shellguide.html) — for script naming inspiration
|
||||
@ -0,0 +1,51 @@
|
||||
# Decision: VM vs LXC for Docker Swarm nodes
|
||||
|
||||
**Date:** 2026-01-10
|
||||
**Status:** Approved
|
||||
**Author:** Chester + FrankGPT
|
||||
|
||||
## Context
|
||||
|
||||
We need to run Docker Swarm manager and worker nodes on Proxmox VE hosts. Two options exist:
|
||||
|
||||
1. **QEMU/KVM Virtual Machines (VMs)**
|
||||
2. **LXC Containers**
|
||||
|
||||
## Decision
|
||||
|
||||
**Use VMs for all Docker Swarm nodes.**
|
||||
|
||||
## Rationale
|
||||
|
||||
| Factor | VM | LXC |
|
||||
| :--- | :--- | :--- |
|
||||
| Docker support | Officially supported | Unsupported (requires hacks) |
|
||||
| Stability | High | Medium (kernel updates can break) |
|
||||
| Isolation | Full kernel isolation | Shared kernel |
|
||||
| Resource overhead | Higher (~1-2 GB RAM baseline) | Lower (~256 MB baseline) |
|
||||
| Maintenance | Standard Ubuntu updates | AppArmor/seccomp tuning required |
|
||||
|
||||
**Trade-off accepted:** We accept the higher resource overhead of VMs in exchange for stability and official Docker support.
|
||||
|
||||
## Specifications
|
||||
|
||||
| Parameter | Value |
|
||||
| :--- | :--- |
|
||||
| Base image | Ubuntu 24.04 LTS (Noble) cloud-init |
|
||||
| Disk | 32 GB per VM |
|
||||
| RAM | 4 GB per VM |
|
||||
| vCPU | 2 per VM |
|
||||
| Network bridge | `vmbr0` (bridged to LAN) |
|
||||
| Storage pool | `local-lvm` |
|
||||
|
||||
## Capacity planning (per physical host)
|
||||
|
||||
- Physical NVMe: 512 GB
|
||||
- Available in `local-lvm`: ~357 GB
|
||||
- Initial allocation: 2 VMs × 32 GB = 64 GB
|
||||
- Remaining: ~293 GB (room for 4+ additional VMs)
|
||||
|
||||
## References
|
||||
|
||||
- [community-scripts/ProxmoxVE docker-vm.sh](https://github.com/community-scripts/ProxmoxVE) — reference implementation
|
||||
- Docker documentation on supported platforms
|
||||
764
ansible/ansible-old/get-docker.sh
Normal file
764
ansible/ansible-old/get-docker.sh
Normal file
@ -0,0 +1,764 @@
|
||||
#!/bin/sh
|
||||
set -e
|
||||
# Docker Engine for Linux installation script.
|
||||
#
|
||||
# This script is intended as a convenient way to configure docker's package
|
||||
# repositories and to install Docker Engine. This script is not recommended
|
||||
# for production environments. Before running this script, make yourself familiar
|
||||
# with potential risks and limitations, and refer to the installation manual
|
||||
# at https://docs.docker.com/engine/install/ for alternative installation methods.
|
||||
#
|
||||
# The script:
|
||||
#
|
||||
# - Requires `root` or `sudo` privileges to run.
|
||||
# - Attempts to detect your Linux distribution and version and configure your
|
||||
# package management system for you.
|
||||
# - Doesn't allow you to customize most installation parameters.
|
||||
# - Installs dependencies and recommendations without asking for confirmation.
|
||||
# - Installs the latest stable release (by default) of Docker CLI, Docker Engine,
|
||||
# Docker Buildx, Docker Compose, containerd, and runc. When using this script
|
||||
# to provision a machine, this may result in unexpected major version upgrades
|
||||
# of these packages. Always test upgrades in a test environment before
|
||||
# deploying to your production systems.
|
||||
# - Isn't designed to upgrade an existing Docker installation. When using the
|
||||
# script to update an existing installation, dependencies may not be updated
|
||||
# to the expected version, resulting in outdated versions.
|
||||
#
|
||||
# Source code is available at https://github.com/docker/docker-install/
|
||||
#
|
||||
# Usage
|
||||
# ==============================================================================
|
||||
#
|
||||
# To install the latest stable versions of Docker CLI, Docker Engine, and their
|
||||
# dependencies:
|
||||
#
|
||||
# 1. download the script
|
||||
#
|
||||
# $ curl -fsSL https://get.docker.com -o install-docker.sh
|
||||
#
|
||||
# 2. verify the script's content
|
||||
#
|
||||
# $ cat install-docker.sh
|
||||
#
|
||||
# 3. run the script with --dry-run to verify the steps it executes
|
||||
#
|
||||
# $ sh install-docker.sh --dry-run
|
||||
#
|
||||
# 4. run the script either as root, or using sudo to perform the installation.
|
||||
#
|
||||
# $ sudo sh install-docker.sh
|
||||
#
|
||||
# Command-line options
|
||||
# ==============================================================================
|
||||
#
|
||||
# --version <VERSION>
|
||||
# Use the --version option to install a specific version, for example:
|
||||
#
|
||||
# $ sudo sh install-docker.sh --version 23.0
|
||||
#
|
||||
# --channel <stable|test>
|
||||
#
|
||||
# Use the --channel option to install from an alternative installation channel.
|
||||
# The following example installs the latest versions from the "test" channel,
|
||||
# which includes pre-releases (alpha, beta, rc):
|
||||
#
|
||||
# $ sudo sh install-docker.sh --channel test
|
||||
#
|
||||
# Alternatively, use the script at https://test.docker.com, which uses the test
|
||||
# channel as default.
|
||||
#
|
||||
# --mirror <Aliyun|AzureChinaCloud>
|
||||
#
|
||||
# Use the --mirror option to install from a mirror supported by this script.
|
||||
# Available mirrors are "Aliyun" (https://mirrors.aliyun.com/docker-ce), and
|
||||
# "AzureChinaCloud" (https://mirror.azure.cn/docker-ce), for example:
|
||||
#
|
||||
# $ sudo sh install-docker.sh --mirror AzureChinaCloud
|
||||
#
|
||||
# --setup-repo
|
||||
#
|
||||
# Use the --setup-repo option to configure Docker's package repositories without
|
||||
# installing Docker packages. This is useful when you want to add the repository
|
||||
# but install packages separately:
|
||||
#
|
||||
# $ sudo sh install-docker.sh --setup-repo
|
||||
#
|
||||
# Automatic Service Start
|
||||
#
|
||||
# By default, this script automatically starts the Docker daemon and enables the docker
|
||||
# service after installation if systemd is used as init.
|
||||
#
|
||||
# If you prefer to start the service manually, use the --no-autostart option:
|
||||
#
|
||||
# $ sudo sh install-docker.sh --no-autostart
|
||||
#
|
||||
# Note: Starting the service requires appropriate privileges to manage system services.
|
||||
#
|
||||
# ==============================================================================
|
||||
|
||||
|
||||
# Git commit from https://github.com/docker/docker-install when
|
||||
# the script was uploaded (Should only be modified by upload job):
|
||||
SCRIPT_COMMIT_SHA="f381ee68b32e515bb4dc034b339266aff1fbc460"
|
||||
|
||||
# strip "v" prefix if present
|
||||
VERSION="${VERSION#v}"
|
||||
|
||||
# Release channel to install from. Supported values:
#   * stable
#   * test
# A non-empty CHANNEL already present in the environment is kept.
DEFAULT_CHANNEL_VALUE="stable"
: "${CHANNEL:=$DEFAULT_CHANNEL_VALUE}"

# Base URL of the package repository; a non-empty DOWNLOAD_URL wins.
DEFAULT_DOWNLOAD_URL="https://download.docker.com"
: "${DOWNLOAD_URL:=$DEFAULT_DOWNLOAD_URL}"

# Name of the repo file. When REPO_FILE is not provided, automatically
# default to the staging repo file for a staging download url
# (download-stage.docker.com), and to the regular repo file otherwise.
DEFAULT_REPO_FILE="docker-ce.repo"
if [ -z "$REPO_FILE" ]; then
	case "$DOWNLOAD_URL" in
		*-stage*) REPO_FILE="docker-ce-staging.repo" ;;
		*) REPO_FILE="$DEFAULT_REPO_FILE" ;;
	esac
fi
|
||||
|
||||
# Runtime options.
#   mirror        set only through --mirror; always starts out empty
#   DRY_RUN       non-empty enables dry-run; may be preset in the environment
#   REPO_ONLY     1 = only configure the package repository (default 0)
#   NO_AUTOSTART  1 = do not start the service after install (default 0)
mirror=''
DRY_RUN=${DRY_RUN:-}
REPO_ONLY=${REPO_ONLY:-0}
NO_AUTOSTART=${NO_AUTOSTART:-0}

# Parse command-line flags. Flags that take a value (--channel, --mirror,
# --version) read it from $2 and shift once here to consume that value; the
# flag itself is consumed by the single shift at the bottom of the loop.
# Boolean flags must NOT shift on their own, otherwise the argument that
# follows them would be silently discarded.
while [ $# -gt 0 ]; do
	case "$1" in
		--channel)
			CHANNEL="$2"
			shift
			;;
		--dry-run)
			DRY_RUN=1
			;;
		--mirror)
			mirror="$2"
			shift
			;;
		--version)
			# strip "v" prefix if present
			VERSION="${2#v}"
			shift
			;;
		--setup-repo)
			# Boolean flag: no extra shift (the previous extra shift swallowed
			# the next argument, e.g. "--setup-repo --dry-run" lost --dry-run).
			REPO_ONLY=1
			;;
		--no-autostart)
			NO_AUTOSTART=1
			;;
		--*)
			# Unknown options are reported but do not abort the script.
			echo "Illegal option $1"
			;;
	esac
	# Shift by 0 when a value-taking flag was last and had no value, so that
	# shift never runs past the end of the argument list.
	shift $(( $# > 0 ? 1 : 0 ))
done
|
||||
|
||||
# Translate the --mirror name into the download URL of that mirror. An empty
# mirror leaves DOWNLOAD_URL untouched; any other name is a fatal error.
if [ "$mirror" = "Aliyun" ]; then
	DOWNLOAD_URL="https://mirrors.aliyun.com/docker-ce"
elif [ "$mirror" = "AzureChinaCloud" ]; then
	DOWNLOAD_URL="https://mirror.azure.cn/docker-ce"
elif [ -n "$mirror" ]; then
	>&2 echo "unknown mirror '$mirror': use either 'Aliyun', or 'AzureChinaCloud'."
	exit 1
fi
|
||||
|
||||
# Reject any release channel other than the two supported ones.
if [ "$CHANNEL" != "stable" ] && [ "$CHANNEL" != "test" ]; then
	>&2 echo "unknown CHANNEL '$CHANNEL': use either stable or test."
	exit 1
fi
|
||||
|
||||
# command_exists succeeds when `command -v` can resolve the given name(s);
# all output of the lookup is discarded and its exit status is returned.
command_exists() {
	command -v "$@" 1>/dev/null 2>&1
	return "$?"
}
|
||||
|
||||
# version_gte reports whether the version requested through $VERSION is at
# least the given SemVer (Maj.Minor[.Patch]) or CalVer (YY.MM) version. It
# returns 0 (success) if $VERSION is unset or empty (= latest) or is newer
# than or equal to the given version, and returns 1 (fail) otherwise.
#
# examples:
#
# VERSION=23.0
# version_gte 23.0 // 0 (success)
# version_gte 20.10 // 0 (success)
# version_gte 19.03 // 0 (success)
# version_gte 26.1 // 1 (fail)
version_gte() {
	# No explicit version requested: nothing to compare against.
	[ -n "$VERSION" ] || return 0
	version_compare "$VERSION" "$1"
}
|
||||
|
||||
# version_compare compares two version strings, either SemVer
# (Major.Minor.Patch) or CalVer (YY.MM). It returns 0 (success) if version A
# is newer than or equal to version B, or 1 (fail) otherwise. Only the first
# two dot-separated fields are compared; patch releases and pre-release
# suffixes (-alpha/-beta) are not taken into account.
#
# examples:
#
# version_compare 23.0.0 20.10 // 0 (success)
# version_compare 23.0 20.10   // 0 (success)
# version_compare 20.10 19.03  // 0 (success)
# version_compare 20.10 20.10  // 0 (success)
# version_compare 19.03 20.10  // 1 (fail)
version_compare() (
	# The body runs in a subshell, so turning xtrace off and the variables
	# assigned below do not leak into the caller.
	set +x

	major_a="$(echo "$1" | cut -d'.' -f1)"
	major_b="$(echo "$2" | cut -d'.' -f1)"
	# A differing first field decides the comparison on its own.
	[ "$major_a" -lt "$major_b" ] && return 1
	[ "$major_a" -gt "$major_b" ] && return 0

	minor_a="$(echo "$1" | cut -d'.' -f2)"
	minor_b="$(echo "$2" | cut -d'.' -f2)"

	# trim a leading zero to accommodate CalVer ("03" -> "3")
	minor_a="${minor_a#0}"
	minor_b="${minor_b#0}"

	# An empty second field (e.g. "0" after trimming) counts as 0.
	[ "${minor_a:-0}" -lt "${minor_b:-0}" ] && return 1

	return 0
)
|
||||
|
||||
# is_dry_run succeeds (returns 0) when $DRY_RUN is non-empty and fails
# (returns 1) when it is empty or unset.
is_dry_run() {
	[ -n "$DRY_RUN" ]
}
|
||||
|
||||
# is_wsl succeeds when the kernel release reported by `uname -r` contains
# "microsoft" (WSL 2) or "Microsoft" (WSL 1), and fails otherwise.
is_wsl() {
	case "$(uname -r)" in
		*microsoft*|*Microsoft*) return 0 ;;
	esac
	return 1
}
|
||||
|
||||
# is_darwin succeeds when the kernel name reported by `uname -s` contains
# "darwin" or "Darwin", and fails otherwise.
is_darwin() {
	case "$(uname -s)" in
		*darwin*|*Darwin*) return 0 ;;
	esac
	return 1
}
|
||||
|
||||
# deprecation_notice prints an end-of-life warning for the given distribution
# and then pauses for 10 seconds so the user can abort with Ctrl+C.
#
# $1: distribution name
# $2: distribution version
deprecation_notice() {
	distro=$1
	distro_version=$2

	# Blank line, then the highlighted banner.
	printf "\n\033[91;1mDEPRECATION WARNING\033[0m\n"
	printf " This Linux distribution (\033[1m%s %s\033[0m) reached end-of-life and is no longer supported by this script.\n" "$distro" "$distro_version"
	echo " No updates or security fixes will be released for this distribution, and users are recommended"
	echo " to upgrade to a currently maintained version of $distro."
	# Blank line, then the abort hint terminated by a newline.
	printf "\nPress \033[1mCtrl+C\033[0m now to abort this script, or wait for the installation to continue.\n"
	sleep 10
}
|
||||
|
||||
# get_distribution prints the ID field of /etc/os-release, or an empty line
# when that file is not readable.
get_distribution() {
	# Start from an empty value: the callers' case statements simply do not
	# match when no distribution could be determined.
	lsb_dist=""
	# Every officially supported system has /etc/os-release.
	if test -r /etc/os-release; then
		lsb_dist="$(. /etc/os-release && echo "$ID")"
	fi
	echo "$lsb_dist"
}
|
||||
|
||||
# start_docker_daemon enables and starts docker.service through systemctl when
# systemctl is available; otherwise it only explains that the daemon cannot be
# started automatically. All messages go to stderr and are suppressed in
# dry-run mode (where $sh_c is "echo", so the command is printed instead).
#
# Reads: $sh_c (the privileged command runner chosen by do_install).
start_docker_daemon() {
	# Use systemctl if available (for systemd-based systems)
	if command_exists systemctl; then
		is_dry_run || >&2 echo "Using systemd to manage Docker service"
		if (
			is_dry_run || set -x
			# The command must be ONE argument: $sh_c expands to e.g.
			# `sh -c`, which executes only its first operand as the
			# command string. Passing the words separately would run a
			# bare `systemctl` and report success without enabling
			# anything.
			$sh_c 'systemctl enable --now docker.service' 2>/dev/null
		); then
			is_dry_run || echo "INFO: Docker daemon enabled and started" >&2
		else
			is_dry_run || echo "WARNING: unable to enable the docker service" >&2
		fi
	else
		# No service management available (container environment)
		if ! is_dry_run; then
			>&2 echo "Note: Running in a container environment without service management"
			>&2 echo "Docker daemon cannot be started automatically in this environment"
			>&2 echo "The Docker packages have been installed successfully"
		fi
	fi
	>&2 echo
}
|
||||
|
||||
# echo_docker_as_nonroot prints the post-install guidance: `docker version`
# output when the docker command and /var/run/docker.sock both exist, followed
# by notes on rootless mode (only for versions >= 20.10) and on granting
# non-root users access to a privileged daemon. It prints nothing in dry-run
# mode.
echo_docker_as_nonroot() {
	is_dry_run && return 0

	if command_exists docker; then
		if [ -e /var/run/docker.sock ]; then
			# Best effort: a failing `docker version` must not abort.
			( set -x; $sh_c 'docker version' ) || true
		fi
	fi

	printf '%s\n' \
		"" \
		"================================================================================" \
		""
	if version_gte "20.10"; then
		printf '%s\n' \
			"To run Docker as a non-privileged user, consider setting up the" \
			"Docker daemon in rootless mode for your user:" \
			"" \
			" dockerd-rootless-setuptool.sh install" \
			"" \
			"Visit https://docs.docker.com/go/rootless/ to learn about rootless mode." \
			""
	fi
	printf '%s\n' \
		"" \
		"To run the Docker daemon as a fully privileged service, but granting non-root" \
		"users access, refer to https://docs.docker.com/go/daemon-access/" \
		"" \
		"WARNING: Access to the remote API on a privileged Docker daemon is equivalent" \
		" to root access on the host. Refer to the 'Docker daemon attack surface'" \
		" documentation for details: https://docs.docker.com/go/attack-surface/" \
		"" \
		"================================================================================" \
		""
}
|
||||
|
||||
# check_forked detects forked Linux distros and rewrites the global
# $lsb_dist / $dist_version to the values of the upstream distro.
#
# When `lsb_release -a -u` succeeds, the upstream id and codename it reports
# are used. Otherwise, on a system with a readable /etc/debian_version that
# is not already identified as ubuntu or raspbian, the distro is treated as
# debian (or raspbian for osmc) and the numeric release is mapped to its
# codename.
check_forked() {
	# lsb_release usually exists in forked distros; without it, do nothing.
	command_exists lsb_release || return 0

	# Check if the `-u` option is supported. errexit is switched off around
	# the probe so a failure can be inspected instead of aborting.
	set +e
	lsb_release -a -u > /dev/null 2>&1
	lsb_release_exit_code=$?
	set -e

	# A successful probe means we're in a forked distro.
	if [ "$lsb_release_exit_code" = "0" ]; then
		# Print info about current distro
		printf "You're using '%s' version '%s'.\n" "$lsb_dist" "$dist_version"

		# Get the upstream release info
		lsb_dist=$(lsb_release -a -u 2>&1 | tr '[:upper:]' '[:lower:]' | grep -E 'id' | cut -d ':' -f 2 | tr -d '[:space:]')
		dist_version=$(lsb_release -a -u 2>&1 | tr '[:upper:]' '[:lower:]' | grep -E 'codename' | cut -d ':' -f 2 | tr -d '[:space:]')

		# Print info about upstream distro
		printf "Upstream release is '%s' version '%s'.\n" "$lsb_dist" "$dist_version"
		return 0
	fi

	# Fallback applies only to Debian-derived systems that are not already
	# identified as ubuntu or raspbian.
	[ -r /etc/debian_version ] || return 0
	case "$lsb_dist" in
		ubuntu|raspbian) return 0 ;;
	esac

	case "$lsb_dist" in
		# OSMC runs Raspbian
		osmc) lsb_dist=raspbian ;;
		# We're Debian and don't even know it!
		*) lsb_dist=debian ;;
	esac

	# Keep only the part before the first "/" and the first "." of the
	# version string, then map it to a codename.
	dist_version="$(sed 's/\/.*//' /etc/debian_version | sed 's/\..*//')"
	case "$dist_version" in
		13|14|forky) dist_version="trixie" ;;
		12) dist_version="bookworm" ;;
		11) dist_version="bullseye" ;;
		10) dist_version="buster" ;;
		9) dist_version="stretch" ;;
		8) dist_version="jessie" ;;
	esac
}
|
||||
|
||||
# do_install is the script's entry point. It
#   1. warns (with a 20 second pause) when docker already exists or when
#      running under WSL,
#   2. picks the privileged command runner $sh_c (sh -c, sudo, su, or plain
#      `echo` in dry-run mode),
#   3. detects the distribution and its version,
#   4. configures the Docker package repository and, unless REPO_ONLY=1,
#      installs the Docker packages and optionally starts the daemon.
#
# The function never returns: every path ends in `exit` (0 after a successful
# deb/rpm run, 1 for unsupported systems or lookup failures).
#
# Reads: CHANNEL, DOWNLOAD_URL, REPO_FILE, VERSION, REPO_ONLY, NO_AUTOSTART,
# DRY_RUN (through is_dry_run), SCRIPT_COMMIT_SHA.
do_install() {
	echo "# Executing docker install script, commit: $SCRIPT_COMMIT_SHA"

	if command_exists docker; then
		cat >&2 <<-'EOF'
			Warning: the "docker" command appears to already exist on this system.

			If you already have Docker installed, this script can cause trouble, which is
			why we're displaying this warning and provide the opportunity to cancel the
			installation.

			If you installed the current Docker package using this script and are using it
			again to update Docker, you can ignore this message, but be aware that the
			script resets any custom changes in the deb and rpm repo configuration
			files to match the parameters passed to the script.

			You may press Ctrl+C now to abort this script.
		EOF
		( set -x; sleep 20 )
	fi

	user="$(id -un 2>/dev/null || true)"

	# $sh_c is the prefix used to run every privileged command below. It is
	# always followed by ONE string holding the complete command line.
	sh_c='sh -c'
	if [ "$user" != 'root' ]; then
		if command_exists sudo; then
			sh_c='sudo -E sh -c'
		elif command_exists su; then
			sh_c='su -c'
		else
			cat >&2 <<-'EOF'
			Error: this installer needs the ability to run commands as root.
			We are unable to find either "sudo" or "su" available to make this happen.
			EOF
			exit 1
		fi
	fi

	# In dry-run mode commands are printed instead of executed.
	if is_dry_run; then
		sh_c="echo"
	fi

	# perform some very rudimentary platform detection
	lsb_dist=$( get_distribution )
	lsb_dist="$(echo "$lsb_dist" | tr '[:upper:]' '[:lower:]')"

	if is_wsl; then
		echo
		echo "WSL DETECTED: We recommend using Docker Desktop for Windows."
		echo "Please get Docker Desktop from https://www.docker.com/products/docker-desktop/"
		echo
		cat >&2 <<-'EOF'

			You may press Ctrl+C now to abort this script.
		EOF
		( set -x; sleep 20 )
	fi

	# Determine $dist_version: a codename for ubuntu/debian/raspbian, a
	# release number for everything else.
	case "$lsb_dist" in

		ubuntu)
			if command_exists lsb_release; then
				dist_version="$(lsb_release --codename | cut -f2)"
			fi
			if [ -z "$dist_version" ] && [ -r /etc/lsb-release ]; then
				dist_version="$(. /etc/lsb-release && echo "$DISTRIB_CODENAME")"
			fi
			;;

		debian|raspbian)
			dist_version="$(sed 's/\/.*//' /etc/debian_version | sed 's/\..*//')"
			case "$dist_version" in
				13)
					dist_version="trixie"
					;;
				12)
					dist_version="bookworm"
					;;
				11)
					dist_version="bullseye"
					;;
				10)
					dist_version="buster"
					;;
				9)
					dist_version="stretch"
					;;
				8)
					dist_version="jessie"
					;;
			esac
			;;

		centos|rhel)
			if [ -z "$dist_version" ] && [ -r /etc/os-release ]; then
				dist_version="$(. /etc/os-release && echo "$VERSION_ID")"
			fi
			;;

		*)
			if command_exists lsb_release; then
				dist_version="$(lsb_release --release | cut -f2)"
			fi
			if [ -z "$dist_version" ] && [ -r /etc/os-release ]; then
				dist_version="$(. /etc/os-release && echo "$VERSION_ID")"
			fi
			;;

	esac

	# Check if this is a forked Linux distro
	check_forked

	# Print deprecation warnings for distro versions that recently reached EOL,
	# but may still be commonly used (especially LTS versions).
	case "$lsb_dist.$dist_version" in
		centos.8|centos.7|rhel.7)
			deprecation_notice "$lsb_dist" "$dist_version"
			;;
		debian.buster|debian.stretch|debian.jessie)
			deprecation_notice "$lsb_dist" "$dist_version"
			;;
		raspbian.buster|raspbian.stretch|raspbian.jessie)
			deprecation_notice "$lsb_dist" "$dist_version"
			;;
		ubuntu.focal|ubuntu.bionic|ubuntu.xenial|ubuntu.trusty)
			deprecation_notice "$lsb_dist" "$dist_version"
			;;
		ubuntu.oracular|ubuntu.mantic|ubuntu.lunar|ubuntu.kinetic|ubuntu.impish|ubuntu.hirsute|ubuntu.groovy|ubuntu.eoan|ubuntu.disco|ubuntu.cosmic)
			deprecation_notice "$lsb_dist" "$dist_version"
			;;
		fedora.*)
			if [ "$dist_version" -lt 41 ]; then
				deprecation_notice "$lsb_dist" "$dist_version"
			fi
			;;
	esac

	# Run setup for each distro accordingly
	case "$lsb_dist" in
		ubuntu|debian|raspbian)
			pre_reqs="ca-certificates curl"
			apt_repo="deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/docker.asc] $DOWNLOAD_URL/linux/$lsb_dist $dist_version $CHANNEL"
			# Subshell keeps `set -x` from leaking into the rest of the script.
			(
				if ! is_dry_run; then
					set -x
				fi
				$sh_c 'apt-get -qq update >/dev/null'
				$sh_c "DEBIAN_FRONTEND=noninteractive apt-get -y -qq install $pre_reqs >/dev/null"
				$sh_c 'install -m 0755 -d /etc/apt/keyrings'
				$sh_c "curl -fsSL \"$DOWNLOAD_URL/linux/$lsb_dist/gpg\" -o /etc/apt/keyrings/docker.asc"
				$sh_c "chmod a+r /etc/apt/keyrings/docker.asc"
				$sh_c "echo \"$apt_repo\" > /etc/apt/sources.list.d/docker.list"
				$sh_c 'apt-get -qq update >/dev/null'
			)

			if [ "$REPO_ONLY" = "1" ]; then
				exit 0
			fi

			pkg_version=""
			if [ -n "$VERSION" ]; then
				if is_dry_run; then
					echo "# WARNING: VERSION pinning is not supported in DRY_RUN"
				else
					# Will work for incomplete versions IE (17.12), but may not actually grab the "latest" if in the test channel
					pkg_pattern="$(echo "$VERSION" | sed 's/-ce-/~ce~.*/g' | sed 's/-/.*/g')"
					search_command="apt-cache madison docker-ce | grep '$pkg_pattern' | head -1 | awk '{\$1=\$1};1' | cut -d' ' -f 3"
					pkg_version="$($sh_c "$search_command")"
					echo "INFO: Searching repository for VERSION '$VERSION'"
					echo "INFO: $search_command"
					if [ -z "$pkg_version" ]; then
						echo
						echo "ERROR: '$VERSION' not found amongst apt-cache madison results"
						echo
						exit 1
					fi
					if version_gte "18.09"; then
						search_command="apt-cache madison docker-ce-cli | grep '$pkg_pattern' | head -1 | awk '{\$1=\$1};1' | cut -d' ' -f 3"
						echo "INFO: $search_command"
						cli_pkg_version="=$($sh_c "$search_command")"
					fi
					pkg_version="=$pkg_version"
				fi
			fi
			(
				# `%=` drops a dangling "=" left behind when a version lookup
				# returned nothing, so apt never sees "package=".
				pkgs="docker-ce${pkg_version%=}"
				if version_gte "18.09"; then
					# older versions didn't ship the cli and containerd as separate packages
					pkgs="$pkgs docker-ce-cli${cli_pkg_version%=} containerd.io"
				fi
				if version_gte "20.10"; then
					pkgs="$pkgs docker-compose-plugin docker-ce-rootless-extras$pkg_version"
				fi
				if version_gte "23.0"; then
					pkgs="$pkgs docker-buildx-plugin"
				fi
				if version_gte "28.2"; then
					pkgs="$pkgs docker-model-plugin"
				fi
				if ! is_dry_run; then
					set -x
				fi
				$sh_c "DEBIAN_FRONTEND=noninteractive apt-get -y -qq install $pkgs >/dev/null"
			)
			if [ "$NO_AUTOSTART" != "1" ]; then
				start_docker_daemon
			fi
			echo_docker_as_nonroot
			exit 0
			;;
		centos|fedora|rhel)
			if [ "$(uname -m)" = "s390x" ]; then
				echo "Effective v27.5, please consult RHEL distro statement for s390x support."
				exit 1
			fi
			repo_file_url="$DOWNLOAD_URL/linux/$lsb_dist/$REPO_FILE"
			(
				if ! is_dry_run; then
					set -x
				fi
				if command_exists dnf5; then
					$sh_c "dnf -y -q --setopt=install_weak_deps=False install dnf-plugins-core"
					$sh_c "dnf5 config-manager addrepo --overwrite --save-filename=docker-ce.repo --from-repofile='$repo_file_url'"

					if [ "$CHANNEL" != "stable" ]; then
						$sh_c "dnf5 config-manager setopt \"docker-ce-*.enabled=0\""
						$sh_c "dnf5 config-manager setopt \"docker-ce-$CHANNEL.enabled=1\""
					fi
					$sh_c "dnf makecache"
				elif command_exists dnf; then
					$sh_c "dnf -y -q --setopt=install_weak_deps=False install dnf-plugins-core"
					$sh_c "rm -f /etc/yum.repos.d/docker-ce.repo /etc/yum.repos.d/docker-ce-staging.repo"
					$sh_c "dnf config-manager --add-repo $repo_file_url"

					if [ "$CHANNEL" != "stable" ]; then
						$sh_c "dnf config-manager --set-disabled \"docker-ce-*\""
						$sh_c "dnf config-manager --set-enabled \"docker-ce-$CHANNEL\""
					fi
					$sh_c "dnf makecache"
				else
					$sh_c "yum -y -q install yum-utils"
					$sh_c "rm -f /etc/yum.repos.d/docker-ce.repo /etc/yum.repos.d/docker-ce-staging.repo"
					$sh_c "yum-config-manager --add-repo $repo_file_url"

					if [ "$CHANNEL" != "stable" ]; then
						$sh_c "yum-config-manager --disable \"docker-ce-*\""
						$sh_c "yum-config-manager --enable \"docker-ce-$CHANNEL\""
					fi
					$sh_c "yum makecache"
				fi
			)

			if [ "$REPO_ONLY" = "1" ]; then
				exit 0
			fi

			pkg_version=""
			if command_exists dnf; then
				pkg_manager="dnf"
				pkg_manager_flags="-y -q --best"
			else
				pkg_manager="yum"
				pkg_manager_flags="-y -q"
			fi
			if [ -n "$VERSION" ]; then
				if is_dry_run; then
					echo "# WARNING: VERSION pinning is not supported in DRY_RUN"
				else
					if [ "$lsb_dist" = "fedora" ]; then
						pkg_suffix="fc$dist_version"
					else
						pkg_suffix="el"
					fi
					pkg_pattern="$(echo "$VERSION" | sed 's/-ce-/\\\\.ce.*/g' | sed 's/-/.*/g').*$pkg_suffix"
					search_command="$pkg_manager list --showduplicates docker-ce | grep '$pkg_pattern' | tail -1 | awk '{print \$2}'"
					pkg_version="$($sh_c "$search_command")"
					echo "INFO: Searching repository for VERSION '$VERSION'"
					echo "INFO: $search_command"
					if [ -z "$pkg_version" ]; then
						echo
						echo "ERROR: '$VERSION' not found amongst $pkg_manager list results"
						echo
						exit 1
					fi
					if version_gte "18.09"; then
						# older versions don't support a cli package
						search_command="$pkg_manager list --showduplicates docker-ce-cli | grep '$pkg_pattern' | tail -1 | awk '{print \$2}'"
						cli_pkg_version="$($sh_c "$search_command" | cut -d':' -f 2)"
					fi
					# Cut out the epoch and prefix with a '-'
					pkg_version="-$(echo "$pkg_version" | cut -d':' -f 2)"
				fi
			fi
			(
				pkgs="docker-ce$pkg_version"
				if version_gte "18.09"; then
					# older versions didn't ship the cli and containerd as separate packages
					if [ -n "$cli_pkg_version" ]; then
						pkgs="$pkgs docker-ce-cli-$cli_pkg_version containerd.io"
					else
						pkgs="$pkgs docker-ce-cli containerd.io"
					fi
				fi
				if version_gte "20.10"; then
					pkgs="$pkgs docker-compose-plugin docker-ce-rootless-extras$pkg_version"
				fi
				if version_gte "23.0"; then
					pkgs="$pkgs docker-buildx-plugin"
				fi
				# docker-model-plugin uses the same 28.2 minimum as the deb
				# branch above, so a pinned older VERSION gets the same
				# package set on both package families.
				if version_gte "28.2"; then
					pkgs="$pkgs docker-model-plugin"
				fi
				if ! is_dry_run; then
					set -x
				fi
				$sh_c "$pkg_manager $pkg_manager_flags install $pkgs"
			)
			if [ "$NO_AUTOSTART" != "1" ]; then
				start_docker_daemon
			fi
			echo_docker_as_nonroot
			exit 0
			;;
		sles)
			echo "Effective v27.5, please consult SLES distro statement for s390x support."
			exit 1
			;;
		*)
			if [ -z "$lsb_dist" ]; then
				if is_darwin; then
					echo
					echo "ERROR: Unsupported operating system 'macOS'"
					echo "Please get Docker Desktop from https://www.docker.com/products/docker-desktop"
					echo
					exit 1
				fi
			fi
			echo
			echo "ERROR: Unsupported distribution '$lsb_dist'"
			echo
			exit 1
			;;
	esac
	exit 1
}

# wrapped up in a function so that we have some protection against only getting
# half the file during "curl | sh"
do_install
|
||||
262
ansible/ansible-old/group_vars/all.yml
Normal file
262
ansible/ansible-old/group_vars/all.yml
Normal file
@ -0,0 +1,262 @@
|
||||
# Central YAML Source of Truth for Nathan's Lab (2026)
|
||||
# Edit and commit this file; Ansible playbooks should read this as canonical.
|
||||
lab_name: "nathan-lab-2026"
|
||||
canonical_source: "ansible/group_vars/all.yml"
|
||||
|
||||
# The standard operational user created on every managed host.
|
||||
# Override per-host in host_vars/ if a node uses a different login.
|
||||
lab_ansible_user: "chester"
|
||||
|
||||
# Omada Open API credentials are sourced from the encrypted vault file.
|
||||
omada_client_id: "{{ vault_omada_client_id }}"
|
||||
omada_client_secret: "{{ vault_omada_client_secret }}"
|
||||
omada_id: "{{ vault_omada_id }}"
|
||||
omada_base_url: "{{ vault_omada_base_url }}"
|
||||
|
||||
networks:
|
||||
main:
|
||||
vlan: 1
|
||||
cidr: "10.0.0.0/24"
|
||||
dhcp_pool: "10.0.0.100-10.0.0.240"
|
||||
gateway: "10.0.0.1"
|
||||
purpose: "Family / wired / main SSID"
|
||||
|
||||
infra:
|
||||
vlan: 10
|
||||
cidr: "10.0.10.0/24"
|
||||
reserved: "10.0.10.2-10.0.10.50"
|
||||
purpose: "Management / Proxmox / NAS / Heimdall mgmt"
|
||||
|
||||
iot:
|
||||
vlan: 50
|
||||
cidr: "10.0.50.0/24"
|
||||
dhcp_pool: "10.0.50.100-10.0.50.199"
|
||||
purpose: "IoT devices (Omada)"
|
||||
|
||||
guest:
|
||||
vlan: 30
|
||||
cidr: "10.0.30.0/24"
|
||||
dhcp_pool: "10.0.30.100-10.0.30.200"
|
||||
purpose: "Guest WiFi (isolated)"
|
||||
|
||||
compute:
|
||||
vlan: 200
|
||||
cidr: "10.0.200.0/24"
|
||||
purpose: "Swarm / AI grid / ephemeral compute"
|
||||
|
||||
lab_hosts:
|
||||
er7212pc:
|
||||
role: gateway
|
||||
current_ip: "10.0.0.2"
|
||||
desired_ip: "10.0.0.2"
|
||||
note: "DHCP + Omada controller"
|
||||
|
||||
pve01:
|
||||
physical_backing_host: "pve04"
|
||||
role: proxmox
|
||||
current_ip: "10.0.0.201"
|
||||
desired_ip: "10.0.10.11"
|
||||
|
||||
pve02:
|
||||
role: proxmox
|
||||
current_ip: "10.0.0.202"
|
||||
desired_ip: "10.0.10.12"
|
||||
|
||||
pve03:
|
||||
role: proxmox
|
||||
current_ip: "10.0.0.203"
|
||||
desired_ip: "10.0.10.13"
|
||||
|
||||
pve04:
|
||||
replacement_status: "retired-identity-now-backing-pve01"
|
||||
role: retired_physical_alias
|
||||
current_ip: "10.0.0.204"
|
||||
desired_ip: "10.0.10.14"
|
||||
|
||||
swarm-manager-1:
|
||||
current_ip: "10.0.0.211"
|
||||
desired_ip: "10.0.200.11"
|
||||
|
||||
swarm-manager-2:
|
||||
current_ip: "10.0.0.212"
|
||||
desired_ip: "10.0.200.12"
|
||||
|
||||
swarm-manager-3:
|
||||
current_ip: "10.0.0.213"
|
||||
desired_ip: "10.0.200.13"
|
||||
|
||||
statler:
|
||||
role: standalone_vm
|
||||
current_ip: "10.0.0.210"
|
||||
desired_ip: "10.0.0.210"
|
||||
hypervisor_host: "pve02"
|
||||
note: "Standalone Ubuntu 24.04 VM planned on pve02 with 2 vCPU, 10 GB RAM, and 32 GB disk."
|
||||
|
||||
swarm-worker-1:
|
||||
current_ip: "10.0.0.221"
|
||||
desired_ip: "10.0.200.21"
|
||||
|
||||
swarm-worker-2:
|
||||
current_ip: "10.0.0.222"
|
||||
desired_ip: "10.0.200.22"
|
||||
|
||||
swarm-worker-3:
|
||||
current_ip: "10.0.0.223"
|
||||
desired_ip: "10.0.200.23"
|
||||
|
||||
ai-lenovo:
|
||||
current_ip: "10.0.0.220"
|
||||
desired_ip: "10.0.200.20"
|
||||
onboarding_status: "tbd-needs-onboarding-like-heimdall"
|
||||
ansible_managed: false
|
||||
note: "Pending onboarding workflow before inclusion in active automation and monitoring groups."
|
||||
|
||||
synology:
|
||||
current_ip: "10.0.0.249"
|
||||
desired_ip: "10.0.10.40"
|
||||
|
||||
terramaster:
|
||||
current_ip: "10.0.0.250"
|
||||
desired_ip: "10.0.10.41"
|
||||
|
||||
waldorf:
|
||||
current_ip: "10.0.0.251"
|
||||
desired_ip: "10.0.200.30"
|
||||
lifecycle_status: "retired-shutdown"
|
||||
ansible_managed: false
|
||||
monitoring_enabled: false
|
||||
note: "Retired host; excluded from active monitoring and deployment inventories."
|
||||
|
||||
watchtower:
|
||||
current_ip: "10.0.0.200"
|
||||
desired_ip: "10.0.10.200"
|
||||
|
||||
heimdall:
|
||||
role: beelink
|
||||
current_ip: null
|
||||
desired_ip:
|
||||
mgmt: "10.0.10.2"
|
||||
lan: "10.0.0.50"
|
||||
|
||||
# === MONITORING INFRASTRUCTURE ===
|
||||
# Environment-specific configuration for monitoring stack
|
||||
monitoring:
|
||||
stack_user: "chester"
|
||||
heimdall_redis: "10.0.0.151:6379"
|
||||
watchtower_ip: "10.0.0.200"
|
||||
grafana_domain: "grafana.castaldifamily.com"
|
||||
uptime_domain: "status.castaldifamily.com"
|
||||
dozzle_domain: "logs.castaldifamily.com"
|
||||
authentik_host: "https://sso.castaldifamily.com"
|
||||
# grafana_admin_password: DEFINE IN VAULT
|
||||
|
||||
# === EDGE ROUTING TOPOLOGY ===
|
||||
# Canonical ingress model: Traefik runs on a dedicated edge host outside Swarm.
|
||||
# Swarm and standalone hosts publish routes through traefik-kop agents.
|
||||
edge_routing:
|
||||
ingress_mode: "external-traefik"
|
||||
edge_host:
|
||||
name: "heimdall"
|
||||
ip: "10.0.0.151"
|
||||
ssh_port: 22
|
||||
http_port: 80
|
||||
https_port: 443
|
||||
integration:
|
||||
# Watchtower-hosted traefik-kop instance (publishes Watchtower container routes)
|
||||
agent_image: "ghcr.io/jittering/traefik-kop:latest"
|
||||
redis_addr: "10.0.0.151:6379"
|
||||
bind_ip: "10.0.0.200" # Watchtower IP — correct for routes originating on Watchtower
|
||||
swarm:
|
||||
# Swarm-hosted traefik-kop instance (publishes Swarm service routes)
|
||||
# bind_ip MUST be a Swarm node IP — the Swarm routing mesh makes published
|
||||
# ports available on ALL nodes, so Traefik routes inbound requests here.
|
||||
bind_ip: "10.0.0.212" # swarm-manager-2 (current Leader; was swarm-manager-1 before it went down)
|
||||
proxy_network: "proxy-net" # Swarm overlay network; separate from heimdall's bridge of same name
|
||||
stack_deploy_target: "swarm-manager-2"
|
||||
migration_rules:
|
||||
deploy_traefik_in_swarm: false
|
||||
use_external_proxy_network: true
|
||||
notes:
|
||||
- "Services should attach to swarm overlay proxy-net for east-west traffic."
|
||||
- "Ingress is terminated by external Traefik at 10.0.0.151 via traefik-kop updates."
|
||||
|
||||
# Per-stack placement node overrides.
|
||||
# Update when the deploy target node changes (e.g., after node replacement).
|
||||
gitea_placement_node: "swarm-manager-2"
|
||||
authentik_placement_node: "swarm-manager-2"
|
||||
|
||||
# === SERVICE SECRETS (set via: ansible-vault encrypt_string) ===
|
||||
vault_gitea_db_password: !vault |
|
||||
$ANSIBLE_VAULT;1.1;AES256
|
||||
34623365623337336535656164623637656633356661373162356438646637333932663765323134
|
||||
6261626565646166353966393366666434356434333263330a333666393765646233303663363738
|
||||
65616665393235323132623462373435373637363262363539626163373061643930393730346633
|
||||
3232373866663034310a343661306634313766313765623439626339353635626232663662323365
|
||||
6666
|
||||
vault_authentik_secret_key: !vault |
|
||||
$ANSIBLE_VAULT;1.1;AES256
|
||||
61373834613362356638303166376135613133616139613963333632613430636136623062373161
|
||||
6335636331386565386139376234663362396361653463660a613834313263653039376363396264
|
||||
62383166346563326630323734643462326438643436626565656633636234323835333033353130
|
||||
3535306539626339320a323431666164353038323166633663656265613266366535623130323165
|
||||
38353833393934393764376331333464663337616432623033303830393464303966643036656538
|
||||
34396337363163663566383063396130616530633363636461343531636438303963653733343830
|
||||
66636165656563653164383364643032373135666263316137623761656332316130313235623232
|
||||
33623462343639366566
|
||||
vault_authentik_postgres_password: !vault |
|
||||
$ANSIBLE_VAULT;1.1;AES256
|
||||
37356530373764353038343038663662333535323436336663613239333234363036626462656130
|
||||
3138313535353838306563663565663230646561313234390a313166623232383364623766383961
|
||||
30363065373065353365616239663562333833313139636137616561616465656462613238323932
|
||||
3630333538366430370a616263633263336436303662373530323161316534313737366633643535
|
||||
30326636383131353265613463363431666536313966366364666564623637343737
|
||||
vlan_defaults:
|
||||
dns_domain: "home.lab"
|
||||
ntp_servers:
|
||||
- "10.0.10.2"
|
||||
|
||||
# Plex bootstrap claim token — used only on first server claim.
|
||||
vault_plex_claim: !vault |
|
||||
$ANSIBLE_VAULT;1.1;AES256
|
||||
31373365323534353264373735363937623566646633653434613038396463303164396138306661
|
||||
3130323134656463383835366130663632323561326265350a653162643064643563383738373637
|
||||
36363135613735663037303036613637313431336139343430313963393930303532666366336365
|
||||
3734386639393336310a323964386233346134616164656663393731376632643037313734323830
|
||||
65366334356531623339643066373237306263323063383963363330346665316435
|
||||
|
||||
# Authentik outpost tokens for standalone arr services on statler.
|
||||
vault_authentik_token_sonarr: !vault |
|
||||
$ANSIBLE_VAULT;1.1;AES256
|
||||
39303463306665356436626265653339663163613464366237663234376135306366303739343266
|
||||
3762646230666263393330373833393037613165373337380a336663646161613534353232663761
|
||||
65376666663063643066323831366265633337653630666235636234393130646361383032383032
|
||||
3433393235633762390a376561303866373739613663333461643938353931626134336665383164
|
||||
34346538376436313438313733393963303735646632323739313137626466356138636266396434
|
||||
61363737636139386665616438646439366139303739646530316566373563306565623637363661
|
||||
343938653662646132373565303836353030
|
||||
vault_authentik_token_radarr: !vault |
|
||||
$ANSIBLE_VAULT;1.1;AES256
|
||||
32363735353663623031356362323765616232326234333564323839626236653634626263313765
|
||||
6335653537656531396431366662616163366166633462390a346363633364363866373732373939
|
||||
61666261616266333465393837383337313565613539303732396530333833666563653139353238
|
||||
6537383336613933370a333662323339396463353134363635383430353133646331376533303861
|
||||
30303765373566353633643261376430363837386239363261396235333033636563366231323564
|
||||
35643564663866653831663633333436653330363130656631356166363731356639643238656530
|
||||
643062636137396333383438623534346636
|
||||
vault_authentik_token_sabnzbd: !vault |
|
||||
$ANSIBLE_VAULT;1.1;AES256
|
||||
30373635366337343236353866623234383665386461356637353534666461613466373463616531
|
||||
3837646263643864636331343364663563666531333861660a626335393762353862663564656465
|
||||
61373430336336373062623563633832383261333035353432666265313435363132316561383130
|
||||
3236643962313765630a386634313331643639363035623663616166313532623932643162633762
|
||||
64353335393764653031633033323862643732326434613564363935336166386239613932653765
|
||||
32323335306634326133613334386262316464613166373031376362653266653937303131653165
|
||||
376436643431366561323866383231343362
|
||||
# Usage notes:
|
||||
# - Treat this file as the single source of truth for IPs and VLANs.
|
||||
# - Ansible playbooks should read `networks` and `lab_hosts` to render configs,
|
||||
# update `inventory/hosts.ini`, and generate DHCP reservation templates.
|
||||
#
|
||||
# Discussion queue (2026-03-13):
|
||||
# - Decide NAS + Ansible + Watchtower reporting model (agentless scrape, exporter sidecar, or API/blackbox only).
|
||||
# - Decide Omada onboarding scope and what should be automated via Ansible versus documented/manual operations.
|
||||
25
ansible/ansible-old/group_vars/vault/.gitignore
vendored
Normal file
25
ansible/ansible-old/group_vars/vault/.gitignore
vendored
Normal file
@ -0,0 +1,25 @@
|
||||
# Vault encrypted variables directory
|
||||
#
|
||||
# This directory contains ENCRYPTED credentials and API keys using Ansible Vault.
|
||||
#
|
||||
# SECURITY POLICY:
|
||||
# - ✅ DO commit: Encrypted .yml files (e.g., all.yml, production.yml)
|
||||
# These are safe because they are encrypted and cannot be decrypted without the vault password.
|
||||
# - ❌ DON'T commit: Plaintext passwords or unencrypted files
|
||||
# Keep these patterns blocked in .gitignore
|
||||
# - ❌ DON'T commit: .vault_pass, password files, or temporary backups
|
||||
|
||||
# Ignore plaintext password/backup files
|
||||
*.orig
|
||||
*.bak
|
||||
*.tmp
|
||||
.vault_pass
|
||||
password
|
||||
vault_password
|
||||
vault_password.txt
|
||||
|
||||
# Ignore editor temporary files
|
||||
*~
|
||||
*.swp
|
||||
*.swo
|
||||
.DS_Store
|
||||
27
ansible/ansible-old/group_vars/vault/all.yml
Normal file
27
ansible/ansible-old/group_vars/vault/all.yml
Normal file
@ -0,0 +1,27 @@
|
||||
$ANSIBLE_VAULT;1.1;AES256
|
||||
62376339373839396561386638616366313633303966333566386138313162616463366339323834
|
||||
3962656465346564343161643561353434613163623861350a366362363134396231616165333265
|
||||
32613166336432356165386562333764323030306266323764353833613235393766653565326564
|
||||
6235353936336131630a383637303033333161613361366230663733313031323162386431646464
|
||||
64303164376463316232386366633039316638326634376137313264326533613137306164633061
|
||||
64616164353933646166383735653464336436633364623739386438636438306434346234613331
|
||||
62396363336162316363386665643961636161623731356532393537333264323731313933613830
|
||||
35343363353231303235396438666364666134643831396139643433656436636631633061623032
|
||||
64326337336165373439666639663861393765633132663337363931306462323533646633323832
|
||||
39626331663764393032316134613033306334303862346533343230326437326638626436303438
|
||||
63646130633163616262306665313637383065633563613739373365363133623631326665316334
|
||||
31376238616630633037613939643235353031633962313666383030613833643832663763323035
|
||||
62333633393339636561313463306433303537356161303664663566383065393031663232623465
|
||||
38383737373933303161633566663832636564663838343038613333346338636666313134353334
|
||||
39333862393665333366396661643832366133313164363731656139326630633064633137343036
|
||||
32633630623532646132623230653064623432626537653261323235356238303861663330346239
|
||||
35393563656634663339653862313136366537633130636538656439323437613164313836653136
|
||||
62346136646336363333303730616130616263623765366230663661626236663766616238336336
|
||||
31656561653062666563316439393733656636303164613433373265303266303038376465646533
|
||||
65626237383432353037636535646433336163316235343130343065643837653235343333326432
|
||||
31343766626531386338643232383865656362326266343034323238376232333433386535666537
|
||||
30333435366232303132306561643665303933393430373837326134393030323163303939376661
|
||||
35316661313035393531613865383234353766626338303439613136343634356131626137663437
|
||||
62663961623232373939356636333361666232626563383361323462666639653162636166666462
|
||||
31643434633162316532326336303335633466303731313438613936323364336336356631393032
|
||||
3263323261336361623430333331663263393862666435306639
|
||||
7
ansible/ansible-old/hosts.ini
Normal file
7
ansible/ansible-old/hosts.ini
Normal file
@ -0,0 +1,7 @@
|
||||
# DEPRECATED FILE
|
||||
#
|
||||
# Canonical inventory path:
|
||||
# ansible/inventory/hosts.ini
|
||||
#
|
||||
# This file is intentionally kept as a pointer to prevent accidental use of
|
||||
# stale host definitions from older workflows.
|
||||
20
ansible/ansible-old/inventory/host_vars/heimdall.yml
Normal file
20
ansible/ansible-old/inventory/host_vars/heimdall.yml
Normal file
@ -0,0 +1,20 @@
|
||||
---
|
||||
# host_vars/heimdall.yml
|
||||
# Vault-encrypted host secrets for Heimdall edge role
|
||||
heimdall_cf_dns_api_token: !vault |
|
||||
$ANSIBLE_VAULT;1.1;AES256
|
||||
39363263373530393233323165303336613536383739666137353635386163663536396539376233
|
||||
6134386639313565336434656662343361353863303863610a643837353932393836316530623338
|
||||
35656461346463386635336431383138376132666362353964363531613465383966616132366361
|
||||
6133623330653562300a326134346666393462303739646266356633383366356364613432313533
|
||||
32353462663233626664303630663139383031643034623930623630303837333933393062383031
|
||||
3339663233626535633735303535353565323132303863633932
|
||||
|
||||
heimdall_dashboard_htpasswd: !vault |
|
||||
$ANSIBLE_VAULT;1.1;AES256
|
||||
34333333383665643861643735663664626538303836653565333430326530643434333835396630
|
||||
6563386232623937626364323937356266363565353134370a616634363463633736663261646236
|
||||
35653036666339663562653633393436366334343737666530626233323366373933636238383764
|
||||
3261386363363766650a643266363636353730373161643762666430653233633033323634626166
|
||||
66303230643836303933623564363766636531313436613232326138653764353037643965646136
|
||||
3262393863306333383632396133386139663163376335333361
|
||||
5
ansible/ansible-old/inventory/host_vars/terramaster.yml
Normal file
5
ansible/ansible-old/inventory/host_vars/terramaster.yml
Normal file
@ -0,0 +1,5 @@
|
||||
---
|
||||
ansible_user: chester
|
||||
ansible_ssh_private_key_file: /home/chester/.ssh/id_ed25519
|
||||
# TerraMaster key was deployed via terramaster_deploy_ssh_key.yml.
|
||||
# If key auth breaks, re-run that playbook with --ask-pass to redeploy.
|
||||
85
ansible/ansible-old/inventory/hosts.ini
Normal file
85
ansible/ansible-old/inventory/hosts.ini
Normal file
@ -0,0 +1,85 @@
|
||||
# Generated inventory from ../group_vars/all.yml
|
||||
|
||||
# --- Watchtower (local controller) ---
|
||||
[watchtower]
|
||||
localhost ansible_connection=local
|
||||
|
||||
# --- Proxmox Cluster (management) ---
|
||||
[proxmox_cluster]
|
||||
pve01 ansible_host=10.0.0.201 ansible_user=root ansible_ssh_private_key_file=/home/chester/.ssh/id_ed25519 ansible_port=22
|
||||
pve02 ansible_host=10.0.0.202 ansible_user=root ansible_ssh_private_key_file=/home/chester/.ssh/id_ed25519 ansible_port=22
|
||||
pve03 ansible_host=10.0.0.203 ansible_user=root ansible_ssh_private_key_file=/home/chester/.ssh/id_ed25519 ansible_port=22
|
||||
|
||||
[proxmox_cluster:vars]
|
||||
ansible_user=root
|
||||
ansible_become=true
|
||||
ansible_python_interpreter=/usr/bin/python3
|
||||
|
||||
# --- Swarm Managers ---
|
||||
[swarm_managers]
|
||||
swarm-manager-1 ansible_host=10.0.0.211
|
||||
swarm-manager-2 ansible_host=10.0.0.212
|
||||
swarm-manager-3 ansible_host=10.0.0.213
|
||||
|
||||
# --- Swarm Workers ---
|
||||
[swarm_workers]
|
||||
swarm-worker-1 ansible_host=10.0.0.221
|
||||
swarm-worker-2 ansible_host=10.0.0.222
|
||||
swarm-worker-3 ansible_host=10.0.0.223
|
||||
|
||||
[swarm_hosts:children]
|
||||
swarm_managers
|
||||
swarm_workers
|
||||
|
||||
[swarm_hosts:vars]
|
||||
ansible_user=chester
|
||||
ansible_ssh_private_key_file=/home/chester/.ssh/id_ed25519
|
||||
|
||||
# --- Standalone Ubuntu VMs ---
|
||||
[standalone_ubuntu]
|
||||
statler ansible_host=10.0.0.210
|
||||
|
||||
[standalone_ubuntu:vars]
|
||||
ansible_user=chester
|
||||
ansible_ssh_private_key_file=/home/chester/.ssh/id_ed25519
|
||||
|
||||
# --- Heimdall (Edge Router / Traefik host) ---
|
||||
[heimdall_hosts]
|
||||
heimdall ansible_host=10.0.0.151
|
||||
|
||||
[heimdall_hosts:vars]
|
||||
ansible_user=chester
|
||||
ansible_ssh_private_key_file=/home/chester/.ssh/id_ed25519
|
||||
|
||||
# --- AI Grid ---
|
||||
[ai_grid]
|
||||
|
||||
# --- Docker Hosts ---
|
||||
[docker_hosts]
|
||||
statler ansible_host=10.0.0.210
|
||||
|
||||
# --- Storage ---
|
||||
[storage]
|
||||
synology ansible_host=10.0.0.249 ansible_scp_if_ssh=True
|
||||
terramaster ansible_host=10.0.0.250 ansible_scp_if_ssh=True
|
||||
|
||||
# --- Lifecycle: Onboarding TBD ---
|
||||
[onboarding_tbd]
|
||||
ai-lenovo ansible_host=10.0.0.220
|
||||
|
||||
# --- Lifecycle: Retired / Shutdown ---
|
||||
[retired_hosts]
|
||||
waldorf ansible_host=10.0.0.251
|
||||
|
||||
# --- Aggregate grouping ---
|
||||
[ubuntu_lab:children]
|
||||
swarm_managers
|
||||
swarm_workers
|
||||
standalone_ubuntu
|
||||
ai_grid
|
||||
docker_hosts
|
||||
storage
|
||||
|
||||
[ubuntu_lab:vars]
|
||||
ansible_user=chester
|
||||
ansible_ssh_private_key_file=/home/chester/.ssh/id_ed25519
|
||||
340
ansible/ansible-old/outputs/SWARM_TOPOLOGY_ANALYSIS_20260312.md
Normal file
340
ansible/ansible-old/outputs/SWARM_TOPOLOGY_ANALYSIS_20260312.md
Normal file
@ -0,0 +1,340 @@
|
||||
---
|
||||
# Hardware Specifications & Docker Swarm Topology Analysis
|
||||
# Generated: 2026-03-12
|
||||
# Subject Hosts: pve03 (10.0.0.203) vs pve04 (10.0.0.204)
|
||||
# Context: Evaluating 3-node identical Proxmox cluster for Docker Swarm workloads
|
||||
|
||||
---
|
||||
|
||||
## EXECUTIVE SUMMARY
|
||||
|
||||
**Finding**: pve03 and pve04 are **NOT identical**, with meaningful differences:
|
||||
- **pve03**: 10 cores, 23.6 GB RAM, unknown storage capacity (already clustered, running 3 VMs)
|
||||
- **pve04**: 14 cores, 15 GB RAM, 238.5 GB NVMe SSD (fresh, not yet clustered)
|
||||
|
||||
**Recommendation for "3 identically-spec'd devices":**
|
||||
- **Option A (Recommended)**: Use **pve04 as the template model**. Procurement should source 3× Intel Core i5-13500T machines with 15+ GB RAM and 240+ GB NVMe storage. pve04 is the better baseline (better single-thread performance, dedicated NVMe, fresh OS).
|
||||
- **Option B**: Keep **pve03 as template**. Run a deeper audit on pve03's actual storage (it has 21 loop/dm devices—unclear if additional storage is attached). Backfill pve04 and a 3rd host to match pve03's full config.
|
||||
|
||||
**Verdict**: **pve04 > pve03 for Swarm baseline**. The i5-13500T offers superior CPU performance (4600 MHz boost vs 2885 MHz), dedicated fast storage, and is freshly provisioned. Use pve04 as the reference architecture for the 3rd node.
|
||||
|
||||
---
|
||||
|
||||
## DETAILED HARDWARE COMPARISON
|
||||
|
||||
### CPU Specifications
|
||||
|
||||
| Dimension | pve03 | pve04 | Status |
|
||||
|-----------|-------|-------|--------|
|
||||
| **Model** | Unknown / unrecognized | Intel Core i5-13500T | ✅ pve04 superior |
|
||||
| **Architecture** | x86_64 | x86_64 | ✅ Match |
|
||||
| **Socket Count** | 1 | 1 | ✅ Match |
|
||||
| **Cores per Socket** | 10 | 14 | ⚠️ **MISMATCH** |
|
||||
| **Logical CPUs (with HT)** | 10 | 20 | ⚠️ **MISMATCH** |
|
||||
| **Max Frequency** | 2,885 MHz | 4,600 MHz | ⚠️ **pve04 ~59% higher max clock** |
|
||||
| **Min Frequency** | Unknown | 800 MHz | — |
|
||||
| **Microcode Level** | 0x437 | 0x3a | — |
|
||||
|
||||
**Interpretation:**
|
||||
- pve04's i5-13500T is a **13th-gen Intel desktop CPU** (2023), significantly newer and faster than pve03
|
||||
- pve03's CPU could be a degraded/limited processor or a different i5/i7 SKU—need clarification
|
||||
- **For Docker Swarm workloads**: pve04's higher clock speed (4600 MHz) means better latency-sensitive tasks; pve03's 10 cores are still adequate for the planned 2 VMs (manager + worker) per node
|
||||
|
||||
**Recommendation**: If strict "identical" is the mandate, **pve04 is the better model to replicate**. Purchasing 3× i5-13500T machines ensures:
|
||||
1. Consistent single-threaded performance
|
||||
2. Known thermal/power envelope
|
||||
3. Support (retail CPUs, widely available)
|
||||
|
||||
---
|
||||
|
||||
### Memory (RAM) Specifications
|
||||
|
||||
| Dimension | pve03 | pve04 | Status |
|
||||
|-----------|-------|-------|--------|
|
||||
| **Total RAM** | 23.6 GB | 15.0 GB | ⚠️ **MISMATCH** |
|
||||
| **Free RAM** | 12.4 GB | 13.0 GB | ⚠️ pve03 has extra, currently used |
|
||||
| **Used by OS + Proxmox** | ~11.2 GB | ~1.7 GB | ⚠️ pve03 heavier |
|
||||
|
||||
**Interpretation:**
|
||||
- pve03: 23.6 GB total (likely 24 GB installed: 2× 12 GB, or 1× 16 GB + 1× 8 GB SODIMM/UDIMM sticks)
|
||||
- pve04: 15 GB total (likely 1× 16 GB, with 1 GB reserved for BIOS/SMM)
|
||||
- pve03 is using ~11 GB for the OS and Proxmox daemon + 3 running VMs
|
||||
- pve04 is minimal (fresh install, no VMs)
|
||||
|
||||
**Validation Against Swarm Requirements:**
|
||||
- Each node will host 2 VMs: 1 manager (2 cores, 4 GB RAM) + 1 worker (2 cores, 4 GB RAM), matching the target design below
|
||||
- Proxmox overhead: ~2-4 GB per node
|
||||
- **Minimum needed: 12+ GB RAM per node** (8 GB for VMs + up to 4 GB overhead) ✅ Both qualify
|
||||
- **Optimal: 16 GB** ✅ pve04 meets this; pve03 exceeds it
|
||||
|
||||
**Recommendation**: Use **16 GB as the standard** for 3-node cluster (matches pve04). This is cost-effective and provides ample headroom.
|
||||
|
||||
---
|
||||
|
||||
### Storage Specifications
|
||||
|
||||
| Dimension | pve03 | pve04 | Status |
|
||||
|-----------|-------|-------|--------|
|
||||
| **Primary Disk(s)** | Unknown (21 loop/dm devices detected) | 1× 238.5 GB NVMe SSD | ⚠️ **pve04 transparent** |
|
||||
| **Root FS Capacity** | 68 GB | 238.5 GB | ⚠️ **MISMATCH** |
|
||||
| **Root FS Available** | 59 GB free | ~230 GB available | ⚠️ pve04 has more room |
|
||||
| **Storage Type** | Unknown (likely SATA SSD or array) | Enterprise-grade NVMe | — |
|
||||
|
||||
**Interpretation:**
|
||||
- pve03's storage is **opaque**: 21 loop and device-mapper devices suggest:
|
||||
- Possible RAID configuration (dm-* = device mapper)
|
||||
- LVM (Logical Volume Manager) setup
|
||||
- Possibly shared storage mounted
|
||||
- Current state: ~68 GB LVM volume, 9 GB used
|
||||
- pve04's storage is **straightforward**: Single 238.5 GB NVMe SSD, clean LVM setup, minimal OS footprint
|
||||
|
||||
**VM Storage Requirements (per node):**
|
||||
- 1 Manager VM: 32 GB disk (from the provisioning spec in your playbook)
|
||||
- 1 Worker VM: 32 GB disk
|
||||
- **Total per node: 64 GB guest storage** (+ Proxmox root FS)
|
||||
- **Total available after OS: pve03 ≈ 59 GB, pve04 ≈ 230 GB**
|
||||
|
||||
**⚠️ CRITICAL FINDING**: pve03 has **insufficient disk capacity** for the planned topology (needs 64 GB for VMs + OS buffer = ~80 GB, only has ~59 GB free). **Unless pve03 has additional storage mounted (not visible in the scan), it cannot host 2 full 32 GB VMs.**
|
||||
|
||||
**Recommendation**:
|
||||
1. **Immediate**: Verify pve03's storage architecture. Why 21 dm/loop devices? Is there additional NAS/SAN attached?
|
||||
2. **For 3rd node procurement**: Use **pve04 as baseline**:
|
||||
- 240+ GB NVMe SSD (minimum)
|
||||
- Clean, single-drive configuration (KISS principle)
|
||||
- Sufficient headroom for VMs + snapshots + log growth
|
||||
|
||||
---
|
||||
|
||||
### Network Specifications
|
||||
|
||||
| Dimension | pve03 | pve04 | Status |
|
||||
|-----------|-------|-------|--------|
|
||||
| **Interface Count** | 6 interfaces | 4 interfaces | — |
|
||||
| **Bridge** | vmbr0 + tap devices | vmbr0 visible | ✅ Both standard |
|
||||
| **Primary Network** | wlp0s20f3 + nic0 | wlp0s20f3 + nic0 | ✅ Match (suggest renaming nic0) |
|
||||
|
||||
**Interpretation:**
|
||||
- Both nodes have the **same network card models** (wlp0s20f3 = wireless, nic0 = Ethernet)
|
||||
- pve03 has **2 tap devices** (tap301i0, tap302i0) = VM network interfaces from running VMs
|
||||
- pve04 has **no tap devices** = freshly imaged, no VMs yet
|
||||
- **Corosync / Proxmox Cluster**: Both will use vmbr0 for inter-node communication
|
||||
|
||||
**Recommendation**: Both nodes are network-compatible. No issues for Docker Swarm overlay networking.
|
||||
|
||||
---
|
||||
|
||||
### Proxmox & Cluster Status
|
||||
|
||||
| Dimension | pve03 | pve04 | Status |
|
||||
|-----------|-------|-------|--------|
|
||||
| **Proxmox Version** | 9.1.6 | 9.1.1 | ⚠️ Versions are 5 patch releases apart |
|
||||
| **Kernel** | 6.17.2-1-pve | 6.17.2-1-pve | ✅ Match |
|
||||
| **OS Distro** | Debian trixie | Debian trixie | ✅ Match |
|
||||
| **Cluster Status** | ✅ Clustered (homelab) | ❌ Not clustered | — |
|
||||
| **Cluster Members** | pve01, pve02, pve03 | None yet | — |
|
||||
| **VMs Running** | 3 VMs/containers | 0 VMs | — |
|
||||
| **Uptime** | 4 days | ~0 days (fresh) | — |
|
||||
|
||||
**Interpretation:**
|
||||
- pve03 is an **active, production node** in the homelab cluster
|
||||
- pve04 is a **fresh candidate** ready for integration
|
||||
- Minor version difference (9.1.6 vs 9.1.1) is **not a blocker**—routine updates will align them
|
||||
|
||||
**Recommendation**: Update both to the latest Proxmox 9.x patch level before final cluster formation.
|
||||
|
||||
---
|
||||
|
||||
## DOCKER SWARM TOPOLOGY ANALYSIS
|
||||
|
||||
### Target Design (from documentation/architecture/compute-plane.md)
|
||||
- 3× identically-spec'd physical Proxmox nodes
|
||||
- 3× Swarm Managers (1 per node, IPs: 10.0.0.211–213)
|
||||
- 3× Swarm Workers (1 per node, IPs: 10.0.0.221–223)
|
||||
- Each VM: 2 vCPU, 4 GB RAM, 32 GB disk
|
||||
- Proxmox cluster with Corosync for HA
|
||||
- No overcommit
|
||||
|
||||
### Capacity Analysis: pve04 as Reference Model
|
||||
|
||||
#### CPU
|
||||
- **pve04 Spec**: 14 cores, 1 socket, 4600 MHz peak
|
||||
- **Planned Usage**: 4 vCPU (2 for manager, 2 for worker) = **28.6% utilization**
|
||||
- **Proxmox/Corosync Overhead**: ~1 vCPU
|
||||
- **Available Headroom**: 14 - 4 - 1 = **9 vCPU spare**
|
||||
- **Verdict**: ✅ **EXCELLENT**. Can sustain workload + spikes + 2x VM migration
|
||||
|
||||
#### Memory (15 GB)
|
||||
- **Planned Usage**: 4 GB (manager) + 4 GB (worker) = 8 GB
|
||||
- **Proxmox OS + daemons**: ~2–3 GB
|
||||
- **Available Headroom**: 15 - 8 - 2.5 = **4.5 GB spare**
|
||||
- **Verdict**: ✅ **ADEQUATE**. No aggressive swapping. Supports scheduled workload growth.
|
||||
|
||||
#### Storage (240 GB)
|
||||
- **Planned Usage**: 32 GB (manager) + 32 GB (worker) = 64 GB
|
||||
- **Proxmox OS**: ~8 GB
|
||||
- **Snapshots/Logs Buffer**: ~20 GB
|
||||
- **Total Planned**: ~92 GB
|
||||
- **Available Headroom**: 240 - 92 = **148 GB spare**
|
||||
- **Verdict**: ✅ **EXCELLENT**. Ample room for workload scaling, backups, experiments.
|
||||
|
||||
#### Network
|
||||
- **Swarm Overlay**: vmbr0 at 1 Gbps
|
||||
- **Expected inter-node throughput**: <100 Mbps for modest swarm (10–20 containers)
|
||||
- **Verdict**: ✅ **ADEQUATE** for Docker Swarm in homelab. Upgrade to 10 Gbps if production-scale or data-intensive AI workloads planned.
|
||||
|
||||
---
|
||||
|
||||
### High-Availability & Resilience
|
||||
|
||||
#### Quorum Analysis
|
||||
- **3 Proxmox Nodes**: Corosync quorum = 2/3 nodes required
|
||||
- Can tolerate 1 node failure ✅ Good
|
||||
- If node1 fails: quorum = nodes 2+3 (still ≥2) → **cluster remains operational**
|
||||
- **3 Swarm Managers**: Raft consensus quorum = 2/3 nodes required
|
||||
- Can tolerate 1 manager failure ✅ Good
|
||||
- If manager1 fails: quorum = managers 2+3 (still ≥2) → **swarm remains operational**
|
||||
|
||||
#### Failure Scenarios
|
||||
| Scenario | Outcome | Swarm Impact |
|
||||
|----------|---------|--------------|
|
||||
| 1 node power fails | Surviving nodes take over VMs | Containers restart on node 2&3 |
|
||||
| 1 node storage corrupt | Proxmox HA can restart VMs on peer | Brief service interruption (~30s) |
|
||||
| 1 node network partition | Corosync detects; quorum = 2 survivors | Cluster continues; isolated node reboots |
|
||||
| 2 nodes fail simultaneously | Game over; cluster non-functional | **ALL workload lost** |
|
||||
|
||||
**Verdict**: Design supports N-1 failure tolerance. **Very good for homelab.**
|
||||
|
||||
---
|
||||
|
||||
## SPECIAL CONSIDERATIONS FOR pve03
|
||||
|
||||
### Storage Mystery: 21 Loop/Device-Mapper Devices
|
||||
**Questions to Investigate:**
|
||||
1. Does pve03 mount external NAS/SAN storage (e.g., Synology 10.0.0.249)?
|
||||
2. Is there a RAID or LVM snapshot setup?
|
||||
3. Were multiple physical drives present originally, now failed?
|
||||
|
||||
**Action Items:**
|
||||
```bash
|
||||
# From watchtower or pve03:
|
||||
pvesh get /storage --output-format json # List all Proxmox storage targets
|
||||
zfs list # If ZFS in use
|
||||
lvs # LVM volumes
|
||||
pvdisplay # LVM physical volumes
|
||||
df -i # Inode usage (helps diagnose loop mounts)
|
||||
```
|
||||
|
||||
**Implication**: Until pve03's storage is clarified, it **cannot be used as a template** for the 3rd identical host.
|
||||
|
||||
---
|
||||
|
||||
## FINAL RECOMMENDATIONS
|
||||
|
||||
### 1. **Short-Term (Immediate)**
|
||||
|
||||
**Action**: Clarify pve03's storage architecture.
|
||||
```bash
|
||||
# SSH into pve03 via watchtower relay or direct if SSH key added
|
||||
ssh root@10.0.0.203 "pvesh get /storage --output-format json"
|
||||
ssh root@10.0.0.203 "lvs && pvs"
|
||||
ssh root@10.0.0.203 "zfs list 2>/dev/null || echo 'ZFS not in use'"
|
||||
```
|
||||
|
||||
**If pve03 has external storage**:
|
||||
- Note the configuration (NAS IP, mount method, capacity)
|
||||
- Plan to replicate in 3rd node
|
||||
|
||||
**If pve03 is just a single drive**:
|
||||
- Proceed with pve04 as template
|
||||
|
||||
### 2. **Medium-Term (Before Final 3-Node Deployment)**
|
||||
|
||||
**Option A: Adopt pve04 as Template (RECOMMENDED)**
|
||||
- Procurement: 3× machines with **Intel i5-13500T, 16 GB RAM, 256 GB NVMe**
|
||||
- Cost: ~$200–300 per node (retail Core i5 desktop equivalent)
|
||||
- Timeline: 1–2 weeks (sourcing)
|
||||
- Next step: Install Proxmox 9.x on 3rd node; cluster join
|
||||
|
||||
**Option B: Backfill pve03 Config to pve04 & 3rd Node**
|
||||
- Upgrade pve04 RAM from 15 GB → 24 GB (add 1× 8 GB SODIMM)
|
||||
- Verify pve03's external storage is documented
|
||||
- Replicate in pve04 and 3rd node
|
||||
- Cost: ~$30–50 per node (additional RAM)
|
||||
- Timeline: 1 week
|
||||
- Risk: Depends on clarifying pve03 fully
|
||||
|
||||
**Recommendation Pick**: **Option A is cleaner**. pve04 is fresher, faster, and has clear config.
|
||||
|
||||
### 3. **Long-Term (Post-3-Node Commissioning)**
|
||||
|
||||
**Cluster Formation:**
|
||||
```bash
|
||||
# On pve04 (assuming elected as initial leader):
|
||||
pvecm create homelab
|
||||
|
||||
# On 3rd new node:
|
||||
pvecm add <pve04_ip_or_hostname>
|
||||
|
||||
# Verify:
|
||||
pvesh get /cluster/status
|
||||
```
|
||||
|
||||
**VM Provisioning:**
|
||||
```bash
|
||||
# Use your existing playbook:
|
||||
ansible-playbook -i inventory/hosts.ini \
|
||||
playbooks/proxmox/provision_swarm_vms.yml \
|
||||
-e target_host=pve04 \
|
||||
-e target_host=pve0N # For 3rd node. NOTE: a repeated -e key overrides the earlier value, so run the playbook once per target host.
|
||||
```
|
||||
|
||||
**Docker Swarm Init:**
|
||||
```bash
|
||||
# On swarm-manager-1 (e.g., 10.0.0.211):
|
||||
docker swarm init --advertise-addr 10.0.0.211
|
||||
|
||||
# On manager-2 & manager-3:
|
||||
docker swarm join --token <manager-token> 10.0.0.211:2377
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## APPENDIX: Hardware Specs Collected
|
||||
|
||||
### pve03 (10.0.0.203) – Full Details
|
||||
```
|
||||
CPU: 10 cores, 1 socket, max 2885 MHz
|
||||
Memory: 23.6 GB total, 12.4 GB free
|
||||
Storage: 68 GB root LVM (59 GB free) + 21 dm/loop devices (TBD)
|
||||
OS: Debian trixie, kernel 6.17.2-1-pve
|
||||
Proxmox: 9.1.6
|
||||
Network: 6 interfaces (vmbr0, nic0, wlp0s20f3, tap301i0, tap302i0, lo)
|
||||
Cluster Status: Clustered (homelab), 3 VMs running
|
||||
Uptime: 4 days
|
||||
```
|
||||
|
||||
### pve04 (10.0.0.204) – Full Details
|
||||
```
|
||||
CPU: Intel Core i5-13500T, 14 cores, 1 socket, 20 vCPUs (HT), max 4600 MHz
|
||||
Memory: 15.0 GB total, ~13.0 GB available, 8.0 GB swap
|
||||
Storage: 238.5 GB NVMe SSD (nvme0n1), single drive
|
||||
OS: Debian trixie, kernel 6.17.2-1-pve
|
||||
Proxmox: 9.1.1
|
||||
Network: 4 interfaces (vmbr0, nic0, wlp0s20f3, lo)
|
||||
Cluster Status: Not clustered yet, 0 VMs
|
||||
Uptime: Fresh (just rebooted)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## CONCLUSION
|
||||
|
||||
**pve04 is the superior choice** for replication to a 3-node cluster because of:
|
||||
1. **CPU performance**: 4600 MHz vs 2885 MHz (~59% higher max clock, favoring single-thread work)
|
||||
2. **Storage clarity**: Single 240 GB NVMe (vs pve03's mysterious setup)
|
||||
3. **Ballpark specifications**: 15 GB RAM + 240 GB SSD = excellent value for Swarm workloads
|
||||
4. **Freshness**: No legacy config debt
|
||||
|
||||
**Immediate action**: Clarify pve03's storage. Then either adopt pve04 as template or provide additional pve03 context to backfill.
|
||||
|
||||
**Expected outcome**: 3-node Proxmox cluster running 6 Docker Swarm nodes (3 managers, 3 workers) with excellent resilience, performance, and headroom for future growth.
|
||||
@ -0,0 +1,52 @@
|
||||
Project: node-replacement-mar13-2026
|
||||
Mode: validate
|
||||
Join node: pve01
|
||||
Join anchor host: pve02
|
||||
Join anchor IP: 10.0.0.202
|
||||
Timestamp: 20260313T143107
|
||||
|
||||
=== pvecm nodes (anchor) ===
|
||||
|
||||
Membership information
|
||||
----------------------
|
||||
Nodeid Votes Name
|
||||
1 1 pve01
|
||||
2 1 pve02 (local)
|
||||
3 1 pve03
|
||||
|
||||
=== pvecm status (anchor) ===
|
||||
Cluster information
|
||||
-------------------
|
||||
Name: homelab
|
||||
Config Version: 4
|
||||
Transport: knet
|
||||
Secure auth: on
|
||||
|
||||
Quorum information
|
||||
------------------
|
||||
Date: Fri Mar 13 14:31:11 2026
|
||||
Quorum provider: corosync_votequorum
|
||||
Nodes: 3
|
||||
Node ID: 0x00000002
|
||||
Ring ID: 1.3c
|
||||
Quorate: Yes
|
||||
|
||||
Votequorum information
|
||||
----------------------
|
||||
Expected votes: 3
|
||||
Highest expected: 3
|
||||
Total votes: 3
|
||||
Quorum: 2
|
||||
Flags: Quorate
|
||||
|
||||
Membership information
|
||||
----------------------
|
||||
Nodeid Votes Name
|
||||
0x00000001 1 10.0.0.201
|
||||
0x00000002 1 10.0.0.202 (local)
|
||||
0x00000003 1 10.0.0.203
|
||||
|
||||
=== service state on join node ===
|
||||
active
|
||||
active
|
||||
inactive
|
||||
@ -0,0 +1,52 @@
|
||||
Project: node-replacement-mar13-2026
|
||||
Mode: join
|
||||
Join node: pve01
|
||||
Join anchor host: pve02
|
||||
Join anchor IP: 10.0.0.202
|
||||
Timestamp: 20260313T143115
|
||||
|
||||
=== pvecm nodes (anchor) ===
|
||||
|
||||
Membership information
|
||||
----------------------
|
||||
Nodeid Votes Name
|
||||
1 1 pve01
|
||||
2 1 pve02 (local)
|
||||
3 1 pve03
|
||||
|
||||
=== pvecm status (anchor) ===
|
||||
Cluster information
|
||||
-------------------
|
||||
Name: homelab
|
||||
Config Version: 5
|
||||
Transport: knet
|
||||
Secure auth: on
|
||||
|
||||
Quorum information
|
||||
------------------
|
||||
Date: Fri Mar 13 14:31:29 2026
|
||||
Quorum provider: corosync_votequorum
|
||||
Nodes: 3
|
||||
Node ID: 0x00000002
|
||||
Ring ID: 1.3c
|
||||
Quorate: Yes
|
||||
|
||||
Votequorum information
|
||||
----------------------
|
||||
Expected votes: 3
|
||||
Highest expected: 3
|
||||
Total votes: 3
|
||||
Quorum: 2
|
||||
Flags: Quorate
|
||||
|
||||
Membership information
|
||||
----------------------
|
||||
Nodeid Votes Name
|
||||
0x00000001 1 10.0.0.201
|
||||
0x00000002 1 10.0.0.202 (local)
|
||||
0x00000003 1 10.0.0.203
|
||||
|
||||
=== service state on join node ===
|
||||
active
|
||||
active
|
||||
active
|
||||
@ -0,0 +1,52 @@
|
||||
Project: node-replacement-mar13-2026
|
||||
Mode: join
|
||||
Join node: pve01
|
||||
Join anchor host: pve02
|
||||
Join anchor IP: 10.0.0.202
|
||||
Timestamp: 20260313T143430
|
||||
|
||||
=== pvecm nodes (anchor) ===
|
||||
|
||||
Membership information
|
||||
----------------------
|
||||
Nodeid Votes Name
|
||||
1 1 pve01
|
||||
2 1 pve02 (local)
|
||||
3 1 pve03
|
||||
|
||||
=== pvecm status (anchor) ===
|
||||
Cluster information
|
||||
-------------------
|
||||
Name: homelab
|
||||
Config Version: 5
|
||||
Transport: knet
|
||||
Secure auth: on
|
||||
|
||||
Quorum information
|
||||
------------------
|
||||
Date: Fri Mar 13 14:34:36 2026
|
||||
Quorum provider: corosync_votequorum
|
||||
Nodes: 3
|
||||
Node ID: 0x00000002
|
||||
Ring ID: 1.3c
|
||||
Quorate: Yes
|
||||
|
||||
Votequorum information
|
||||
----------------------
|
||||
Expected votes: 3
|
||||
Highest expected: 3
|
||||
Total votes: 3
|
||||
Quorum: 2
|
||||
Flags: Quorate
|
||||
|
||||
Membership information
|
||||
----------------------
|
||||
Nodeid Votes Name
|
||||
0x00000001 1 10.0.0.201
|
||||
0x00000002 1 10.0.0.202 (local)
|
||||
0x00000003 1 10.0.0.203
|
||||
|
||||
=== service state on join node ===
|
||||
active
|
||||
active
|
||||
active
|
||||
18
ansible/ansible-old/outputs/dhcp_reservations.csv
Normal file
18
ansible/ansible-old/outputs/dhcp_reservations.csv
Normal file
@ -0,0 +1,18 @@
|
||||
hostname,desired_ip,current_ip,mac,role,notes
|
||||
er7212pc,10.0.0.2,10.0.0.2,,gateway,"DHCP server / Omada controller — no reservation needed"
|
||||
pve01,10.0.10.11,10.0.0.201,,proxmox,"Proxmox mgmt - reserve for management interface"
|
||||
pve02,10.0.10.12,10.0.0.202,,proxmox,"Proxmox mgmt - reserve for management interface"
|
||||
pve03,10.0.10.13,10.0.0.203,,proxmox,"Proxmox mgmt - reserve for management interface"
|
||||
swarm-manager-1,10.0.200.11,10.0.0.211,,swarm_manager,"Swarm manager - static preferred"
|
||||
swarm-manager-2,10.0.200.12,10.0.0.212,,swarm_manager,"Swarm manager - static preferred"
|
||||
swarm-manager-3,10.0.200.13,10.0.0.213,,swarm_manager,"Swarm manager - static preferred"
|
||||
swarm-worker-1,10.0.200.21,10.0.0.221,,swarm_worker,"Worker - can be DHCP reservation or static"
|
||||
swarm-worker-2,10.0.200.22,10.0.0.222,,swarm_worker,"Worker - can be DHCP reservation or static"
|
||||
swarm-worker-3,10.0.200.23,10.0.0.223,,swarm_worker,"Worker - can be DHCP reservation or static"
|
||||
ai-lenovo,10.0.200.20,10.0.0.220,,ai_node,"AI node - reserve"
|
||||
synology,10.0.10.40,10.0.0.249,,nas,"NAS management IP - reserve"
|
||||
terramaster,10.0.10.41,10.0.0.250,,nas,"NAS management IP - reserve"
|
||||
waldorf,10.0.200.30,10.0.0.251,,docker_host,"Docker host - reserve"
|
||||
watchtower,10.0.10.200,10.0.0.200,,controller,"Watchtower (Pi) - reserve if controller"
|
||||
heimdall-mgmt,10.0.10.2,,,beelink,"Heimdall (Beelink) management NIC"
|
||||
heimdall-lan,10.0.0.50,,,beelink,"Heimdall service LAN NIC"
|
||||
|
@ -0,0 +1,88 @@
|
||||
---
|
||||
# Hardware Facts Report
|
||||
# Generated: 2026-03-12T00:49:09Z
|
||||
# Hosts Analyzed: 4
|
||||
#
|
||||
# Usage:
|
||||
# This report compares hardware specifications for Docker Swarm topology planning.
|
||||
# See README in documentation/architecture/ for capacity analysis.
|
||||
|
||||
pve03:
|
||||
cpu:
|
||||
cores_per_socket: 10
|
||||
cpu_load_percent: 0%
|
||||
current_1min_load: 0
|
||||
max_frequency_mhz: 2885
|
||||
model: '0'
|
||||
sockets: 1
|
||||
total_cores: 10
|
||||
fqdn: pve03.local
|
||||
hostname: pve03
|
||||
ip_address: 10.0.0.203
|
||||
memory:
|
||||
free_gb: 12
|
||||
free_mb: 12433
|
||||
total_gb: 23
|
||||
total_mb: 23726
|
||||
network:
|
||||
interface_list:
|
||||
- tap301i0
|
||||
- vmbr0
|
||||
- lo
|
||||
- tap302i0
|
||||
- wlp0s20f3
|
||||
- nic0
|
||||
interfaces_count: 6
|
||||
proxmox:
|
||||
cluster_members:
|
||||
- homelab
|
||||
- pve01
|
||||
- pve02
|
||||
- pve03
|
||||
cluster_name: not-clustered
|
||||
is_clustered: true
|
||||
version: ''
|
||||
version_full: 'pve-manager/9.1.6/71482d1833ded40a (running kernel: 6.17.2-1-pve)'
|
||||
vms_and_containers: 3
|
||||
storage:
|
||||
disk_list:
|
||||
- loop1
|
||||
- dm-1
|
||||
- dm-10
|
||||
- nvme0n1
|
||||
- dm-8
|
||||
- loop6
|
||||
- dm-6
|
||||
- loop4
|
||||
- dm-4
|
||||
- loop2
|
||||
- dm-2
|
||||
- dm-11
|
||||
- loop0
|
||||
- dm-0
|
||||
- dm-9
|
||||
- loop7
|
||||
- dm-7
|
||||
- loop5
|
||||
- dm-5
|
||||
- loop3
|
||||
- dm-3
|
||||
disks_detected: 21
|
||||
mounts_summary:
|
||||
- udev (12G available of 12G)
|
||||
- tmpfs (2.4G available of 2.4G)
|
||||
- /dev/mapper/pve-root (59G available of 68G)
|
||||
- tmpfs (12G available of 12G)
|
||||
- efivarfs (68K available of 438K)
|
||||
- tmpfs (5.0M available of 5.0M)
|
||||
- tmpfs (12G available of 12G)
|
||||
- /dev/nvme0n1p2 (1014M available of 1022M)
|
||||
- tmpfs (1.0M available of 1.0M)
|
||||
- tmpfs (1.0M available of 1.0M)
|
||||
- tmpfs (2.4G available of 2.4G)
|
||||
- /dev/fuse (128M available of 128M)
|
||||
system:
|
||||
kernel: 6.17.2-1-pve
|
||||
os: Debian trixie
|
||||
uptime_days: 4
|
||||
timestamp: '2026-03-12T00:49:09Z'
|
||||
@ -0,0 +1,88 @@
|
||||
---
|
||||
# Hardware Facts Report
|
||||
# Generated: 2026-03-13T01:59:28Z
|
||||
# Hosts Analyzed: 4
|
||||
#
|
||||
# Usage:
|
||||
# This report compares hardware specifications for Docker Swarm topology planning.
|
||||
# See README in documentation/architecture/ for capacity analysis.
|
||||
|
||||
pve03:
|
||||
cpu:
|
||||
cores_per_socket: '10'
|
||||
cpu_load_percent: 0%
|
||||
current_1min_load: '0'
|
||||
max_frequency_mhz: '2276'
|
||||
model: '0'
|
||||
sockets: '1'
|
||||
total_cores: '10'
|
||||
fqdn: pve03.local
|
||||
hostname: pve03
|
||||
ip_address: 10.0.0.203
|
||||
memory:
|
||||
free_gb: '11'
|
||||
free_mb: '12126'
|
||||
total_gb: '23'
|
||||
total_mb: '23726'
|
||||
network:
|
||||
interface_list:
|
||||
- vmbr0
|
||||
- wlp0s20f3
|
||||
- nic0
|
||||
- tap302i0
|
||||
- lo
|
||||
- tap301i0
|
||||
interfaces_count: '6'
|
||||
proxmox:
|
||||
cluster_members:
|
||||
- homelab
|
||||
- pve02
|
||||
- pve03
|
||||
- pve01
|
||||
cluster_name: not-clustered
|
||||
is_clustered: true
|
||||
version: ''
|
||||
version_full: 'pve-manager/9.1.6/71482d1833ded40a (running kernel: 6.17.2-1-pve)'
|
||||
vms_and_containers: '3'
|
||||
storage:
|
||||
disk_list:
|
||||
- loop1
|
||||
- dm-1
|
||||
- dm-10
|
||||
- nvme0n1
|
||||
- dm-8
|
||||
- loop6
|
||||
- dm-6
|
||||
- loop4
|
||||
- dm-4
|
||||
- loop2
|
||||
- dm-2
|
||||
- dm-11
|
||||
- loop0
|
||||
- dm-0
|
||||
- dm-9
|
||||
- loop7
|
||||
- dm-7
|
||||
- loop5
|
||||
- dm-5
|
||||
- loop3
|
||||
- dm-3
|
||||
disks_detected: '21'
|
||||
mounts_summary:
|
||||
- udev (12G available of 12G)
|
||||
- tmpfs (2.4G available of 2.4G)
|
||||
- /dev/mapper/pve-root (59G available of 68G)
|
||||
- tmpfs (12G available of 12G)
|
||||
- efivarfs (68K available of 438K)
|
||||
- tmpfs (5.0M available of 5.0M)
|
||||
- tmpfs (12G available of 12G)
|
||||
- /dev/nvme0n1p2 (1014M available of 1022M)
|
||||
- tmpfs (1.0M available of 1.0M)
|
||||
- tmpfs (1.0M available of 1.0M)
|
||||
- tmpfs (2.4G available of 2.4G)
|
||||
- /dev/fuse (128M available of 128M)
|
||||
system:
|
||||
kernel: 6.17.2-1-pve
|
||||
os: Debian trixie
|
||||
uptime_days: '5'
|
||||
timestamp: '2026-03-13T01:59:28Z'
|
||||
@ -0,0 +1,98 @@
|
||||
---
# Traefik edge stack (Docker Compose).
#
# Services:
#   redis               - Redis with append-only persistence on the redis-data volume.
#   docker-socket-proxy - Filtered TCP front for the Docker API socket; Traefik
#                         talks to it through DOCKER_HOST instead of mounting
#                         /var/run/docker.sock itself.
#   traefik             - Reverse proxy on 80/443; dashboard routed via labels.
#
# Required environment: CLOUDFLARE_DNS_API_TOKEN (interpolated below).
services:
  redis:
    image: redis:7-alpine
    container_name: redis
    restart: unless-stopped
    ports:
      # Published on loopback only: this Redis has no password configured, so
      # it must not listen on every host interface. Containers on proxy-net
      # still reach it directly as redis:6379.
      - "127.0.0.1:6379:6379"
    networks:
      - proxy-net
    volumes:
      - redis-data:/data
    command: redis-server --appendonly yes
    healthcheck:
      # traefik's depends_on waits for this check to report healthy.
      test: ["CMD", "redis-cli", "ping"]
      interval: 10s
      timeout: 5s
      retries: 5

  docker-socket-proxy:
    # NOTE(review): ':latest' is a floating tag; consider pinning a release.
    image: tecnativa/docker-socket-proxy:latest
    container_name: docker-socket-proxy
    restart: unless-stopped
    # NOTE(review): host userns + root + unconfined AppArmor + privileged give
    # this container very broad host access; confirm each is still required.
    userns_mode: "host"
    user: "0:0"
    security_opt:
      - apparmor=unconfined
    privileged: true
    group_add:
      # Presumably the GID of the host's docker group - verify on the host.
      - "988"
    environment:
      # Each flag set to 1 enables that section of the Docker API on the proxy.
      - CONTAINERS=1
      - SERVICES=1
      - TASKS=1
      - NETWORKS=1
      - EVENTS=1
      - VERSION=1
      - PING=1
      - AUTH=1
      - INFO=1
      - VOLUMES=1
    volumes:
      - /var/run/docker.sock:/var/run/docker.sock
    networks:
      - proxy-net

  traefik:
    image: traefik:v3.6.5
    container_name: traefik
    restart: unless-stopped
    user: "0:0"
    read_only: false
    depends_on:
      redis:
        condition: service_healthy
      docker-socket-proxy:
        condition: service_started
    environment:
      # Docker API is reached through the socket proxy, not a mounted socket.
      - DOCKER_HOST=tcp://docker-socket-proxy:2375
      # - DOCKER_API_VERSION=1.41
      - CLOUDFLARE_DNS_API_TOKEN=${CLOUDFLARE_DNS_API_TOKEN}
      # The zone token deliberately reuses the DNS token value.
      - CLOUDFLARE_ZONE_API_TOKEN=${CLOUDFLARE_DNS_API_TOKEN}
    networks:
      - proxy-net
    ports:
      - "80:80"
      - "443:443"
    volumes:
      - ./traefik.yml:/traefik.yml:ro
      - ./traefik-data/dynamic:/dynamic:ro
      - ./traefik-data/certs:/certs
      - ./traefik-data/access-logs:/var/log/traefik
    labels:
      - "traefik.enable=true"
      # Dashboard
      - "traefik.http.routers.traefik-secure.rule=Host(`proxy.castaldifamily.com`) && (PathPrefix(`/api`) || PathPrefix(`/dashboard`))"
      - "traefik.http.routers.traefik-secure.entrypoints=websecure"
      - "traefik.http.routers.traefik-secure.tls=true"
      - "traefik.http.routers.traefik-secure.tls.certresolver=cloudflare"
      - "traefik.http.routers.traefik-secure.service=api@internal"
      - "traefik.http.routers.traefik-secure.middlewares=dashboard-auth@file,security-headers@file,ratelimit-basic@file,dashboard-slash@file"
      # Root redirect
      - "traefik.http.routers.traefik-root.rule=Host(`proxy.castaldifamily.com`) && Path(`/`)"
      - "traefik.http.routers.traefik-root.entrypoints=websecure"
      - "traefik.http.routers.traefik-root.tls=true"
      - "traefik.http.routers.traefik-root.tls.certresolver=cloudflare"
      - "traefik.http.routers.traefik-root.service=api@internal"
      - "traefik.http.routers.traefik-root.middlewares=redirect-to-dashboard"
      # '$$' is Compose's escape for a literal '$'; the label value is '^/$'.
      - "traefik.http.middlewares.redirect-to-dashboard.redirectregex.regex=^/$$"
      - "traefik.http.middlewares.redirect-to-dashboard.redirectregex.replacement=/dashboard"
      - "traefik.http.middlewares.redirect-to-dashboard.redirectregex.permanent=true"

networks:
  proxy-net:
    driver: bridge
    # Fixed name so the network is not prefixed with the Compose project name.
    name: proxy-net

volumes:
  redis-data:
|
||||
@ -0,0 +1,975 @@
|
||||
- AppArmorProfile: docker-default
|
||||
Args:
|
||||
- --path.procfs=/host/proc
|
||||
- --path.sysfs=/host/sys
|
||||
- --path.rootfs=/rootfs
|
||||
- --collector.filesystem.mount-points-exclude=^/(sys|proc|dev|host|etc)($$|/)
|
||||
Config:
|
||||
AttachStderr: true
|
||||
AttachStdin: false
|
||||
AttachStdout: true
|
||||
Cmd:
|
||||
- --path.procfs=/host/proc
|
||||
- --path.sysfs=/host/sys
|
||||
- --path.rootfs=/rootfs
|
||||
- --collector.filesystem.mount-points-exclude=^/(sys|proc|dev|host|etc)($$|/)
|
||||
Domainname: ''
|
||||
Entrypoint:
|
||||
- /bin/node_exporter
|
||||
Env:
|
||||
- PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
|
||||
ExposedPorts:
|
||||
9100/tcp: {}
|
||||
Hostname: heimdall
|
||||
Image: prom/node-exporter:latest
|
||||
Labels:
|
||||
maintainer: The Prometheus Authors <prometheus-developers@googlegroups.com>
|
||||
OpenStdin: false
|
||||
StdinOnce: false
|
||||
Tty: false
|
||||
User: nobody
|
||||
Volumes: null
|
||||
WorkingDir: ''
|
||||
Created: '2026-03-09T23:15:53.531184328Z'
|
||||
Driver: overlayfs
|
||||
ExecIDs: null
|
||||
HostConfig:
|
||||
AutoRemove: false
|
||||
Binds:
|
||||
- /proc:/host/proc:ro
|
||||
- /sys:/host/sys:ro
|
||||
- /:/rootfs:ro
|
||||
BlkioDeviceReadBps: null
|
||||
BlkioDeviceReadIOps: null
|
||||
BlkioDeviceWriteBps: null
|
||||
BlkioDeviceWriteIOps: null
|
||||
BlkioWeight: 0
|
||||
BlkioWeightDevice: null
|
||||
CapAdd: null
|
||||
CapDrop:
|
||||
- ALL
|
||||
Cgroup: ''
|
||||
CgroupParent: ''
|
||||
CgroupnsMode: private
|
||||
ConsoleSize:
|
||||
- 0
|
||||
- 0
|
||||
ContainerIDFile: ''
|
||||
CpuCount: 0
|
||||
CpuPercent: 0
|
||||
CpuPeriod: 0
|
||||
CpuQuota: 0
|
||||
CpuRealtimePeriod: 0
|
||||
CpuRealtimeRuntime: 0
|
||||
CpuShares: 0
|
||||
CpusetCpus: ''
|
||||
CpusetMems: ''
|
||||
DeviceCgroupRules: null
|
||||
DeviceRequests: null
|
||||
Devices: null
|
||||
Dns: null
|
||||
DnsOptions: null
|
||||
DnsSearch: null
|
||||
ExtraHosts: null
|
||||
GroupAdd: null
|
||||
IOMaximumBandwidth: 0
|
||||
IOMaximumIOps: 0
|
||||
IpcMode: private
|
||||
Isolation: ''
|
||||
Links: null
|
||||
LogConfig:
|
||||
Config: {}
|
||||
Type: json-file
|
||||
MaskedPaths:
|
||||
- /proc/acpi
|
||||
- /proc/asound
|
||||
- /proc/interrupts
|
||||
- /proc/kcore
|
||||
- /proc/keys
|
||||
- /proc/latency_stats
|
||||
- /proc/sched_debug
|
||||
- /proc/scsi
|
||||
- /proc/timer_list
|
||||
- /proc/timer_stats
|
||||
- /sys/devices/virtual/powercap
|
||||
- /sys/firmware
|
||||
- /sys/devices/system/cpu/cpu0/thermal_throttle
|
||||
- /sys/devices/system/cpu/cpu1/thermal_throttle
|
||||
- /sys/devices/system/cpu/cpu2/thermal_throttle
|
||||
- /sys/devices/system/cpu/cpu3/thermal_throttle
|
||||
Memory: 134217728
|
||||
MemoryReservation: 0
|
||||
MemorySwap: 268435456
|
||||
MemorySwappiness: null
|
||||
NanoCpus: 500000000
|
||||
NetworkMode: host
|
||||
OomKillDisable: null
|
||||
OomScoreAdj: 0
|
||||
PidMode: ''
|
||||
PidsLimit: null
|
||||
PortBindings: {}
|
||||
Privileged: false
|
||||
PublishAllPorts: false
|
||||
ReadonlyPaths:
|
||||
- /proc/bus
|
||||
- /proc/fs
|
||||
- /proc/irq
|
||||
- /proc/sys
|
||||
- /proc/sysrq-trigger
|
||||
ReadonlyRootfs: true
|
||||
RestartPolicy:
|
||||
MaximumRetryCount: 0
|
||||
Name: unless-stopped
|
||||
Runtime: runc
|
||||
SecurityOpt:
|
||||
- no-new-privileges:true
|
||||
ShmSize: 67108864
|
||||
UTSMode: ''
|
||||
Ulimits: null
|
||||
UsernsMode: ''
|
||||
VolumeDriver: ''
|
||||
VolumesFrom: null
|
||||
HostnamePath: /var/lib/docker/containers/3f397bc8b39d3a9ae4b903f1daf99fdfddd842cb86b549b86c7aba30fe4d7a4f/hostname
|
||||
HostsPath: /var/lib/docker/containers/3f397bc8b39d3a9ae4b903f1daf99fdfddd842cb86b549b86c7aba30fe4d7a4f/hosts
|
||||
Id: 3f397bc8b39d3a9ae4b903f1daf99fdfddd842cb86b549b86c7aba30fe4d7a4f
|
||||
Image: sha256:3ac34ce007accad95afed72149e0d2b927b7e42fd1c866149b945b84737c62c3
|
||||
ImageManifestDescriptor:
|
||||
digest: sha256:7bcf2839f207d926b908cd3c566c9f1577efb72268062be0c96cd3b17a5cb283
|
||||
mediaType: application/vnd.docker.distribution.manifest.v2+json
|
||||
platform:
|
||||
architecture: amd64
|
||||
os: linux
|
||||
size: 949
|
||||
LogPath: /var/lib/docker/containers/3f397bc8b39d3a9ae4b903f1daf99fdfddd842cb86b549b86c7aba30fe4d7a4f/3f397bc8b39d3a9ae4b903f1daf99fdfddd842cb86b549b86c7aba30fe4d7a4f-json.log
|
||||
MountLabel: ''
|
||||
Mounts:
|
||||
- Destination: /host/proc
|
||||
Mode: ro
|
||||
Propagation: rprivate
|
||||
RW: false
|
||||
Source: /proc
|
||||
Type: bind
|
||||
- Destination: /host/sys
|
||||
Mode: ro
|
||||
Propagation: rprivate
|
||||
RW: false
|
||||
Source: /sys
|
||||
Type: bind
|
||||
- Destination: /rootfs
|
||||
Mode: ro
|
||||
Propagation: rslave
|
||||
RW: false
|
||||
Source: /
|
||||
Type: bind
|
||||
Name: /node-exporter
|
||||
NetworkSettings:
|
||||
Networks:
|
||||
host:
|
||||
Aliases: null
|
||||
DNSNames: null
|
||||
DriverOpts: null
|
||||
EndpointID: d2673440c953463f22ab1da395595e8f898bfab6baa043b2638fa2654fd04e4a
|
||||
Gateway: ''
|
||||
GlobalIPv6Address: ''
|
||||
GlobalIPv6PrefixLen: 0
|
||||
GwPriority: 0
|
||||
IPAMConfig: null
|
||||
IPAddress: ''
|
||||
IPPrefixLen: 0
|
||||
IPv6Gateway: ''
|
||||
Links: null
|
||||
MacAddress: ''
|
||||
NetworkID: b63c150f50197cfb21939a1369d37f0a309118dfb79be11d4c6082d963f8f70a
|
||||
Ports: {}
|
||||
SandboxID: 770e56f6832d109ab47e3b523e838be28d0bdf51a520cc5c9a07351bcb84f10d
|
||||
SandboxKey: /var/run/docker/netns/default
|
||||
Path: /bin/node_exporter
|
||||
Platform: linux
|
||||
ProcessLabel: ''
|
||||
ResolvConfPath: /var/lib/docker/containers/3f397bc8b39d3a9ae4b903f1daf99fdfddd842cb86b549b86c7aba30fe4d7a4f/resolv.conf
|
||||
RestartCount: 0
|
||||
State:
|
||||
Dead: false
|
||||
Error: ''
|
||||
ExitCode: 0
|
||||
FinishedAt: '0001-01-01T00:00:00Z'
|
||||
OOMKilled: false
|
||||
Paused: false
|
||||
Pid: 2616285
|
||||
Restarting: false
|
||||
Running: true
|
||||
StartedAt: '2026-03-09T23:15:53.649932822Z'
|
||||
Status: running
|
||||
Storage:
|
||||
RootFS:
|
||||
Snapshot:
|
||||
Name: overlayfs
|
||||
- AppArmorProfile: docker-default
|
||||
Args:
|
||||
- traefik
|
||||
Config:
|
||||
AttachStderr: true
|
||||
AttachStdin: false
|
||||
AttachStdout: true
|
||||
Cmd:
|
||||
- traefik
|
||||
Domainname: ''
|
||||
Entrypoint:
|
||||
- /entrypoint.sh
|
||||
Env:
|
||||
- CLOUDFLARE_ZONE_API_TOKEN=REDACTED  # secret scrubbed from committed snapshot; rotate the exposed token
|
||||
- DOCKER_HOST=tcp://docker-socket-proxy:2375
|
||||
- CLOUDFLARE_DNS_API_TOKEN=REDACTED  # secret scrubbed from committed snapshot; rotate the exposed token
|
||||
- PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
|
||||
ExposedPorts:
|
||||
443/tcp: {}
|
||||
80/tcp: {}
|
||||
Hostname: f0c70cc4667e
|
||||
Image: traefik:v3.6.5
|
||||
Labels:
|
||||
com.docker.compose.config-hash: 42df1402e650e630bde14fa90b6287582d9b29068566faaff58ed7ca6d60fffa
|
||||
com.docker.compose.container-number: '1'
|
||||
com.docker.compose.depends_on: redis:service_healthy:false,docker-socket-proxy:service_started:false
|
||||
com.docker.compose.image: sha256:67622638cd88dbfcfba40159bc652ecf0aea0e032f8a3c7e3134ae7c037b9910
|
||||
com.docker.compose.oneoff: 'False'
|
||||
com.docker.compose.project: traefik
|
||||
com.docker.compose.project.config_files: /home/chester/traefik/docker-compose.yml
|
||||
com.docker.compose.project.working_dir: /home/chester/traefik
|
||||
com.docker.compose.replace: traefik
|
||||
com.docker.compose.service: traefik
|
||||
com.docker.compose.version: 5.0.2
|
||||
org.opencontainers.image.description: A modern reverse-proxy
|
||||
org.opencontainers.image.documentation: https://docs.traefik.io
|
||||
org.opencontainers.image.source: https://github.com/traefik/traefik
|
||||
org.opencontainers.image.title: Traefik
|
||||
org.opencontainers.image.url: https://traefik.io
|
||||
org.opencontainers.image.vendor: Traefik Labs
|
||||
org.opencontainers.image.version: v3.6.5
|
||||
traefik.enable: 'true'
|
||||
traefik.http.middlewares.redirect-to-dashboard.redirectregex.permanent: 'true'
|
||||
traefik.http.middlewares.redirect-to-dashboard.redirectregex.regex: ^/$
|
||||
traefik.http.middlewares.redirect-to-dashboard.redirectregex.replacement: /dashboard
|
||||
traefik.http.routers.traefik-root.entrypoints: websecure
|
||||
traefik.http.routers.traefik-root.middlewares: redirect-to-dashboard
|
||||
traefik.http.routers.traefik-root.rule: Host(`proxy.castaldifamily.com`)
|
||||
&& Path(`/`)
|
||||
traefik.http.routers.traefik-root.service: api@internal
|
||||
traefik.http.routers.traefik-root.tls: 'true'
|
||||
traefik.http.routers.traefik-root.tls.certresolver: cloudflare
|
||||
traefik.http.routers.traefik-secure.entrypoints: websecure
|
||||
traefik.http.routers.traefik-secure.middlewares: dashboard-auth@file,security-headers@file,ratelimit-basic@file,dashboard-slash@file
|
||||
traefik.http.routers.traefik-secure.rule: Host(`proxy.castaldifamily.com`)
|
||||
&& (PathPrefix(`/api`) || PathPrefix(`/dashboard`))
|
||||
traefik.http.routers.traefik-secure.service: api@internal
|
||||
traefik.http.routers.traefik-secure.tls: 'true'
|
||||
traefik.http.routers.traefik-secure.tls.certresolver: cloudflare
|
||||
OpenStdin: false
|
||||
StdinOnce: false
|
||||
Tty: false
|
||||
User: 0:0
|
||||
Volumes: null
|
||||
WorkingDir: /
|
||||
Created: '2026-01-28T00:34:54.992079505Z'
|
||||
Driver: overlayfs
|
||||
ExecIDs: null
|
||||
HostConfig:
|
||||
AutoRemove: false
|
||||
Binds:
|
||||
- /home/chester/traefik/traefik-data/certs:/certs:rw
|
||||
- /home/chester/traefik/traefik-data/access-logs:/var/log/traefik:rw
|
||||
- /home/chester/traefik/traefik.yml:/traefik.yml:ro
|
||||
- /home/chester/traefik/traefik-data/dynamic:/dynamic:ro
|
||||
BlkioDeviceReadBps: null
|
||||
BlkioDeviceReadIOps: null
|
||||
BlkioDeviceWriteBps: null
|
||||
BlkioDeviceWriteIOps: null
|
||||
BlkioWeight: 0
|
||||
BlkioWeightDevice: null
|
||||
CapAdd: null
|
||||
CapDrop: null
|
||||
Cgroup: ''
|
||||
CgroupParent: ''
|
||||
CgroupnsMode: private
|
||||
ConsoleSize:
|
||||
- 0
|
||||
- 0
|
||||
ContainerIDFile: ''
|
||||
CpuCount: 0
|
||||
CpuPercent: 0
|
||||
CpuPeriod: 0
|
||||
CpuQuota: 0
|
||||
CpuRealtimePeriod: 0
|
||||
CpuRealtimeRuntime: 0
|
||||
CpuShares: 0
|
||||
CpusetCpus: ''
|
||||
CpusetMems: ''
|
||||
DeviceCgroupRules: null
|
||||
DeviceRequests: null
|
||||
Devices: null
|
||||
Dns: []
|
||||
DnsOptions: []
|
||||
DnsSearch: []
|
||||
ExtraHosts: []
|
||||
GroupAdd: null
|
||||
IOMaximumBandwidth: 0
|
||||
IOMaximumIOps: 0
|
||||
IpcMode: private
|
||||
Isolation: ''
|
||||
Links: null
|
||||
LogConfig:
|
||||
Config: {}
|
||||
Type: json-file
|
||||
MaskedPaths:
|
||||
- /proc/acpi
|
||||
- /proc/asound
|
||||
- /proc/interrupts
|
||||
- /proc/kcore
|
||||
- /proc/keys
|
||||
- /proc/latency_stats
|
||||
- /proc/sched_debug
|
||||
- /proc/scsi
|
||||
- /proc/timer_list
|
||||
- /proc/timer_stats
|
||||
- /sys/devices/virtual/powercap
|
||||
- /sys/firmware
|
||||
- /sys/devices/system/cpu/cpu0/thermal_throttle
|
||||
- /sys/devices/system/cpu/cpu1/thermal_throttle
|
||||
- /sys/devices/system/cpu/cpu2/thermal_throttle
|
||||
- /sys/devices/system/cpu/cpu3/thermal_throttle
|
||||
Memory: 0
|
||||
MemoryReservation: 0
|
||||
MemorySwap: 0
|
||||
MemorySwappiness: null
|
||||
NanoCpus: 0
|
||||
NetworkMode: proxy-net
|
||||
OomKillDisable: null
|
||||
OomScoreAdj: 0
|
||||
PidMode: ''
|
||||
PidsLimit: null
|
||||
PortBindings:
|
||||
443/tcp:
|
||||
- HostIp: ''
|
||||
HostPort: '443'
|
||||
80/tcp:
|
||||
- HostIp: ''
|
||||
HostPort: '80'
|
||||
Privileged: false
|
||||
PublishAllPorts: false
|
||||
ReadonlyPaths:
|
||||
- /proc/bus
|
||||
- /proc/fs
|
||||
- /proc/irq
|
||||
- /proc/sys
|
||||
- /proc/sysrq-trigger
|
||||
ReadonlyRootfs: false
|
||||
RestartPolicy:
|
||||
MaximumRetryCount: 0
|
||||
Name: unless-stopped
|
||||
Runtime: runc
|
||||
SecurityOpt: null
|
||||
ShmSize: 67108864
|
||||
UTSMode: ''
|
||||
Ulimits: null
|
||||
UsernsMode: ''
|
||||
VolumeDriver: ''
|
||||
VolumesFrom: null
|
||||
HostnamePath: /var/lib/docker/containers/f0c70cc4667e2bfb834ed92486be28d836c399dbeb84fa26bd84f49579562c64/hostname
|
||||
HostsPath: /var/lib/docker/containers/f0c70cc4667e2bfb834ed92486be28d836c399dbeb84fa26bd84f49579562c64/hosts
|
||||
Id: f0c70cc4667e2bfb834ed92486be28d836c399dbeb84fa26bd84f49579562c64
|
||||
Image: sha256:67622638cd88dbfcfba40159bc652ecf0aea0e032f8a3c7e3134ae7c037b9910
|
||||
ImageManifestDescriptor:
|
||||
annotations:
|
||||
com.docker.official-images.bashbrew.arch: amd64
|
||||
org.opencontainers.image.base.digest: sha256:1882fa4569e0c591ea092d3766c4893e19b8901a8e649de7067188aba3cc0679
|
||||
org.opencontainers.image.base.name: alpine:3.23
|
||||
org.opencontainers.image.created: '2025-12-18T00:37:28Z'
|
||||
org.opencontainers.image.revision: 87ae3f90a938b0159e557ba5b6abcfd63effb714
|
||||
org.opencontainers.image.source: https://github.com/traefik/traefik-library-image.git#87ae3f90a938b0159e557ba5b6abcfd63effb714:v3.6/alpine
|
||||
org.opencontainers.image.url: https://hub.docker.com/_/traefik
|
||||
org.opencontainers.image.version: v3.6.5
|
||||
digest: sha256:d944e3693bbf5a361ddd2e411bb713049cfb4f5ff3da200b30ee7a347dbd6abd
|
||||
mediaType: application/vnd.oci.image.manifest.v1+json
|
||||
platform:
|
||||
architecture: amd64
|
||||
os: linux
|
||||
size: 1728
|
||||
LogPath: /var/lib/docker/containers/f0c70cc4667e2bfb834ed92486be28d836c399dbeb84fa26bd84f49579562c64/f0c70cc4667e2bfb834ed92486be28d836c399dbeb84fa26bd84f49579562c64-json.log
|
||||
MountLabel: ''
|
||||
Mounts:
|
||||
- Destination: /certs
|
||||
Mode: rw
|
||||
Propagation: rprivate
|
||||
RW: true
|
||||
Source: /home/chester/traefik/traefik-data/certs
|
||||
Type: bind
|
||||
- Destination: /dynamic
|
||||
Mode: ro
|
||||
Propagation: rprivate
|
||||
RW: false
|
||||
Source: /home/chester/traefik/traefik-data/dynamic
|
||||
Type: bind
|
||||
- Destination: /traefik.yml
|
||||
Mode: ro
|
||||
Propagation: rprivate
|
||||
RW: false
|
||||
Source: /home/chester/traefik/traefik.yml
|
||||
Type: bind
|
||||
- Destination: /var/log/traefik
|
||||
Mode: rw
|
||||
Propagation: rprivate
|
||||
RW: true
|
||||
Source: /home/chester/traefik/traefik-data/access-logs
|
||||
Type: bind
|
||||
Name: /traefik
|
||||
NetworkSettings:
|
||||
Networks:
|
||||
proxy-net:
|
||||
Aliases:
|
||||
- traefik
|
||||
- traefik
|
||||
DNSNames:
|
||||
- traefik
|
||||
- f0c70cc4667e
|
||||
DriverOpts: null
|
||||
EndpointID: 85312d375679f81387f54387dc176918f159b3c5527b527a10da91b36dc3c8f5
|
||||
Gateway: 172.18.0.1
|
||||
GlobalIPv6Address: ''
|
||||
GlobalIPv6PrefixLen: 0
|
||||
GwPriority: 0
|
||||
IPAMConfig: null
|
||||
IPAddress: 172.18.0.3
|
||||
IPPrefixLen: 16
|
||||
IPv6Gateway: ''
|
||||
Links: null
|
||||
MacAddress: c2:85:cb:12:fe:61
|
||||
NetworkID: c451239da54e830d98844b541d0b707cc63426ce475d5103dc86300c0ebb7160
|
||||
Ports:
|
||||
443/tcp:
|
||||
- HostIp: 0.0.0.0
|
||||
HostPort: '443'
|
||||
- HostIp: '::'
|
||||
HostPort: '443'
|
||||
80/tcp:
|
||||
- HostIp: 0.0.0.0
|
||||
HostPort: '80'
|
||||
- HostIp: '::'
|
||||
HostPort: '80'
|
||||
SandboxID: 39e089426b97fd8075a6b4fad29d0cdc3fa77b73e28f8ef96bef68e3418b7fb1
|
||||
SandboxKey: /var/run/docker/netns/39e089426b97
|
||||
Path: /entrypoint.sh
|
||||
Platform: linux
|
||||
ProcessLabel: ''
|
||||
ResolvConfPath: /var/lib/docker/containers/f0c70cc4667e2bfb834ed92486be28d836c399dbeb84fa26bd84f49579562c64/resolv.conf
|
||||
RestartCount: 0
|
||||
State:
|
||||
Dead: false
|
||||
Error: ''
|
||||
ExitCode: 0
|
||||
FinishedAt: '2026-02-21T18:15:51.551714695Z'
|
||||
OOMKilled: false
|
||||
Paused: false
|
||||
Pid: 1213
|
||||
Restarting: false
|
||||
Running: true
|
||||
StartedAt: '2026-02-21T18:30:42.488013871Z'
|
||||
Status: running
|
||||
Storage:
|
||||
RootFS:
|
||||
Snapshot:
|
||||
Name: overlayfs
|
||||
- AppArmorProfile: unconfined
|
||||
Args:
|
||||
- haproxy
|
||||
- -f
|
||||
- /tmp/haproxy.cfg
|
||||
Config:
|
||||
AttachStderr: true
|
||||
AttachStdin: false
|
||||
AttachStdout: true
|
||||
Cmd:
|
||||
- haproxy
|
||||
- -f
|
||||
- /tmp/haproxy.cfg
|
||||
Domainname: ''
|
||||
Entrypoint:
|
||||
- docker-entrypoint.sh
|
||||
Env:
|
||||
- INFO=1
|
||||
- SERVICES=1
|
||||
- TASKS=1
|
||||
- PING=1
|
||||
- AUTH=1
|
||||
- VERSION=1
|
||||
- EVENTS=1
|
||||
- NETWORKS=1
|
||||
- CONTAINERS=1
|
||||
- VOLUMES=1
|
||||
- PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
|
||||
- HAPROXY_VERSION=3.2.4
|
||||
- HAPROXY_URL=https://www.haproxy.org/download/3.2/src/haproxy-3.2.4.tar.gz
|
||||
- HAPROXY_SHA256=5d4b2ee6fe56b8098ebb9c91a899d728f87d64cd7be8804d2ddcc5f937498c1d
|
||||
- ALLOW_RESTARTS=0
|
||||
- ALLOW_STOP=0
|
||||
- ALLOW_START=0
|
||||
- BUILD=0
|
||||
- COMMIT=0
|
||||
- CONFIGS=0
|
||||
- DISABLE_IPV6=0
|
||||
- DISTRIBUTION=0
|
||||
- EXEC=0
|
||||
- GRPC=0
|
||||
- IMAGES=0
|
||||
- LOG_LEVEL=info
|
||||
- NODES=0
|
||||
- PLUGINS=0
|
||||
- POST=0
|
||||
- SECRETS=0
|
||||
- SESSION=0
|
||||
- SOCKET_PATH=/var/run/docker.sock
|
||||
- SWARM=0
|
||||
- SYSTEM=0
|
||||
ExposedPorts:
|
||||
2375/tcp: {}
|
||||
Hostname: f59c3a7d4c30
|
||||
Image: tecnativa/docker-socket-proxy:latest
|
||||
Labels:
|
||||
com.docker.compose.config-hash: 711c15ad420cb4274f3a65832d36be4bc31327a53f09b84b803d0e1ab18a0917
|
||||
com.docker.compose.container-number: '1'
|
||||
com.docker.compose.depends_on: ''
|
||||
com.docker.compose.image: sha256:1f3a6f303320723d199d2316a3e82b2e2685d86c275d5e3deeaf182573b47476
|
||||
com.docker.compose.oneoff: 'False'
|
||||
com.docker.compose.project: traefik
|
||||
com.docker.compose.project.config_files: /home/chester/traefik/docker-compose.yml
|
||||
com.docker.compose.project.working_dir: /home/chester/traefik
|
||||
com.docker.compose.replace: docker-socket-proxy
|
||||
com.docker.compose.service: docker-socket-proxy
|
||||
com.docker.compose.version: 5.0.2
|
||||
org.opencontainers.image.created: '2025-12-16T07:26:21.623Z'
|
||||
org.opencontainers.image.description: Proxy over your Docker socket to
|
||||
restrict which requests it accepts
|
||||
org.opencontainers.image.licenses: Apache-2.0
|
||||
org.opencontainers.image.revision: 2f04313b042c1bf4dfbd039475dfc42db79bde7a
|
||||
org.opencontainers.image.source: https://github.com/Tecnativa/docker-socket-proxy
|
||||
org.opencontainers.image.title: docker-socket-proxy
|
||||
org.opencontainers.image.url: https://github.com/Tecnativa/docker-socket-proxy
|
||||
org.opencontainers.image.version: v0.4.2
|
||||
OpenStdin: false
|
||||
StdinOnce: false
|
||||
StopSignal: SIGUSR1
|
||||
Tty: false
|
||||
User: 0:0
|
||||
Volumes: null
|
||||
WorkingDir: /var/lib/haproxy
|
||||
Created: '2026-01-28T00:34:44.663698444Z'
|
||||
Driver: overlayfs
|
||||
ExecIDs: null
|
||||
HostConfig:
|
||||
AutoRemove: false
|
||||
Binds:
|
||||
- /var/run/docker.sock:/var/run/docker.sock:rw
|
||||
BlkioDeviceReadBps: null
|
||||
BlkioDeviceReadIOps: null
|
||||
BlkioDeviceWriteBps: null
|
||||
BlkioDeviceWriteIOps: null
|
||||
BlkioWeight: 0
|
||||
BlkioWeightDevice: null
|
||||
CapAdd: null
|
||||
CapDrop: null
|
||||
Cgroup: ''
|
||||
CgroupParent: ''
|
||||
CgroupnsMode: private
|
||||
ConsoleSize:
|
||||
- 0
|
||||
- 0
|
||||
ContainerIDFile: ''
|
||||
CpuCount: 0
|
||||
CpuPercent: 0
|
||||
CpuPeriod: 0
|
||||
CpuQuota: 0
|
||||
CpuRealtimePeriod: 0
|
||||
CpuRealtimeRuntime: 0
|
||||
CpuShares: 0
|
||||
CpusetCpus: ''
|
||||
CpusetMems: ''
|
||||
DeviceCgroupRules: null
|
||||
DeviceRequests: null
|
||||
Devices: null
|
||||
Dns: []
|
||||
DnsOptions: []
|
||||
DnsSearch: []
|
||||
ExtraHosts: []
|
||||
GroupAdd:
|
||||
- '988'
|
||||
IOMaximumBandwidth: 0
|
||||
IOMaximumIOps: 0
|
||||
IpcMode: private
|
||||
Isolation: ''
|
||||
Links: null
|
||||
LogConfig:
|
||||
Config: {}
|
||||
Type: json-file
|
||||
MaskedPaths: null
|
||||
Memory: 0
|
||||
MemoryReservation: 0
|
||||
MemorySwap: 0
|
||||
MemorySwappiness: null
|
||||
NanoCpus: 0
|
||||
NetworkMode: proxy-net
|
||||
OomKillDisable: null
|
||||
OomScoreAdj: 0
|
||||
PidMode: ''
|
||||
PidsLimit: null
|
||||
PortBindings: {}
|
||||
Privileged: true
|
||||
PublishAllPorts: false
|
||||
ReadonlyPaths: null
|
||||
ReadonlyRootfs: false
|
||||
RestartPolicy:
|
||||
MaximumRetryCount: 0
|
||||
Name: unless-stopped
|
||||
Runtime: runc
|
||||
SecurityOpt:
|
||||
- apparmor=unconfined
|
||||
- label=disable
|
||||
ShmSize: 67108864
|
||||
UTSMode: ''
|
||||
Ulimits: null
|
||||
UsernsMode: host
|
||||
VolumeDriver: ''
|
||||
VolumesFrom: null
|
||||
HostnamePath: /var/lib/docker/containers/f59c3a7d4c3036a26bb8f060aa209b06bcb52d9d0bc41e32a750b36f4df3ae56/hostname
|
||||
HostsPath: /var/lib/docker/containers/f59c3a7d4c3036a26bb8f060aa209b06bcb52d9d0bc41e32a750b36f4df3ae56/hosts
|
||||
Id: f59c3a7d4c3036a26bb8f060aa209b06bcb52d9d0bc41e32a750b36f4df3ae56
|
||||
Image: sha256:1f3a6f303320723d199d2316a3e82b2e2685d86c275d5e3deeaf182573b47476
|
||||
ImageManifestDescriptor:
|
||||
digest: sha256:bd2241b3bec83abcff25927a0a7ae518e0c5bef624b3cc247dcb31e68b53f417
|
||||
mediaType: application/vnd.oci.image.manifest.v1+json
|
||||
platform:
|
||||
architecture: amd64
|
||||
os: linux
|
||||
size: 1993
|
||||
LogPath: /var/lib/docker/containers/f59c3a7d4c3036a26bb8f060aa209b06bcb52d9d0bc41e32a750b36f4df3ae56/f59c3a7d4c3036a26bb8f060aa209b06bcb52d9d0bc41e32a750b36f4df3ae56-json.log
|
||||
MountLabel: ''
|
||||
Mounts:
|
||||
- Destination: /var/run/docker.sock
|
||||
Mode: rw
|
||||
Propagation: rprivate
|
||||
RW: true
|
||||
Source: /var/run/docker.sock
|
||||
Type: bind
|
||||
Name: /docker-socket-proxy
|
||||
NetworkSettings:
|
||||
Networks:
|
||||
proxy-net:
|
||||
Aliases:
|
||||
- docker-socket-proxy
|
||||
- docker-socket-proxy
|
||||
DNSNames:
|
||||
- docker-socket-proxy
|
||||
- f59c3a7d4c30
|
||||
DriverOpts: null
|
||||
EndpointID: cb18a5396cca6ed0b3c3502b8e8e2d46eb39a5afaa7350e2dd2ea9ee5448d7d3
|
||||
Gateway: 172.18.0.1
|
||||
GlobalIPv6Address: ''
|
||||
GlobalIPv6PrefixLen: 0
|
||||
GwPriority: 0
|
||||
IPAMConfig: null
|
||||
IPAddress: 172.18.0.2
|
||||
IPPrefixLen: 16
|
||||
IPv6Gateway: ''
|
||||
Links: null
|
||||
MacAddress: 42:a5:f6:d2:52:08
|
||||
NetworkID: c451239da54e830d98844b541d0b707cc63426ce475d5103dc86300c0ebb7160
|
||||
Ports:
|
||||
2375/tcp: null
|
||||
SandboxID: e0902b280ba958f8f4ee51c20eb33a563b8bfc1717f3fbf4dd012a05672f3e74
|
||||
SandboxKey: /var/run/docker/netns/e0902b280ba9
|
||||
Path: docker-entrypoint.sh
|
||||
Platform: linux
|
||||
ProcessLabel: ''
|
||||
ResolvConfPath: /var/lib/docker/containers/f59c3a7d4c3036a26bb8f060aa209b06bcb52d9d0bc41e32a750b36f4df3ae56/resolv.conf
|
||||
RestartCount: 0
|
||||
State:
|
||||
Dead: false
|
||||
Error: ''
|
||||
ExitCode: 0
|
||||
FinishedAt: '2026-02-21T18:16:00.055009796Z'
|
||||
OOMKilled: false
|
||||
Paused: false
|
||||
Pid: 1225
|
||||
Restarting: false
|
||||
Running: true
|
||||
StartedAt: '2026-02-21T18:30:42.49130796Z'
|
||||
Status: running
|
||||
Storage:
|
||||
RootFS:
|
||||
Snapshot:
|
||||
Name: overlayfs
|
||||
- AppArmorProfile: docker-default
|
||||
Args:
|
||||
- redis-server
|
||||
- --appendonly
|
||||
- 'yes'
|
||||
Config:
|
||||
AttachStderr: true
|
||||
AttachStdin: false
|
||||
AttachStdout: true
|
||||
Cmd:
|
||||
- redis-server
|
||||
- --appendonly
|
||||
- 'yes'
|
||||
Domainname: ''
|
||||
Entrypoint:
|
||||
- docker-entrypoint.sh
|
||||
Env:
|
||||
- PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
|
||||
- GOSU_VERSION=1.17
|
||||
- REDIS_VERSION=7.4.7
|
||||
- REDIS_DOWNLOAD_URL=http://download.redis.io/releases/redis-7.4.7.tar.gz
|
||||
- REDIS_DOWNLOAD_SHA=c97e57b0df330a9e091cacff012bebe763c275398cf36ff44cdba876814b595b
|
||||
ExposedPorts:
|
||||
6379/tcp: {}
|
||||
Healthcheck:
|
||||
Interval: 10000000000
|
||||
Retries: 5
|
||||
Test:
|
||||
- CMD
|
||||
- redis-cli
|
||||
- ping
|
||||
Timeout: 5000000000
|
||||
Hostname: 57439684f5ef
|
||||
Image: redis:7-alpine
|
||||
Labels:
|
||||
com.docker.compose.config-hash: eb5826610c0f348a70810f75902caa3d6b889a5e442c0d9ddc539355c0113f49
|
||||
com.docker.compose.container-number: '1'
|
||||
com.docker.compose.depends_on: ''
|
||||
com.docker.compose.image: sha256:ee64a64eaab618d88051c3ade8f6352d11531fcf79d9a4818b9b183d8c1d18ba
|
||||
com.docker.compose.oneoff: 'False'
|
||||
com.docker.compose.project: traefik
|
||||
com.docker.compose.project.config_files: /home/chester/traefik/docker-compose.yml
|
||||
com.docker.compose.project.working_dir: /home/chester/traefik
|
||||
com.docker.compose.replace: redis
|
||||
com.docker.compose.service: redis
|
||||
com.docker.compose.version: 5.0.2
|
||||
OpenStdin: false
|
||||
StdinOnce: false
|
||||
Tty: false
|
||||
User: ''
|
||||
Volumes:
|
||||
/data: {}
|
||||
WorkingDir: /data
|
||||
Created: '2026-01-28T00:34:44.662867915Z'
|
||||
Driver: overlayfs
|
||||
ExecIDs: null
|
||||
HostConfig:
|
||||
AutoRemove: false
|
||||
Binds:
|
||||
- traefik_redis-data:/data:rw
|
||||
BlkioDeviceReadBps: null
|
||||
BlkioDeviceReadIOps: null
|
||||
BlkioDeviceWriteBps: null
|
||||
BlkioDeviceWriteIOps: null
|
||||
BlkioWeight: 0
|
||||
BlkioWeightDevice: null
|
||||
CapAdd: null
|
||||
CapDrop: null
|
||||
Cgroup: ''
|
||||
CgroupParent: ''
|
||||
CgroupnsMode: private
|
||||
ConsoleSize:
|
||||
- 0
|
||||
- 0
|
||||
ContainerIDFile: ''
|
||||
CpuCount: 0
|
||||
CpuPercent: 0
|
||||
CpuPeriod: 0
|
||||
CpuQuota: 0
|
||||
CpuRealtimePeriod: 0
|
||||
CpuRealtimeRuntime: 0
|
||||
CpuShares: 0
|
||||
CpusetCpus: ''
|
||||
CpusetMems: ''
|
||||
DeviceCgroupRules: null
|
||||
DeviceRequests: null
|
||||
Devices: null
|
||||
Dns: []
|
||||
DnsOptions: []
|
||||
DnsSearch: []
|
||||
ExtraHosts: []
|
||||
GroupAdd: null
|
||||
IOMaximumBandwidth: 0
|
||||
IOMaximumIOps: 0
|
||||
IpcMode: private
|
||||
Isolation: ''
|
||||
Links: null
|
||||
LogConfig:
|
||||
Config: {}
|
||||
Type: json-file
|
||||
MaskedPaths:
|
||||
- /proc/acpi
|
||||
- /proc/asound
|
||||
- /proc/interrupts
|
||||
- /proc/kcore
|
||||
- /proc/keys
|
||||
- /proc/latency_stats
|
||||
- /proc/sched_debug
|
||||
- /proc/scsi
|
||||
- /proc/timer_list
|
||||
- /proc/timer_stats
|
||||
- /sys/devices/virtual/powercap
|
||||
- /sys/firmware
|
||||
- /sys/devices/system/cpu/cpu0/thermal_throttle
|
||||
- /sys/devices/system/cpu/cpu1/thermal_throttle
|
||||
- /sys/devices/system/cpu/cpu2/thermal_throttle
|
||||
- /sys/devices/system/cpu/cpu3/thermal_throttle
|
||||
Memory: 0
|
||||
MemoryReservation: 0
|
||||
MemorySwap: 0
|
||||
MemorySwappiness: null
|
||||
NanoCpus: 0
|
||||
NetworkMode: proxy-net
|
||||
OomKillDisable: null
|
||||
OomScoreAdj: 0
|
||||
PidMode: ''
|
||||
PidsLimit: null
|
||||
PortBindings:
|
||||
6379/tcp:
|
||||
- HostIp: ''
|
||||
HostPort: '6379'
|
||||
Privileged: false
|
||||
PublishAllPorts: false
|
||||
ReadonlyPaths:
|
||||
- /proc/bus
|
||||
- /proc/fs
|
||||
- /proc/irq
|
||||
- /proc/sys
|
||||
- /proc/sysrq-trigger
|
||||
ReadonlyRootfs: false
|
||||
RestartPolicy:
|
||||
MaximumRetryCount: 0
|
||||
Name: unless-stopped
|
||||
Runtime: runc
|
||||
SecurityOpt: null
|
||||
ShmSize: 67108864
|
||||
UTSMode: ''
|
||||
Ulimits: null
|
||||
UsernsMode: ''
|
||||
VolumeDriver: ''
|
||||
VolumesFrom: null
|
||||
HostnamePath: /var/lib/docker/containers/57439684f5eff5afa67108c958725c641ff4b0299917774c93d91d5ce7b614b2/hostname
|
||||
HostsPath: /var/lib/docker/containers/57439684f5eff5afa67108c958725c641ff4b0299917774c93d91d5ce7b614b2/hosts
|
||||
Id: 57439684f5eff5afa67108c958725c641ff4b0299917774c93d91d5ce7b614b2
|
||||
Image: sha256:ee64a64eaab618d88051c3ade8f6352d11531fcf79d9a4818b9b183d8c1d18ba
|
||||
ImageManifestDescriptor:
|
||||
annotations:
|
||||
com.docker.official-images.bashbrew.arch: amd64
|
||||
org.opencontainers.image.base.digest: sha256:41c81533144786e0beb2b148667355a6c7659aa99a14ed837ff15a98ca9d71f3
|
||||
org.opencontainers.image.base.name: alpine:3.21
|
||||
org.opencontainers.image.created: '2025-11-03T17:38:49Z'
|
||||
org.opencontainers.image.revision: d42d7aec93b1c54dd46f37a66a92f62478456039
|
||||
org.opencontainers.image.source: https://github.com/redis/docker-library-redis.git#d42d7aec93b1c54dd46f37a66a92f62478456039:7.4/alpine
|
||||
org.opencontainers.image.url: https://hub.docker.com/_/redis
|
||||
org.opencontainers.image.version: 7.4.7-alpine
|
||||
digest: sha256:4706ecab5371690fecfdd782268929c94ad5b5ce9ce0b35bfdfe191c4ad17851
|
||||
mediaType: application/vnd.oci.image.manifest.v1+json
|
||||
platform:
|
||||
architecture: amd64
|
||||
os: linux
|
||||
size: 2483
|
||||
LogPath: /var/lib/docker/containers/57439684f5eff5afa67108c958725c641ff4b0299917774c93d91d5ce7b614b2/57439684f5eff5afa67108c958725c641ff4b0299917774c93d91d5ce7b614b2-json.log
|
||||
MountLabel: ''
|
||||
Mounts:
|
||||
- Destination: /data
|
||||
Driver: local
|
||||
Mode: rw
|
||||
Name: traefik_redis-data
|
||||
Propagation: ''
|
||||
RW: true
|
||||
Source: /var/lib/docker/volumes/traefik_redis-data/_data
|
||||
Type: volume
|
||||
Name: /redis
|
||||
NetworkSettings:
|
||||
Networks:
|
||||
proxy-net:
|
||||
Aliases:
|
||||
- redis
|
||||
- redis
|
||||
DNSNames:
|
||||
- redis
|
||||
- 57439684f5ef
|
||||
DriverOpts: null
|
||||
EndpointID: 7f950d9aab3bf29937a2c66723f8fd483984fa9ccd74a859166e810c77a9ca0b
|
||||
Gateway: 172.18.0.1
|
||||
GlobalIPv6Address: ''
|
||||
GlobalIPv6PrefixLen: 0
|
||||
GwPriority: 0
|
||||
IPAMConfig: null
|
||||
IPAddress: 172.18.0.4
|
||||
IPPrefixLen: 16
|
||||
IPv6Gateway: ''
|
||||
Links: null
|
||||
MacAddress: e2:9b:a3:07:2f:81
|
||||
NetworkID: c451239da54e830d98844b541d0b707cc63426ce475d5103dc86300c0ebb7160
|
||||
Ports:
|
||||
6379/tcp:
|
||||
- HostIp: 0.0.0.0
|
||||
HostPort: '6379'
|
||||
- HostIp: '::'
|
||||
HostPort: '6379'
|
||||
SandboxID: dfafbd7bf0a46788747bcf7e8cbe9dcfc05886cdbb73add6cde8d3f50eeed30d
|
||||
SandboxKey: /var/run/docker/netns/dfafbd7bf0a4
|
||||
Path: docker-entrypoint.sh
|
||||
Platform: linux
|
||||
ProcessLabel: ''
|
||||
ResolvConfPath: /var/lib/docker/containers/57439684f5eff5afa67108c958725c641ff4b0299917774c93d91d5ce7b614b2/resolv.conf
|
||||
RestartCount: 0
|
||||
State:
|
||||
Dead: false
|
||||
Error: ''
|
||||
ExitCode: 0
|
||||
FinishedAt: '2026-02-21T18:15:50.121096266Z'
|
||||
Health:
|
||||
FailingStreak: 0
|
||||
Log:
|
||||
- End: '2026-03-12T21:18:28.607327472Z'
|
||||
ExitCode: 0
|
||||
Output: 'PONG
|
||||
|
||||
'
|
||||
Start: '2026-03-12T21:18:28.555451253Z'
|
||||
- End: '2026-03-12T21:18:38.654395517Z'
|
||||
ExitCode: 0
|
||||
Output: 'PONG
|
||||
|
||||
'
|
||||
Start: '2026-03-12T21:18:38.60798899Z'
|
||||
- End: '2026-03-12T21:18:48.712837864Z'
|
||||
ExitCode: 0
|
||||
Output: 'PONG
|
||||
|
||||
'
|
||||
Start: '2026-03-12T21:18:48.655551711Z'
|
||||
- End: '2026-03-12T21:18:58.75775082Z'
|
||||
ExitCode: 0
|
||||
Output: 'PONG
|
||||
|
||||
'
|
||||
Start: '2026-03-12T21:18:58.713415195Z'
|
||||
- End: '2026-03-12T21:19:08.803904596Z'
|
||||
ExitCode: 0
|
||||
Output: 'PONG
|
||||
|
||||
'
|
||||
Start: '2026-03-12T21:19:08.758205815Z'
|
||||
Status: healthy
|
||||
OOMKilled: false
|
||||
Paused: false
|
||||
Pid: 1220
|
||||
Restarting: false
|
||||
Running: true
|
||||
StartedAt: '2026-02-21T18:30:42.486966925Z'
|
||||
Status: running
|
||||
Storage:
|
||||
RootFS:
|
||||
Snapshot:
|
||||
Name: overlayfs
|
||||
@ -0,0 +1,8 @@
|
||||
cgroup_driver: systemd
|
||||
containers_running: 4
|
||||
containers_total: 4
|
||||
daemon_config: {}
|
||||
logging_driver: json-file
|
||||
server_version: 29.2.0
|
||||
storage_driver: overlayfs
|
||||
swarm_state: inactive
|
||||
@ -0,0 +1,7 @@
|
||||
# Env key inventory — values REDACTED for security
|
||||
# Source: /home/chester/traefik/.env
|
||||
# Host: heimdall | Captured: 2026-03-12T21:19:10Z
|
||||
#
|
||||
# To restore secrets: ansible-vault encrypt_string '<value>' --name '<KEY>'
|
||||
CLOUDFLARE_DNS_API_TOKEN=<REDACTED>
|
||||
CLOUDFLARE_ZONE_API_TOKEN=<REDACTED>
|
||||
@ -0,0 +1,49 @@
|
||||
# Firewall state on heimdall
|
||||
# Captured: 2026-03-12T21:19:10Z
|
||||
|
||||
## UFW STATUS
|
||||
Status: inactive
|
||||
|
||||
## IPTABLES (reference)
|
||||
Chain INPUT (policy ACCEPT)
|
||||
num target prot opt source destination
|
||||
|
||||
Chain FORWARD (policy DROP)
|
||||
num target prot opt source destination
|
||||
1 DOCKER-USER 0 -- 0.0.0.0/0 0.0.0.0/0
|
||||
2 DOCKER-FORWARD 0 -- 0.0.0.0/0 0.0.0.0/0
|
||||
|
||||
Chain OUTPUT (policy ACCEPT)
|
||||
num target prot opt source destination
|
||||
|
||||
Chain DOCKER (2 references)
|
||||
num target prot opt source destination
|
||||
1 ACCEPT 6 -- 0.0.0.0/0 172.18.0.4 tcp dpt:6379
|
||||
2 ACCEPT 6 -- 0.0.0.0/0 172.18.0.3 tcp dpt:443
|
||||
3 ACCEPT 6 -- 0.0.0.0/0 172.18.0.3 tcp dpt:80
|
||||
4 DROP 0 -- 0.0.0.0/0 0.0.0.0/0
|
||||
5 DROP 0 -- 0.0.0.0/0 0.0.0.0/0
|
||||
|
||||
Chain DOCKER-BRIDGE (1 references)
|
||||
num target prot opt source destination
|
||||
1 DOCKER 0 -- 0.0.0.0/0 0.0.0.0/0
|
||||
2 DOCKER 0 -- 0.0.0.0/0 0.0.0.0/0
|
||||
|
||||
Chain DOCKER-CT (1 references)
|
||||
num target prot opt source destination
|
||||
1 ACCEPT 0 -- 0.0.0.0/0 0.0.0.0/0 ctstate RELATED,ESTABLISHED
|
||||
2 ACCEPT 0 -- 0.0.0.0/0 0.0.0.0/0 ctstate RELATED,ESTABLISHED
|
||||
|
||||
Chain DOCKER-FORWARD (1 references)
|
||||
num target prot opt source destination
|
||||
1 DOCKER-CT 0 -- 0.0.0.0/0 0.0.0.0/0
|
||||
2 DOCKER-INTERNAL 0 -- 0.0.0.0/0 0.0.0.0/0
|
||||
3 DOCKER-BRIDGE 0 -- 0.0.0.0/0 0.0.0.0/0
|
||||
4 ACCEPT 0 -- 0.0.0.0/0 0.0.0.0/0
|
||||
5 ACCEPT 0 -- 0.0.0.0/0 0.0.0.0/0
|
||||
|
||||
Chain DOCKER-INTERNAL (1 references)
|
||||
num target prot opt source destination
|
||||
|
||||
Chain DOCKER-USER (1 references)
|
||||
num target prot opt source destination
|
||||
@ -0,0 +1,36 @@
|
||||
ansible_user: root
|
||||
architecture: x86_64
|
||||
cpu_vcpus: 4
|
||||
default_ipv4:
|
||||
address: 10.0.0.151
|
||||
alias: enp1s0
|
||||
broadcast: 10.0.0.255
|
||||
gateway: 10.0.0.2
|
||||
interface: enp1s0
|
||||
macaddress: 7c:83:34:bf:79:a5
|
||||
mtu: 1500
|
||||
netmask: 255.255.255.0
|
||||
network: 10.0.0.0
|
||||
prefix: '24'
|
||||
type: ether
|
||||
distribution: Ubuntu
|
||||
distribution_release: noble
|
||||
distribution_version: '24.04'
|
||||
fqdn: heimdall
|
||||
hostname: heimdall
|
||||
interfaces:
|
||||
- veth57f15b2
|
||||
- wlo1
|
||||
- veth2088d3d
|
||||
- enp1s0
|
||||
- lo
|
||||
- vethe43b71e
|
||||
- br-c451239da54e
|
||||
- enp2s0
|
||||
- docker0
|
||||
kernel: 6.8.0-100-generic
|
||||
memory_free_mb: 377
|
||||
memory_total_mb: 15767
|
||||
os_family: Debian
|
||||
python_version: 3.12.3
|
||||
uptime_seconds: 1651833
|
||||
@ -0,0 +1,61 @@
|
||||
---
|
||||
# Heimdall baseline capture manifest
|
||||
# Generated: 2026-03-12T21:19:10Z
|
||||
# Host: heimdall (10.0.0.151)
|
||||
# Review this file before proceeding to heimdall_edge role refactor.
|
||||
|
||||
capture_timestamp: "2026-03-12T21:19:10Z"
|
||||
capture_dir: "/home/chester/homelab/ansible/playbooks/preflight/../../outputs/heimdall-baseline-20260312T211908"
|
||||
|
||||
host:
|
||||
hostname: "heimdall"
|
||||
ip: "10.0.0.151"
|
||||
os: "Ubuntu 24.04"
|
||||
kernel: "6.8.0-100-generic"
|
||||
|
||||
docker:
|
||||
version: "29.2.0"
|
||||
storage_driver: "overlayfs"
|
||||
swarm_state: "inactive"
|
||||
containers_running: 4
|
||||
containers_total: 4
|
||||
|
||||
inventory:
|
||||
containers_found: 4
|
||||
compose_files_found: 2
|
||||
env_files_found: 2
|
||||
|
||||
critical_paths:
|
||||
/etc/docker/daemon.json: false
|
||||
/home/chester/traefik: true
|
||||
/home/chester/traefik/.env: true
|
||||
/home/chester/traefik/docker-compose.yml: true
|
||||
/opt/stacks/heimdall: false
|
||||
/opt/stacks/heimdall/.env: false
|
||||
/opt/stacks/heimdall/docker-compose.yml: false
|
||||
/opt/stacks/heimdall/redis-data: false
|
||||
/opt/stacks/heimdall/runner-data: false
|
||||
/opt/stacks/heimdall/traefik-certs: false
|
||||
/opt/stacks/heimdall/traefik-certs/acme.json: false
|
||||
|
||||
compose_file_paths:
|
||||
- /home/chester/traefik/docker-compose.yml
|
||||
- /home/chester/traefik/docker-compose.yml
|
||||
|
||||
env_file_paths:
|
||||
- /home/chester/traefik/.env
|
||||
- /home/chester/traefik/.env
|
||||
|
||||
containers_running:
|
||||
- node-exporter
|
||||
- traefik
|
||||
- docker-socket-proxy
|
||||
- redis
|
||||
|
||||
validation:
|
||||
compose_files_present: True
|
||||
containers_present: True
|
||||
stack_dir_present: False
|
||||
compose_present: False
|
||||
env_present: False
|
||||
@ -0,0 +1,25 @@
|
||||
---
|
||||
# Docker network and volume inventory
|
||||
# Host: heimdall | Captured: 2026-03-12T21:19:10Z
|
||||
|
||||
networks:
|
||||
- Driver: bridge
|
||||
Id: 4f3815cff81bd0c59f62e0151bc58bc0289eca4634f77bf544e1fc3e34c0bab7
|
||||
Name: bridge
|
||||
Scope: local
|
||||
- Driver: 'null'
|
||||
Id: a55e7a3ec6e204eae20086edec67507e3c7ef59f5e383d4b8631d614c657e0d0
|
||||
Name: none
|
||||
Scope: local
|
||||
- Driver: host
|
||||
Id: b63c150f50197cfb21939a1369d37f0a309118dfb79be11d4c6082d963f8f70a
|
||||
Name: host
|
||||
Scope: local
|
||||
- Driver: bridge
|
||||
Id: c451239da54e830d98844b541d0b707cc63426ce475d5103dc86300c0ebb7160
|
||||
Name: proxy-net
|
||||
Scope: local
|
||||
|
||||
volumes:
|
||||
- Driver: local
|
||||
Name: traefik_redis-data
|
||||
@ -0,0 +1,153 @@
|
||||
UNIT LOAD ACTIVE SUB DESCRIPTION
|
||||
apparmor.service loaded active exited Load AppArmor profiles
|
||||
apport-autoreport.service loaded inactive dead Process error reports when automatic reporting is enabled
|
||||
apport.service loaded active exited automatic crash report generation
|
||||
apt-daily-upgrade.service loaded inactive dead Daily apt upgrade and clean activities
|
||||
apt-daily.service loaded inactive dead Daily apt download activities
|
||||
blk-availability.service loaded active exited Availability of block devices
|
||||
cloud-init-local.service loaded inactive dead Cloud-init: Local Stage (pre-network)
|
||||
console-setup.service loaded active exited Set console font and keymap
|
||||
containerd.service loaded active running containerd container runtime
|
||||
cron.service loaded active running Regular background program processing daemon
|
||||
dbus.service loaded active running D-Bus System Message Bus
|
||||
dm-event.service loaded inactive dead Device-mapper event daemon
|
||||
dmesg.service loaded inactive dead Save initial kernel messages after boot
|
||||
docker.service loaded active running Docker Application Container Engine
|
||||
dpkg-db-backup.service loaded inactive dead Daily dpkg database backup service
|
||||
e2scrub_all.service loaded inactive dead Online ext4 Metadata Check for All Filesystems
|
||||
e2scrub_reap.service loaded inactive dead Remove Stale Online ext4 Metadata Check Snapshots
|
||||
emergency.service loaded inactive dead Emergency Shell
|
||||
finalrd.service loaded active exited Create final runtime dir for shutdown pivot root
|
||||
fstrim.service loaded inactive dead Discard unused blocks on filesystems from /etc/fstab
|
||||
fwupd-refresh.service loaded inactive dead Refresh fwupd metadata and update motd
|
||||
getty-static.service loaded inactive dead getty on tty2-tty6 if dbus and logind are not available
|
||||
getty@tty1.service loaded active running Getty on tty1
|
||||
grub-common.service loaded inactive dead Record successful boot for GRUB
|
||||
grub-initrd-fallback.service loaded inactive dead GRUB failed boot detection
|
||||
initrd-cleanup.service loaded inactive dead Cleaning Up and Shutting Down Daemons
|
||||
initrd-parse-etc.service loaded inactive dead Mountpoints Configured in the Real Root
|
||||
initrd-switch-root.service loaded inactive dead Switch Root
|
||||
initrd-udevadm-cleanup-db.service loaded inactive dead Cleanup udev Database
|
||||
iscsid.service loaded inactive dead iSCSI initiator daemon (iscsid)
|
||||
keyboard-setup.service loaded active exited Set the console keyboard layout
|
||||
kmod-static-nodes.service loaded active exited Create List of Static Device Nodes
|
||||
ldconfig.service loaded inactive dead Rebuild Dynamic Linker Cache
|
||||
logrotate.service loaded inactive dead Rotate log files
|
||||
lvm2-lvmpolld.service loaded inactive dead LVM2 poll daemon
|
||||
lvm2-monitor.service loaded active exited Monitoring of LVM2 mirrors, snapshots etc. using dmeventd or progress polling
|
||||
man-db.service loaded inactive dead Daily man-db regeneration
|
||||
ModemManager.service loaded active running Modem Manager
|
||||
modprobe@configfs.service loaded inactive dead Load Kernel Module configfs
|
||||
modprobe@dm_mod.service loaded inactive dead Load Kernel Module dm_mod
|
||||
modprobe@drm.service loaded inactive dead Load Kernel Module drm
|
||||
modprobe@efi_pstore.service loaded inactive dead Load Kernel Module efi_pstore
|
||||
modprobe@fuse.service loaded inactive dead Load Kernel Module fuse
|
||||
modprobe@loop.service loaded inactive dead Load Kernel Module loop
|
||||
motd-news.service loaded inactive dead Message of the Day
|
||||
multipathd.service loaded active running Device-Mapper Multipath Device Controller
|
||||
netplan-ovs-cleanup.service loaded inactive dead OpenVSwitch configuration for cleanup
|
||||
networkd-dispatcher.service loaded inactive dead Dispatcher daemon for systemd-networkd
|
||||
open-iscsi.service loaded inactive dead Login to default iSCSI targets
|
||||
open-vm-tools.service loaded inactive dead Service for virtual machines hosted on VMware
|
||||
plymouth-quit-wait.service loaded active exited Hold until boot process finishes up
|
||||
plymouth-quit.service loaded active exited Terminate Plymouth Boot Screen
|
||||
plymouth-read-write.service loaded active exited Tell Plymouth To Write Out Runtime Data
|
||||
plymouth-start.service loaded inactive dead Show Plymouth Boot Screen
|
||||
plymouth-switch-root.service loaded inactive dead Plymouth switch root service
|
||||
polkit.service loaded active running Authorization Manager
|
||||
pollinate.service loaded inactive dead Pollinate to seed the pseudo random number generator
|
||||
rc-local.service loaded inactive dead /etc/rc.local Compatibility
|
||||
rescue.service loaded inactive dead Rescue Shell
|
||||
rsyslog.service loaded active running System Logging Service
|
||||
secureboot-db.service loaded inactive dead Secure Boot updates for DB and DBX
|
||||
setvtrgb.service loaded active exited Set console scheme
|
||||
snapd.apparmor.service loaded active exited Load AppArmor profiles managed internally by snapd
|
||||
snapd.autoimport.service loaded inactive dead Auto import assertions from block devices
|
||||
snapd.core-fixup.service loaded inactive dead Automatically repair incorrect owner/permissions on core devices
|
||||
snapd.failure.service loaded inactive dead Failure handling of the snapd snap
|
||||
snapd.recovery-chooser-trigger.service loaded inactive dead Wait for the Ubuntu Core chooser trigger
|
||||
snapd.seeded.service loaded active exited Wait until snapd is fully seeded
|
||||
snapd.service loaded inactive dead Snap Daemon
|
||||
snapd.snap-repair.service loaded inactive dead Automatically fetch and run repair assertions
|
||||
snapd.system-shutdown.service loaded inactive dead Ubuntu core (all-snaps) system shutdown helper setup service
|
||||
ssh.service loaded active running OpenBSD Secure Shell server
|
||||
sysstat-collect.service loaded inactive dead system activity accounting tool
|
||||
sysstat-summary.service loaded inactive dead Generate a daily summary of process accounting
|
||||
sysstat.service loaded active exited Resets System Activity Logs
|
||||
systemd-ask-password-console.service loaded inactive dead Dispatch Password Requests to Console
|
||||
systemd-ask-password-plymouth.service loaded inactive dead Forward Password Requests to Plymouth
|
||||
systemd-ask-password-wall.service loaded inactive dead Forward Password Requests to Wall
|
||||
systemd-battery-check.service loaded inactive dead Check battery level during early boot
|
||||
systemd-binfmt.service loaded active exited Set Up Additional Binary Formats
|
||||
systemd-bsod.service loaded inactive dead Displays emergency message in full screen.
|
||||
systemd-firstboot.service loaded inactive dead First Boot Wizard
|
||||
systemd-fsck-root.service loaded inactive dead File System Check on Root Device
|
||||
systemd-fsck@dev-disk-by\x2duuid-36D5\x2d0248.service loaded active exited File System Check on /dev/disk/by-uuid/36D5-0248
|
||||
systemd-fsck@dev-disk-by\x2duuid-da3c4a6e\x2df851\x2d471f\x2d81e4\x2dcd9b3b26acf1.service loaded active exited File System Check on /dev/disk/by-uuid/da3c4a6e-f851-471f-81e4-cd9b3b26acf1
|
||||
systemd-fsckd.service loaded inactive dead File System Check Daemon to report status
|
||||
systemd-hibernate-resume.service loaded inactive dead Resume from hibernation
|
||||
systemd-hibernate.service loaded inactive dead System Hibernate
|
||||
systemd-hwdb-update.service loaded inactive dead Rebuild Hardware Database
|
||||
systemd-hybrid-sleep.service loaded inactive dead System Hybrid Suspend+Hibernate
|
||||
systemd-initctl.service loaded inactive dead initctl Compatibility Daemon
|
||||
systemd-journal-catalog-update.service loaded inactive dead Rebuild Journal Catalog
|
||||
systemd-journal-flush.service loaded active exited Flush Journal to Persistent Storage
|
||||
systemd-journald.service loaded active running Journal Service
|
||||
systemd-logind.service loaded active running User Login Management
|
||||
systemd-machine-id-commit.service loaded inactive dead Commit a transient machine-id on disk
|
||||
systemd-modules-load.service loaded active exited Load Kernel Modules
|
||||
● systemd-networkd-wait-online.service loaded failed failed Wait for Network to be Configured
|
||||
systemd-networkd.service loaded active running Network Configuration
|
||||
systemd-pcrmachine.service loaded inactive dead TPM2 PCR Machine ID Measurement
|
||||
systemd-pcrphase-initrd.service loaded inactive dead TPM2 PCR Barrier (initrd)
|
||||
systemd-pcrphase-sysinit.service loaded inactive dead TPM2 PCR Barrier (Initialization)
|
||||
systemd-pcrphase.service loaded inactive dead TPM2 PCR Barrier (User)
|
||||
systemd-pstore.service loaded inactive dead Platform Persistent Storage Archival
|
||||
systemd-quotacheck.service loaded inactive dead File System Quota Check
|
||||
systemd-random-seed.service loaded active exited Load/Save OS Random Seed
|
||||
systemd-remount-fs.service loaded active exited Remount Root and Kernel File Systems
|
||||
systemd-repart.service loaded inactive dead Repartition Root Disk
|
||||
systemd-resolved.service loaded active running Network Name Resolution
|
||||
systemd-rfkill.service loaded inactive dead Load/Save RF Kill Switch Status
|
||||
systemd-soft-reboot.service loaded inactive dead Reboot System Userspace
|
||||
systemd-suspend-then-hibernate.service loaded inactive dead System Suspend then Hibernate
|
||||
systemd-suspend.service loaded inactive dead System Suspend
|
||||
systemd-sysctl.service loaded active exited Apply Kernel Variables
|
||||
systemd-sysext.service loaded inactive dead Merge System Extension Images into /usr/ and /opt/
|
||||
systemd-sysusers.service loaded inactive dead Create System Users
|
||||
systemd-timesyncd.service loaded active running Network Time Synchronization
|
||||
systemd-tmpfiles-clean.service loaded inactive dead Cleanup of Temporary Directories
|
||||
systemd-tmpfiles-setup-dev-early.service loaded active exited Create Static Device Nodes in /dev gracefully
|
||||
systemd-tmpfiles-setup-dev.service loaded active exited Create Static Device Nodes in /dev
|
||||
systemd-tmpfiles-setup.service loaded active exited Create Volatile Files and Directories
|
||||
systemd-tpm2-setup-early.service loaded inactive dead TPM2 SRK Setup (Early)
|
||||
systemd-tpm2-setup.service loaded inactive dead TPM2 SRK Setup
|
||||
systemd-udev-settle.service loaded inactive dead Wait for udev To Complete Device Initialization
|
||||
systemd-udev-trigger.service loaded active exited Coldplug All udev Devices
|
||||
systemd-udevd.service loaded active running Rule-based Manager for Device Events and Files
|
||||
systemd-update-done.service loaded inactive dead Update is Completed
|
||||
systemd-update-utmp-runlevel.service loaded inactive dead Record Runlevel Change in UTMP
|
||||
systemd-update-utmp.service loaded active exited Record System Boot/Shutdown in UTMP
|
||||
systemd-user-sessions.service loaded active exited Permit User Sessions
|
||||
thermald.service loaded active running Thermal Daemon Service
|
||||
tpm-udev.service loaded inactive dead Handle dynamically added tpm devices
|
||||
ua-reboot-cmds.service loaded inactive dead Ubuntu Pro reboot cmds
|
||||
ua-timer.service loaded inactive dead Ubuntu Pro Timer for running repeated jobs
|
||||
ubuntu-advantage.service loaded inactive dead Ubuntu Pro Background Auto Attach
|
||||
udisks2.service loaded active running Disk Manager
|
||||
ufw.service loaded active exited Uncomplicated firewall
|
||||
unattended-upgrades.service loaded active running Unattended Upgrades Shutdown
|
||||
update-notifier-download.service loaded inactive dead Download data for packages that failed at package install time
|
||||
update-notifier-motd.service loaded inactive dead Check to see whether there is a new version of Ubuntu available
|
||||
upower.service loaded active running Daemon for power management
|
||||
user-runtime-dir@1000.service loaded active exited User Runtime Directory /run/user/1000
|
||||
user@1000.service loaded active running User Manager for UID 1000
|
||||
uuidd.service loaded inactive dead Daemon for generating UUIDs
|
||||
vgauth.service loaded inactive dead Authentication service for virtual machines hosted on VMware
|
||||
wpa_supplicant.service loaded active running WPA supplicant
|
||||
|
||||
Legend: LOAD → Reflects whether the unit definition was properly loaded.
|
||||
ACTIVE → The high-level unit activation state, i.e. generalization of SUB.
|
||||
SUB → The low-level unit activation state, values depend on unit type.
|
||||
|
||||
146 loaded units listed.
|
||||
@ -0,0 +1,98 @@
|
||||
services:
|
||||
redis:
|
||||
image: redis:7-alpine
|
||||
container_name: redis
|
||||
restart: unless-stopped
|
||||
ports:
|
||||
- "6379:6379"
|
||||
networks:
|
||||
- proxy-net
|
||||
volumes:
|
||||
- redis-data:/data
|
||||
command: redis-server --appendonly yes
|
||||
healthcheck:
|
||||
test: ["CMD", "redis-cli", "ping"]
|
||||
interval: 10s
|
||||
timeout: 5s
|
||||
retries: 5
|
||||
|
||||
docker-socket-proxy:
|
||||
image: tecnativa/docker-socket-proxy:latest
|
||||
container_name: docker-socket-proxy
|
||||
restart: unless-stopped
|
||||
userns_mode: "host"
|
||||
user: "0:0"
|
||||
security_opt:
|
||||
- apparmor=unconfined
|
||||
privileged: true
|
||||
group_add:
|
||||
- "988"
|
||||
environment:
|
||||
- CONTAINERS=1
|
||||
- SERVICES=1
|
||||
- TASKS=1
|
||||
- NETWORKS=1
|
||||
- EVENTS=1
|
||||
- VERSION=1
|
||||
- PING=1
|
||||
- AUTH=1
|
||||
- INFO=1
|
||||
- VOLUMES=1
|
||||
volumes:
|
||||
- /var/run/docker.sock:/var/run/docker.sock
|
||||
networks:
|
||||
- proxy-net
|
||||
|
||||
traefik:
|
||||
image: traefik:v3.6.5
|
||||
container_name: traefik
|
||||
restart: unless-stopped
|
||||
user: "0:0"
|
||||
read_only: false
|
||||
depends_on:
|
||||
redis:
|
||||
condition: service_healthy
|
||||
docker-socket-proxy:
|
||||
condition: service_started
|
||||
environment:
|
||||
- DOCKER_HOST=tcp://docker-socket-proxy:2375
|
||||
# - DOCKER_API_VERSION=1.41
|
||||
- CLOUDFLARE_DNS_API_TOKEN=${CLOUDFLARE_DNS_API_TOKEN}
|
||||
- CLOUDFLARE_ZONE_API_TOKEN=${CLOUDFLARE_ZONE_API_TOKEN}  # read the zone token from .env; previously reused the DNS token variable
|
||||
networks:
|
||||
- proxy-net
|
||||
ports:
|
||||
- "80:80"
|
||||
- "443:443"
|
||||
volumes:
|
||||
- ./traefik.yml:/traefik.yml:ro
|
||||
- ./traefik-data/dynamic:/dynamic:ro
|
||||
- ./traefik-data/certs:/certs
|
||||
- ./traefik-data/access-logs:/var/log/traefik
|
||||
labels:
|
||||
- "traefik.enable=true"
|
||||
# Dashboard
|
||||
- "traefik.http.routers.traefik-secure.rule=Host(`proxy.castaldifamily.com`) && (PathPrefix(`/api`) || PathPrefix(`/dashboard`))"
|
||||
- "traefik.http.routers.traefik-secure.entrypoints=websecure"
|
||||
- "traefik.http.routers.traefik-secure.tls=true"
|
||||
- "traefik.http.routers.traefik-secure.tls.certresolver=cloudflare"
|
||||
- "traefik.http.routers.traefik-secure.service=api@internal"
|
||||
- "traefik.http.routers.traefik-secure.middlewares=dashboard-auth@file,security-headers@file,ratelimit-basic@file,dashboard-slash@file"
|
||||
# Root redirect
|
||||
- "traefik.http.routers.traefik-root.rule=Host(`proxy.castaldifamily.com`) && Path(`/`)"
|
||||
- "traefik.http.routers.traefik-root.entrypoints=websecure"
|
||||
- "traefik.http.routers.traefik-root.tls=true"
|
||||
- "traefik.http.routers.traefik-root.tls.certresolver=cloudflare"
|
||||
- "traefik.http.routers.traefik-root.service=api@internal"
|
||||
- "traefik.http.routers.traefik-root.middlewares=redirect-to-dashboard"
|
||||
- "traefik.http.middlewares.redirect-to-dashboard.redirectregex.regex=^/$$"
|
||||
- "traefik.http.middlewares.redirect-to-dashboard.redirectregex.replacement=/dashboard"
|
||||
- "traefik.http.middlewares.redirect-to-dashboard.redirectregex.permanent=true"
|
||||
|
||||
networks:
|
||||
proxy-net:
|
||||
driver: bridge
|
||||
name: proxy-net
|
||||
|
||||
volumes:
|
||||
redis-data:
|
||||
@ -0,0 +1,975 @@
|
||||
- AppArmorProfile: docker-default
|
||||
Args:
|
||||
- --path.procfs=/host/proc
|
||||
- --path.sysfs=/host/sys
|
||||
- --path.rootfs=/rootfs
|
||||
- --collector.filesystem.mount-points-exclude=^/(sys|proc|dev|host|etc)($$|/)
|
||||
Config:
|
||||
AttachStderr: true
|
||||
AttachStdin: false
|
||||
AttachStdout: true
|
||||
Cmd:
|
||||
- --path.procfs=/host/proc
|
||||
- --path.sysfs=/host/sys
|
||||
- --path.rootfs=/rootfs
|
||||
- --collector.filesystem.mount-points-exclude=^/(sys|proc|dev|host|etc)($$|/)
|
||||
Domainname: ''
|
||||
Entrypoint:
|
||||
- /bin/node_exporter
|
||||
Env:
|
||||
- PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
|
||||
ExposedPorts:
|
||||
9100/tcp: {}
|
||||
Hostname: heimdall
|
||||
Image: prom/node-exporter:latest
|
||||
Labels:
|
||||
maintainer: The Prometheus Authors <prometheus-developers@googlegroups.com>
|
||||
OpenStdin: false
|
||||
StdinOnce: false
|
||||
Tty: false
|
||||
User: nobody
|
||||
Volumes: null
|
||||
WorkingDir: ''
|
||||
Created: '2026-03-09T23:15:53.531184328Z'
|
||||
Driver: overlayfs
|
||||
ExecIDs: null
|
||||
HostConfig:
|
||||
AutoRemove: false
|
||||
Binds:
|
||||
- /proc:/host/proc:ro
|
||||
- /sys:/host/sys:ro
|
||||
- /:/rootfs:ro
|
||||
BlkioDeviceReadBps: null
|
||||
BlkioDeviceReadIOps: null
|
||||
BlkioDeviceWriteBps: null
|
||||
BlkioDeviceWriteIOps: null
|
||||
BlkioWeight: 0
|
||||
BlkioWeightDevice: null
|
||||
CapAdd: null
|
||||
CapDrop:
|
||||
- ALL
|
||||
Cgroup: ''
|
||||
CgroupParent: ''
|
||||
CgroupnsMode: private
|
||||
ConsoleSize:
|
||||
- 0
|
||||
- 0
|
||||
ContainerIDFile: ''
|
||||
CpuCount: 0
|
||||
CpuPercent: 0
|
||||
CpuPeriod: 0
|
||||
CpuQuota: 0
|
||||
CpuRealtimePeriod: 0
|
||||
CpuRealtimeRuntime: 0
|
||||
CpuShares: 0
|
||||
CpusetCpus: ''
|
||||
CpusetMems: ''
|
||||
DeviceCgroupRules: null
|
||||
DeviceRequests: null
|
||||
Devices: null
|
||||
Dns: null
|
||||
DnsOptions: null
|
||||
DnsSearch: null
|
||||
ExtraHosts: null
|
||||
GroupAdd: null
|
||||
IOMaximumBandwidth: 0
|
||||
IOMaximumIOps: 0
|
||||
IpcMode: private
|
||||
Isolation: ''
|
||||
Links: null
|
||||
LogConfig:
|
||||
Config: {}
|
||||
Type: json-file
|
||||
MaskedPaths:
|
||||
- /proc/acpi
|
||||
- /proc/asound
|
||||
- /proc/interrupts
|
||||
- /proc/kcore
|
||||
- /proc/keys
|
||||
- /proc/latency_stats
|
||||
- /proc/sched_debug
|
||||
- /proc/scsi
|
||||
- /proc/timer_list
|
||||
- /proc/timer_stats
|
||||
- /sys/devices/virtual/powercap
|
||||
- /sys/firmware
|
||||
- /sys/devices/system/cpu/cpu0/thermal_throttle
|
||||
- /sys/devices/system/cpu/cpu1/thermal_throttle
|
||||
- /sys/devices/system/cpu/cpu2/thermal_throttle
|
||||
- /sys/devices/system/cpu/cpu3/thermal_throttle
|
||||
Memory: 134217728
|
||||
MemoryReservation: 0
|
||||
MemorySwap: 268435456
|
||||
MemorySwappiness: null
|
||||
NanoCpus: 500000000
|
||||
NetworkMode: host
|
||||
OomKillDisable: null
|
||||
OomScoreAdj: 0
|
||||
PidMode: ''
|
||||
PidsLimit: null
|
||||
PortBindings: {}
|
||||
Privileged: false
|
||||
PublishAllPorts: false
|
||||
ReadonlyPaths:
|
||||
- /proc/bus
|
||||
- /proc/fs
|
||||
- /proc/irq
|
||||
- /proc/sys
|
||||
- /proc/sysrq-trigger
|
||||
ReadonlyRootfs: true
|
||||
RestartPolicy:
|
||||
MaximumRetryCount: 0
|
||||
Name: unless-stopped
|
||||
Runtime: runc
|
||||
SecurityOpt:
|
||||
- no-new-privileges:true
|
||||
ShmSize: 67108864
|
||||
UTSMode: ''
|
||||
Ulimits: null
|
||||
UsernsMode: ''
|
||||
VolumeDriver: ''
|
||||
VolumesFrom: null
|
||||
HostnamePath: /var/lib/docker/containers/3f397bc8b39d3a9ae4b903f1daf99fdfddd842cb86b549b86c7aba30fe4d7a4f/hostname
|
||||
HostsPath: /var/lib/docker/containers/3f397bc8b39d3a9ae4b903f1daf99fdfddd842cb86b549b86c7aba30fe4d7a4f/hosts
|
||||
Id: 3f397bc8b39d3a9ae4b903f1daf99fdfddd842cb86b549b86c7aba30fe4d7a4f
|
||||
Image: sha256:3ac34ce007accad95afed72149e0d2b927b7e42fd1c866149b945b84737c62c3
|
||||
ImageManifestDescriptor:
|
||||
digest: sha256:7bcf2839f207d926b908cd3c566c9f1577efb72268062be0c96cd3b17a5cb283
|
||||
mediaType: application/vnd.docker.distribution.manifest.v2+json
|
||||
platform:
|
||||
architecture: amd64
|
||||
os: linux
|
||||
size: 949
|
||||
LogPath: /var/lib/docker/containers/3f397bc8b39d3a9ae4b903f1daf99fdfddd842cb86b549b86c7aba30fe4d7a4f/3f397bc8b39d3a9ae4b903f1daf99fdfddd842cb86b549b86c7aba30fe4d7a4f-json.log
|
||||
MountLabel: ''
|
||||
Mounts:
|
||||
- Destination: /host/proc
|
||||
Mode: ro
|
||||
Propagation: rprivate
|
||||
RW: false
|
||||
Source: /proc
|
||||
Type: bind
|
||||
- Destination: /host/sys
|
||||
Mode: ro
|
||||
Propagation: rprivate
|
||||
RW: false
|
||||
Source: /sys
|
||||
Type: bind
|
||||
- Destination: /rootfs
|
||||
Mode: ro
|
||||
Propagation: rslave
|
||||
RW: false
|
||||
Source: /
|
||||
Type: bind
|
||||
Name: /node-exporter
|
||||
NetworkSettings:
|
||||
Networks:
|
||||
host:
|
||||
Aliases: null
|
||||
DNSNames: null
|
||||
DriverOpts: null
|
||||
EndpointID: d2673440c953463f22ab1da395595e8f898bfab6baa043b2638fa2654fd04e4a
|
||||
Gateway: ''
|
||||
GlobalIPv6Address: ''
|
||||
GlobalIPv6PrefixLen: 0
|
||||
GwPriority: 0
|
||||
IPAMConfig: null
|
||||
IPAddress: ''
|
||||
IPPrefixLen: 0
|
||||
IPv6Gateway: ''
|
||||
Links: null
|
||||
MacAddress: ''
|
||||
NetworkID: b63c150f50197cfb21939a1369d37f0a309118dfb79be11d4c6082d963f8f70a
|
||||
Ports: {}
|
||||
SandboxID: 770e56f6832d109ab47e3b523e838be28d0bdf51a520cc5c9a07351bcb84f10d
|
||||
SandboxKey: /var/run/docker/netns/default
|
||||
Path: /bin/node_exporter
|
||||
Platform: linux
|
||||
ProcessLabel: ''
|
||||
ResolvConfPath: /var/lib/docker/containers/3f397bc8b39d3a9ae4b903f1daf99fdfddd842cb86b549b86c7aba30fe4d7a4f/resolv.conf
|
||||
RestartCount: 0
|
||||
State:
|
||||
Dead: false
|
||||
Error: ''
|
||||
ExitCode: 0
|
||||
FinishedAt: '0001-01-01T00:00:00Z'
|
||||
OOMKilled: false
|
||||
Paused: false
|
||||
Pid: 2616285
|
||||
Restarting: false
|
||||
Running: true
|
||||
StartedAt: '2026-03-09T23:15:53.649932822Z'
|
||||
Status: running
|
||||
Storage:
|
||||
RootFS:
|
||||
Snapshot:
|
||||
Name: overlayfs
|
||||
- AppArmorProfile: docker-default
|
||||
Args:
|
||||
- traefik
|
||||
Config:
|
||||
AttachStderr: true
|
||||
AttachStdin: false
|
||||
AttachStdout: true
|
||||
Cmd:
|
||||
- traefik
|
||||
Domainname: ''
|
||||
Entrypoint:
|
||||
- /entrypoint.sh
|
||||
Env:
|
||||
- CLOUDFLARE_ZONE_API_TOKEN=UQoi9oCZYU6lfIOBbTKCxTz7nvd59F9yqdOuc4Pc
|
||||
- DOCKER_HOST=tcp://docker-socket-proxy:2375
|
||||
- CLOUDFLARE_DNS_API_TOKEN=UQoi9oCZYU6lfIOBbTKCxTz7nvd59F9yqdOuc4Pc
|
||||
- PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
|
||||
ExposedPorts:
|
||||
443/tcp: {}
|
||||
80/tcp: {}
|
||||
Hostname: f0c70cc4667e
|
||||
Image: traefik:v3.6.5
|
||||
Labels:
|
||||
com.docker.compose.config-hash: 42df1402e650e630bde14fa90b6287582d9b29068566faaff58ed7ca6d60fffa
|
||||
com.docker.compose.container-number: '1'
|
||||
com.docker.compose.depends_on: redis:service_healthy:false,docker-socket-proxy:service_started:false
|
||||
com.docker.compose.image: sha256:67622638cd88dbfcfba40159bc652ecf0aea0e032f8a3c7e3134ae7c037b9910
|
||||
com.docker.compose.oneoff: 'False'
|
||||
com.docker.compose.project: traefik
|
||||
com.docker.compose.project.config_files: /home/chester/traefik/docker-compose.yml
|
||||
com.docker.compose.project.working_dir: /home/chester/traefik
|
||||
com.docker.compose.replace: traefik
|
||||
com.docker.compose.service: traefik
|
||||
com.docker.compose.version: 5.0.2
|
||||
org.opencontainers.image.description: A modern reverse-proxy
|
||||
org.opencontainers.image.documentation: https://docs.traefik.io
|
||||
org.opencontainers.image.source: https://github.com/traefik/traefik
|
||||
org.opencontainers.image.title: Traefik
|
||||
org.opencontainers.image.url: https://traefik.io
|
||||
org.opencontainers.image.vendor: Traefik Labs
|
||||
org.opencontainers.image.version: v3.6.5
|
||||
traefik.enable: 'true'
|
||||
traefik.http.middlewares.redirect-to-dashboard.redirectregex.permanent: 'true'
|
||||
traefik.http.middlewares.redirect-to-dashboard.redirectregex.regex: ^/$
|
||||
traefik.http.middlewares.redirect-to-dashboard.redirectregex.replacement: /dashboard
|
||||
traefik.http.routers.traefik-root.entrypoints: websecure
|
||||
traefik.http.routers.traefik-root.middlewares: redirect-to-dashboard
|
||||
traefik.http.routers.traefik-root.rule: Host(`proxy.castaldifamily.com`)
|
||||
&& Path(`/`)
|
||||
traefik.http.routers.traefik-root.service: api@internal
|
||||
traefik.http.routers.traefik-root.tls: 'true'
|
||||
traefik.http.routers.traefik-root.tls.certresolver: cloudflare
|
||||
traefik.http.routers.traefik-secure.entrypoints: websecure
|
||||
traefik.http.routers.traefik-secure.middlewares: dashboard-auth@file,security-headers@file,ratelimit-basic@file,dashboard-slash@file
|
||||
traefik.http.routers.traefik-secure.rule: Host(`proxy.castaldifamily.com`)
|
||||
&& (PathPrefix(`/api`) || PathPrefix(`/dashboard`))
|
||||
traefik.http.routers.traefik-secure.service: api@internal
|
||||
traefik.http.routers.traefik-secure.tls: 'true'
|
||||
traefik.http.routers.traefik-secure.tls.certresolver: cloudflare
|
||||
OpenStdin: false
|
||||
StdinOnce: false
|
||||
Tty: false
|
||||
User: 0:0
|
||||
Volumes: null
|
||||
WorkingDir: /
|
||||
Created: '2026-01-28T00:34:54.992079505Z'
|
||||
Driver: overlayfs
|
||||
ExecIDs: null
|
||||
HostConfig:
|
||||
AutoRemove: false
|
||||
Binds:
|
||||
- /home/chester/traefik/traefik-data/certs:/certs:rw
|
||||
- /home/chester/traefik/traefik-data/access-logs:/var/log/traefik:rw
|
||||
- /home/chester/traefik/traefik.yml:/traefik.yml:ro
|
||||
- /home/chester/traefik/traefik-data/dynamic:/dynamic:ro
|
||||
BlkioDeviceReadBps: null
|
||||
BlkioDeviceReadIOps: null
|
||||
BlkioDeviceWriteBps: null
|
||||
BlkioDeviceWriteIOps: null
|
||||
BlkioWeight: 0
|
||||
BlkioWeightDevice: null
|
||||
CapAdd: null
|
||||
CapDrop: null
|
||||
Cgroup: ''
|
||||
CgroupParent: ''
|
||||
CgroupnsMode: private
|
||||
ConsoleSize:
|
||||
- 0
|
||||
- 0
|
||||
ContainerIDFile: ''
|
||||
CpuCount: 0
|
||||
CpuPercent: 0
|
||||
CpuPeriod: 0
|
||||
CpuQuota: 0
|
||||
CpuRealtimePeriod: 0
|
||||
CpuRealtimeRuntime: 0
|
||||
CpuShares: 0
|
||||
CpusetCpus: ''
|
||||
CpusetMems: ''
|
||||
DeviceCgroupRules: null
|
||||
DeviceRequests: null
|
||||
Devices: null
|
||||
Dns: []
|
||||
DnsOptions: []
|
||||
DnsSearch: []
|
||||
ExtraHosts: []
|
||||
GroupAdd: null
|
||||
IOMaximumBandwidth: 0
|
||||
IOMaximumIOps: 0
|
||||
IpcMode: private
|
||||
Isolation: ''
|
||||
Links: null
|
||||
LogConfig:
|
||||
Config: {}
|
||||
Type: json-file
|
||||
MaskedPaths:
|
||||
- /proc/acpi
|
||||
- /proc/asound
|
||||
- /proc/interrupts
|
||||
- /proc/kcore
|
||||
- /proc/keys
|
||||
- /proc/latency_stats
|
||||
- /proc/sched_debug
|
||||
- /proc/scsi
|
||||
- /proc/timer_list
|
||||
- /proc/timer_stats
|
||||
- /sys/devices/virtual/powercap
|
||||
- /sys/firmware
|
||||
- /sys/devices/system/cpu/cpu0/thermal_throttle
|
||||
- /sys/devices/system/cpu/cpu1/thermal_throttle
|
||||
- /sys/devices/system/cpu/cpu2/thermal_throttle
|
||||
- /sys/devices/system/cpu/cpu3/thermal_throttle
|
||||
Memory: 0
|
||||
MemoryReservation: 0
|
||||
MemorySwap: 0
|
||||
MemorySwappiness: null
|
||||
NanoCpus: 0
|
||||
NetworkMode: proxy-net
|
||||
OomKillDisable: null
|
||||
OomScoreAdj: 0
|
||||
PidMode: ''
|
||||
PidsLimit: null
|
||||
PortBindings:
|
||||
443/tcp:
|
||||
- HostIp: ''
|
||||
HostPort: '443'
|
||||
80/tcp:
|
||||
- HostIp: ''
|
||||
HostPort: '80'
|
||||
Privileged: false
|
||||
PublishAllPorts: false
|
||||
ReadonlyPaths:
|
||||
- /proc/bus
|
||||
- /proc/fs
|
||||
- /proc/irq
|
||||
- /proc/sys
|
||||
- /proc/sysrq-trigger
|
||||
ReadonlyRootfs: false
|
||||
RestartPolicy:
|
||||
MaximumRetryCount: 0
|
||||
Name: unless-stopped
|
||||
Runtime: runc
|
||||
SecurityOpt: null
|
||||
ShmSize: 67108864
|
||||
UTSMode: ''
|
||||
Ulimits: null
|
||||
UsernsMode: ''
|
||||
VolumeDriver: ''
|
||||
VolumesFrom: null
|
||||
HostnamePath: /var/lib/docker/containers/f0c70cc4667e2bfb834ed92486be28d836c399dbeb84fa26bd84f49579562c64/hostname
|
||||
HostsPath: /var/lib/docker/containers/f0c70cc4667e2bfb834ed92486be28d836c399dbeb84fa26bd84f49579562c64/hosts
|
||||
Id: f0c70cc4667e2bfb834ed92486be28d836c399dbeb84fa26bd84f49579562c64
|
||||
Image: sha256:67622638cd88dbfcfba40159bc652ecf0aea0e032f8a3c7e3134ae7c037b9910
|
||||
ImageManifestDescriptor:
|
||||
annotations:
|
||||
com.docker.official-images.bashbrew.arch: amd64
|
||||
org.opencontainers.image.base.digest: sha256:1882fa4569e0c591ea092d3766c4893e19b8901a8e649de7067188aba3cc0679
|
||||
org.opencontainers.image.base.name: alpine:3.23
|
||||
org.opencontainers.image.created: '2025-12-18T00:37:28Z'
|
||||
org.opencontainers.image.revision: 87ae3f90a938b0159e557ba5b6abcfd63effb714
|
||||
org.opencontainers.image.source: https://github.com/traefik/traefik-library-image.git#87ae3f90a938b0159e557ba5b6abcfd63effb714:v3.6/alpine
|
||||
org.opencontainers.image.url: https://hub.docker.com/_/traefik
|
||||
org.opencontainers.image.version: v3.6.5
|
||||
digest: sha256:d944e3693bbf5a361ddd2e411bb713049cfb4f5ff3da200b30ee7a347dbd6abd
|
||||
mediaType: application/vnd.oci.image.manifest.v1+json
|
||||
platform:
|
||||
architecture: amd64
|
||||
os: linux
|
||||
size: 1728
|
||||
LogPath: /var/lib/docker/containers/f0c70cc4667e2bfb834ed92486be28d836c399dbeb84fa26bd84f49579562c64/f0c70cc4667e2bfb834ed92486be28d836c399dbeb84fa26bd84f49579562c64-json.log
|
||||
MountLabel: ''
|
||||
Mounts:
|
||||
- Destination: /traefik.yml
|
||||
Mode: ro
|
||||
Propagation: rprivate
|
||||
RW: false
|
||||
Source: /home/chester/traefik/traefik.yml
|
||||
Type: bind
|
||||
- Destination: /var/log/traefik
|
||||
Mode: rw
|
||||
Propagation: rprivate
|
||||
RW: true
|
||||
Source: /home/chester/traefik/traefik-data/access-logs
|
||||
Type: bind
|
||||
- Destination: /certs
|
||||
Mode: rw
|
||||
Propagation: rprivate
|
||||
RW: true
|
||||
Source: /home/chester/traefik/traefik-data/certs
|
||||
Type: bind
|
||||
- Destination: /dynamic
|
||||
Mode: ro
|
||||
Propagation: rprivate
|
||||
RW: false
|
||||
Source: /home/chester/traefik/traefik-data/dynamic
|
||||
Type: bind
|
||||
Name: /traefik
|
||||
NetworkSettings:
|
||||
Networks:
|
||||
proxy-net:
|
||||
Aliases:
|
||||
- traefik
|
||||
- traefik
|
||||
DNSNames:
|
||||
- traefik
|
||||
- f0c70cc4667e
|
||||
DriverOpts: null
|
||||
EndpointID: 85312d375679f81387f54387dc176918f159b3c5527b527a10da91b36dc3c8f5
|
||||
Gateway: 172.18.0.1
|
||||
GlobalIPv6Address: ''
|
||||
GlobalIPv6PrefixLen: 0
|
||||
GwPriority: 0
|
||||
IPAMConfig: null
|
||||
IPAddress: 172.18.0.3
|
||||
IPPrefixLen: 16
|
||||
IPv6Gateway: ''
|
||||
Links: null
|
||||
MacAddress: c2:85:cb:12:fe:61
|
||||
NetworkID: c451239da54e830d98844b541d0b707cc63426ce475d5103dc86300c0ebb7160
|
||||
Ports:
|
||||
443/tcp:
|
||||
- HostIp: 0.0.0.0
|
||||
HostPort: '443'
|
||||
- HostIp: '::'
|
||||
HostPort: '443'
|
||||
80/tcp:
|
||||
- HostIp: 0.0.0.0
|
||||
HostPort: '80'
|
||||
- HostIp: '::'
|
||||
HostPort: '80'
|
||||
SandboxID: 39e089426b97fd8075a6b4fad29d0cdc3fa77b73e28f8ef96bef68e3418b7fb1
|
||||
SandboxKey: /var/run/docker/netns/39e089426b97
|
||||
Path: /entrypoint.sh
|
||||
Platform: linux
|
||||
ProcessLabel: ''
|
||||
ResolvConfPath: /var/lib/docker/containers/f0c70cc4667e2bfb834ed92486be28d836c399dbeb84fa26bd84f49579562c64/resolv.conf
|
||||
RestartCount: 0
|
||||
State:
|
||||
Dead: false
|
||||
Error: ''
|
||||
ExitCode: 0
|
||||
FinishedAt: '2026-02-21T18:15:51.551714695Z'
|
||||
OOMKilled: false
|
||||
Paused: false
|
||||
Pid: 1213
|
||||
Restarting: false
|
||||
Running: true
|
||||
StartedAt: '2026-02-21T18:30:42.488013871Z'
|
||||
Status: running
|
||||
Storage:
|
||||
RootFS:
|
||||
Snapshot:
|
||||
Name: overlayfs
|
||||
- AppArmorProfile: unconfined
|
||||
Args:
|
||||
- haproxy
|
||||
- -f
|
||||
- /tmp/haproxy.cfg
|
||||
Config:
|
||||
AttachStderr: true
|
||||
AttachStdin: false
|
||||
AttachStdout: true
|
||||
Cmd:
|
||||
- haproxy
|
||||
- -f
|
||||
- /tmp/haproxy.cfg
|
||||
Domainname: ''
|
||||
Entrypoint:
|
||||
- docker-entrypoint.sh
|
||||
Env:
|
||||
- INFO=1
|
||||
- SERVICES=1
|
||||
- TASKS=1
|
||||
- PING=1
|
||||
- AUTH=1
|
||||
- VERSION=1
|
||||
- EVENTS=1
|
||||
- NETWORKS=1
|
||||
- CONTAINERS=1
|
||||
- VOLUMES=1
|
||||
- PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
|
||||
- HAPROXY_VERSION=3.2.4
|
||||
- HAPROXY_URL=https://www.haproxy.org/download/3.2/src/haproxy-3.2.4.tar.gz
|
||||
- HAPROXY_SHA256=5d4b2ee6fe56b8098ebb9c91a899d728f87d64cd7be8804d2ddcc5f937498c1d
|
||||
- ALLOW_RESTARTS=0
|
||||
- ALLOW_STOP=0
|
||||
- ALLOW_START=0
|
||||
- BUILD=0
|
||||
- COMMIT=0
|
||||
- CONFIGS=0
|
||||
- DISABLE_IPV6=0
|
||||
- DISTRIBUTION=0
|
||||
- EXEC=0
|
||||
- GRPC=0
|
||||
- IMAGES=0
|
||||
- LOG_LEVEL=info
|
||||
- NODES=0
|
||||
- PLUGINS=0
|
||||
- POST=0
|
||||
- SECRETS=0
|
||||
- SESSION=0
|
||||
- SOCKET_PATH=/var/run/docker.sock
|
||||
- SWARM=0
|
||||
- SYSTEM=0
|
||||
ExposedPorts:
|
||||
2375/tcp: {}
|
||||
Hostname: f59c3a7d4c30
|
||||
Image: tecnativa/docker-socket-proxy:latest
|
||||
Labels:
|
||||
com.docker.compose.config-hash: 711c15ad420cb4274f3a65832d36be4bc31327a53f09b84b803d0e1ab18a0917
|
||||
com.docker.compose.container-number: '1'
|
||||
com.docker.compose.depends_on: ''
|
||||
com.docker.compose.image: sha256:1f3a6f303320723d199d2316a3e82b2e2685d86c275d5e3deeaf182573b47476
|
||||
com.docker.compose.oneoff: 'False'
|
||||
com.docker.compose.project: traefik
|
||||
com.docker.compose.project.config_files: /home/chester/traefik/docker-compose.yml
|
||||
com.docker.compose.project.working_dir: /home/chester/traefik
|
||||
com.docker.compose.replace: docker-socket-proxy
|
||||
com.docker.compose.service: docker-socket-proxy
|
||||
com.docker.compose.version: 5.0.2
|
||||
org.opencontainers.image.created: '2025-12-16T07:26:21.623Z'
|
||||
org.opencontainers.image.description: Proxy over your Docker socket to
|
||||
restrict which requests it accepts
|
||||
org.opencontainers.image.licenses: Apache-2.0
|
||||
org.opencontainers.image.revision: 2f04313b042c1bf4dfbd039475dfc42db79bde7a
|
||||
org.opencontainers.image.source: https://github.com/Tecnativa/docker-socket-proxy
|
||||
org.opencontainers.image.title: docker-socket-proxy
|
||||
org.opencontainers.image.url: https://github.com/Tecnativa/docker-socket-proxy
|
||||
org.opencontainers.image.version: v0.4.2
|
||||
OpenStdin: false
|
||||
StdinOnce: false
|
||||
StopSignal: SIGUSR1
|
||||
Tty: false
|
||||
User: 0:0
|
||||
Volumes: null
|
||||
WorkingDir: /var/lib/haproxy
|
||||
Created: '2026-01-28T00:34:44.663698444Z'
|
||||
Driver: overlayfs
|
||||
ExecIDs: null
|
||||
HostConfig:
|
||||
AutoRemove: false
|
||||
Binds:
|
||||
- /var/run/docker.sock:/var/run/docker.sock:rw
|
||||
BlkioDeviceReadBps: null
|
||||
BlkioDeviceReadIOps: null
|
||||
BlkioDeviceWriteBps: null
|
||||
BlkioDeviceWriteIOps: null
|
||||
BlkioWeight: 0
|
||||
BlkioWeightDevice: null
|
||||
CapAdd: null
|
||||
CapDrop: null
|
||||
Cgroup: ''
|
||||
CgroupParent: ''
|
||||
CgroupnsMode: private
|
||||
ConsoleSize:
|
||||
- 0
|
||||
- 0
|
||||
ContainerIDFile: ''
|
||||
CpuCount: 0
|
||||
CpuPercent: 0
|
||||
CpuPeriod: 0
|
||||
CpuQuota: 0
|
||||
CpuRealtimePeriod: 0
|
||||
CpuRealtimeRuntime: 0
|
||||
CpuShares: 0
|
||||
CpusetCpus: ''
|
||||
CpusetMems: ''
|
||||
DeviceCgroupRules: null
|
||||
DeviceRequests: null
|
||||
Devices: null
|
||||
Dns: []
|
||||
DnsOptions: []
|
||||
DnsSearch: []
|
||||
ExtraHosts: []
|
||||
GroupAdd:
|
||||
- '988'
|
||||
IOMaximumBandwidth: 0
|
||||
IOMaximumIOps: 0
|
||||
IpcMode: private
|
||||
Isolation: ''
|
||||
Links: null
|
||||
LogConfig:
|
||||
Config: {}
|
||||
Type: json-file
|
||||
MaskedPaths: null
|
||||
Memory: 0
|
||||
MemoryReservation: 0
|
||||
MemorySwap: 0
|
||||
MemorySwappiness: null
|
||||
NanoCpus: 0
|
||||
NetworkMode: proxy-net
|
||||
OomKillDisable: null
|
||||
OomScoreAdj: 0
|
||||
PidMode: ''
|
||||
PidsLimit: null
|
||||
PortBindings: {}
|
||||
Privileged: true
|
||||
PublishAllPorts: false
|
||||
ReadonlyPaths: null
|
||||
ReadonlyRootfs: false
|
||||
RestartPolicy:
|
||||
MaximumRetryCount: 0
|
||||
Name: unless-stopped
|
||||
Runtime: runc
|
||||
SecurityOpt:
|
||||
- apparmor=unconfined
|
||||
- label=disable
|
||||
ShmSize: 67108864
|
||||
UTSMode: ''
|
||||
Ulimits: null
|
||||
UsernsMode: host
|
||||
VolumeDriver: ''
|
||||
VolumesFrom: null
|
||||
HostnamePath: /var/lib/docker/containers/f59c3a7d4c3036a26bb8f060aa209b06bcb52d9d0bc41e32a750b36f4df3ae56/hostname
|
||||
HostsPath: /var/lib/docker/containers/f59c3a7d4c3036a26bb8f060aa209b06bcb52d9d0bc41e32a750b36f4df3ae56/hosts
|
||||
Id: f59c3a7d4c3036a26bb8f060aa209b06bcb52d9d0bc41e32a750b36f4df3ae56
|
||||
Image: sha256:1f3a6f303320723d199d2316a3e82b2e2685d86c275d5e3deeaf182573b47476
|
||||
ImageManifestDescriptor:
|
||||
digest: sha256:bd2241b3bec83abcff25927a0a7ae518e0c5bef624b3cc247dcb31e68b53f417
|
||||
mediaType: application/vnd.oci.image.manifest.v1+json
|
||||
platform:
|
||||
architecture: amd64
|
||||
os: linux
|
||||
size: 1993
|
||||
LogPath: /var/lib/docker/containers/f59c3a7d4c3036a26bb8f060aa209b06bcb52d9d0bc41e32a750b36f4df3ae56/f59c3a7d4c3036a26bb8f060aa209b06bcb52d9d0bc41e32a750b36f4df3ae56-json.log
|
||||
MountLabel: ''
|
||||
Mounts:
|
||||
- Destination: /var/run/docker.sock
|
||||
Mode: rw
|
||||
Propagation: rprivate
|
||||
RW: true
|
||||
Source: /var/run/docker.sock
|
||||
Type: bind
|
||||
Name: /docker-socket-proxy
|
||||
NetworkSettings:
|
||||
Networks:
|
||||
proxy-net:
|
||||
Aliases:
|
||||
- docker-socket-proxy
|
||||
- docker-socket-proxy
|
||||
DNSNames:
|
||||
- docker-socket-proxy
|
||||
- f59c3a7d4c30
|
||||
DriverOpts: null
|
||||
EndpointID: cb18a5396cca6ed0b3c3502b8e8e2d46eb39a5afaa7350e2dd2ea9ee5448d7d3
|
||||
Gateway: 172.18.0.1
|
||||
GlobalIPv6Address: ''
|
||||
GlobalIPv6PrefixLen: 0
|
||||
GwPriority: 0
|
||||
IPAMConfig: null
|
||||
IPAddress: 172.18.0.2
|
||||
IPPrefixLen: 16
|
||||
IPv6Gateway: ''
|
||||
Links: null
|
||||
MacAddress: 42:a5:f6:d2:52:08
|
||||
NetworkID: c451239da54e830d98844b541d0b707cc63426ce475d5103dc86300c0ebb7160
|
||||
Ports:
|
||||
2375/tcp: null
|
||||
SandboxID: e0902b280ba958f8f4ee51c20eb33a563b8bfc1717f3fbf4dd012a05672f3e74
|
||||
SandboxKey: /var/run/docker/netns/e0902b280ba9
|
||||
Path: docker-entrypoint.sh
|
||||
Platform: linux
|
||||
ProcessLabel: ''
|
||||
ResolvConfPath: /var/lib/docker/containers/f59c3a7d4c3036a26bb8f060aa209b06bcb52d9d0bc41e32a750b36f4df3ae56/resolv.conf
|
||||
RestartCount: 0
|
||||
State:
|
||||
Dead: false
|
||||
Error: ''
|
||||
ExitCode: 0
|
||||
FinishedAt: '2026-02-21T18:16:00.055009796Z'
|
||||
OOMKilled: false
|
||||
Paused: false
|
||||
Pid: 1225
|
||||
Restarting: false
|
||||
Running: true
|
||||
StartedAt: '2026-02-21T18:30:42.49130796Z'
|
||||
Status: running
|
||||
Storage:
|
||||
RootFS:
|
||||
Snapshot:
|
||||
Name: overlayfs
|
||||
- AppArmorProfile: docker-default
|
||||
Args:
|
||||
- redis-server
|
||||
- --appendonly
|
||||
- 'yes'
|
||||
Config:
|
||||
AttachStderr: true
|
||||
AttachStdin: false
|
||||
AttachStdout: true
|
||||
Cmd:
|
||||
- redis-server
|
||||
- --appendonly
|
||||
- 'yes'
|
||||
Domainname: ''
|
||||
Entrypoint:
|
||||
- docker-entrypoint.sh
|
||||
Env:
|
||||
- PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
|
||||
- GOSU_VERSION=1.17
|
||||
- REDIS_VERSION=7.4.7
|
||||
- REDIS_DOWNLOAD_URL=http://download.redis.io/releases/redis-7.4.7.tar.gz
|
||||
- REDIS_DOWNLOAD_SHA=c97e57b0df330a9e091cacff012bebe763c275398cf36ff44cdba876814b595b
|
||||
ExposedPorts:
|
||||
6379/tcp: {}
|
||||
Healthcheck:
|
||||
Interval: 10000000000
|
||||
Retries: 5
|
||||
Test:
|
||||
- CMD
|
||||
- redis-cli
|
||||
- ping
|
||||
Timeout: 5000000000
|
||||
Hostname: 57439684f5ef
|
||||
Image: redis:7-alpine
|
||||
Labels:
|
||||
com.docker.compose.config-hash: eb5826610c0f348a70810f75902caa3d6b889a5e442c0d9ddc539355c0113f49
|
||||
com.docker.compose.container-number: '1'
|
||||
com.docker.compose.depends_on: ''
|
||||
com.docker.compose.image: sha256:ee64a64eaab618d88051c3ade8f6352d11531fcf79d9a4818b9b183d8c1d18ba
|
||||
com.docker.compose.oneoff: 'False'
|
||||
com.docker.compose.project: traefik
|
||||
com.docker.compose.project.config_files: /home/chester/traefik/docker-compose.yml
|
||||
com.docker.compose.project.working_dir: /home/chester/traefik
|
||||
com.docker.compose.replace: redis
|
||||
com.docker.compose.service: redis
|
||||
com.docker.compose.version: 5.0.2
|
||||
OpenStdin: false
|
||||
StdinOnce: false
|
||||
Tty: false
|
||||
User: ''
|
||||
Volumes:
|
||||
/data: {}
|
||||
WorkingDir: /data
|
||||
Created: '2026-01-28T00:34:44.662867915Z'
|
||||
Driver: overlayfs
|
||||
ExecIDs: null
|
||||
HostConfig:
|
||||
AutoRemove: false
|
||||
Binds:
|
||||
- traefik_redis-data:/data:rw
|
||||
BlkioDeviceReadBps: null
|
||||
BlkioDeviceReadIOps: null
|
||||
BlkioDeviceWriteBps: null
|
||||
BlkioDeviceWriteIOps: null
|
||||
BlkioWeight: 0
|
||||
BlkioWeightDevice: null
|
||||
CapAdd: null
|
||||
CapDrop: null
|
||||
Cgroup: ''
|
||||
CgroupParent: ''
|
||||
CgroupnsMode: private
|
||||
ConsoleSize:
|
||||
- 0
|
||||
- 0
|
||||
ContainerIDFile: ''
|
||||
CpuCount: 0
|
||||
CpuPercent: 0
|
||||
CpuPeriod: 0
|
||||
CpuQuota: 0
|
||||
CpuRealtimePeriod: 0
|
||||
CpuRealtimeRuntime: 0
|
||||
CpuShares: 0
|
||||
CpusetCpus: ''
|
||||
CpusetMems: ''
|
||||
DeviceCgroupRules: null
|
||||
DeviceRequests: null
|
||||
Devices: null
|
||||
Dns: []
|
||||
DnsOptions: []
|
||||
DnsSearch: []
|
||||
ExtraHosts: []
|
||||
GroupAdd: null
|
||||
IOMaximumBandwidth: 0
|
||||
IOMaximumIOps: 0
|
||||
IpcMode: private
|
||||
Isolation: ''
|
||||
Links: null
|
||||
LogConfig:
|
||||
Config: {}
|
||||
Type: json-file
|
||||
MaskedPaths:
|
||||
- /proc/acpi
|
||||
- /proc/asound
|
||||
- /proc/interrupts
|
||||
- /proc/kcore
|
||||
- /proc/keys
|
||||
- /proc/latency_stats
|
||||
- /proc/sched_debug
|
||||
- /proc/scsi
|
||||
- /proc/timer_list
|
||||
- /proc/timer_stats
|
||||
- /sys/devices/virtual/powercap
|
||||
- /sys/firmware
|
||||
- /sys/devices/system/cpu/cpu0/thermal_throttle
|
||||
- /sys/devices/system/cpu/cpu1/thermal_throttle
|
||||
- /sys/devices/system/cpu/cpu2/thermal_throttle
|
||||
- /sys/devices/system/cpu/cpu3/thermal_throttle
|
||||
Memory: 0
|
||||
MemoryReservation: 0
|
||||
MemorySwap: 0
|
||||
MemorySwappiness: null
|
||||
NanoCpus: 0
|
||||
NetworkMode: proxy-net
|
||||
OomKillDisable: null
|
||||
OomScoreAdj: 0
|
||||
PidMode: ''
|
||||
PidsLimit: null
|
||||
PortBindings:
|
||||
6379/tcp:
|
||||
- HostIp: ''
|
||||
HostPort: '6379'
|
||||
Privileged: false
|
||||
PublishAllPorts: false
|
||||
ReadonlyPaths:
|
||||
- /proc/bus
|
||||
- /proc/fs
|
||||
- /proc/irq
|
||||
- /proc/sys
|
||||
- /proc/sysrq-trigger
|
||||
ReadonlyRootfs: false
|
||||
RestartPolicy:
|
||||
MaximumRetryCount: 0
|
||||
Name: unless-stopped
|
||||
Runtime: runc
|
||||
SecurityOpt: null
|
||||
ShmSize: 67108864
|
||||
UTSMode: ''
|
||||
Ulimits: null
|
||||
UsernsMode: ''
|
||||
VolumeDriver: ''
|
||||
VolumesFrom: null
|
||||
HostnamePath: /var/lib/docker/containers/57439684f5eff5afa67108c958725c641ff4b0299917774c93d91d5ce7b614b2/hostname
|
||||
HostsPath: /var/lib/docker/containers/57439684f5eff5afa67108c958725c641ff4b0299917774c93d91d5ce7b614b2/hosts
|
||||
Id: 57439684f5eff5afa67108c958725c641ff4b0299917774c93d91d5ce7b614b2
|
||||
Image: sha256:ee64a64eaab618d88051c3ade8f6352d11531fcf79d9a4818b9b183d8c1d18ba
|
||||
ImageManifestDescriptor:
|
||||
annotations:
|
||||
com.docker.official-images.bashbrew.arch: amd64
|
||||
org.opencontainers.image.base.digest: sha256:41c81533144786e0beb2b148667355a6c7659aa99a14ed837ff15a98ca9d71f3
|
||||
org.opencontainers.image.base.name: alpine:3.21
|
||||
org.opencontainers.image.created: '2025-11-03T17:38:49Z'
|
||||
org.opencontainers.image.revision: d42d7aec93b1c54dd46f37a66a92f62478456039
|
||||
org.opencontainers.image.source: https://github.com/redis/docker-library-redis.git#d42d7aec93b1c54dd46f37a66a92f62478456039:7.4/alpine
|
||||
org.opencontainers.image.url: https://hub.docker.com/_/redis
|
||||
org.opencontainers.image.version: 7.4.7-alpine
|
||||
digest: sha256:4706ecab5371690fecfdd782268929c94ad5b5ce9ce0b35bfdfe191c4ad17851
|
||||
mediaType: application/vnd.oci.image.manifest.v1+json
|
||||
platform:
|
||||
architecture: amd64
|
||||
os: linux
|
||||
size: 2483
|
||||
LogPath: /var/lib/docker/containers/57439684f5eff5afa67108c958725c641ff4b0299917774c93d91d5ce7b614b2/57439684f5eff5afa67108c958725c641ff4b0299917774c93d91d5ce7b614b2-json.log
|
||||
MountLabel: ''
|
||||
Mounts:
|
||||
- Destination: /data
|
||||
Driver: local
|
||||
Mode: rw
|
||||
Name: traefik_redis-data
|
||||
Propagation: ''
|
||||
RW: true
|
||||
Source: /var/lib/docker/volumes/traefik_redis-data/_data
|
||||
Type: volume
|
||||
Name: /redis
|
||||
NetworkSettings:
|
||||
Networks:
|
||||
proxy-net:
|
||||
Aliases:
|
||||
- redis
|
||||
- redis
|
||||
DNSNames:
|
||||
- redis
|
||||
- 57439684f5ef
|
||||
DriverOpts: null
|
||||
EndpointID: 7f950d9aab3bf29937a2c66723f8fd483984fa9ccd74a859166e810c77a9ca0b
|
||||
Gateway: 172.18.0.1
|
||||
GlobalIPv6Address: ''
|
||||
GlobalIPv6PrefixLen: 0
|
||||
GwPriority: 0
|
||||
IPAMConfig: null
|
||||
IPAddress: 172.18.0.4
|
||||
IPPrefixLen: 16
|
||||
IPv6Gateway: ''
|
||||
Links: null
|
||||
MacAddress: e2:9b:a3:07:2f:81
|
||||
NetworkID: c451239da54e830d98844b541d0b707cc63426ce475d5103dc86300c0ebb7160
|
||||
Ports:
|
||||
6379/tcp:
|
||||
- HostIp: 0.0.0.0
|
||||
HostPort: '6379'
|
||||
- HostIp: '::'
|
||||
HostPort: '6379'
|
||||
SandboxID: dfafbd7bf0a46788747bcf7e8cbe9dcfc05886cdbb73add6cde8d3f50eeed30d
|
||||
SandboxKey: /var/run/docker/netns/dfafbd7bf0a4
|
||||
Path: docker-entrypoint.sh
|
||||
Platform: linux
|
||||
ProcessLabel: ''
|
||||
ResolvConfPath: /var/lib/docker/containers/57439684f5eff5afa67108c958725c641ff4b0299917774c93d91d5ce7b614b2/resolv.conf
|
||||
RestartCount: 0
|
||||
State:
|
||||
Dead: false
|
||||
Error: ''
|
||||
ExitCode: 0
|
||||
FinishedAt: '2026-02-21T18:15:50.121096266Z'
|
||||
Health:
|
||||
FailingStreak: 0
|
||||
Log:
|
||||
- End: '2026-03-12T21:40:46.09861824Z'
|
||||
ExitCode: 0
|
||||
Output: 'PONG
|
||||
|
||||
'
|
||||
Start: '2026-03-12T21:40:46.035694287Z'
|
||||
- End: '2026-03-12T21:40:56.156972993Z'
|
||||
ExitCode: 0
|
||||
Output: 'PONG
|
||||
|
||||
'
|
||||
Start: '2026-03-12T21:40:56.09903008Z'
|
||||
- End: '2026-03-12T21:41:06.212479164Z'
|
||||
ExitCode: 0
|
||||
Output: 'PONG
|
||||
|
||||
'
|
||||
Start: '2026-03-12T21:41:06.158068315Z'
|
||||
- End: '2026-03-12T21:41:16.254915792Z'
|
||||
ExitCode: 0
|
||||
Output: 'PONG
|
||||
|
||||
'
|
||||
Start: '2026-03-12T21:41:16.213809696Z'
|
||||
- End: '2026-03-12T21:41:26.295890532Z'
|
||||
ExitCode: 0
|
||||
Output: 'PONG
|
||||
|
||||
'
|
||||
Start: '2026-03-12T21:41:26.255822169Z'
|
||||
Status: healthy
|
||||
OOMKilled: false
|
||||
Paused: false
|
||||
Pid: 1220
|
||||
Restarting: false
|
||||
Running: true
|
||||
StartedAt: '2026-02-21T18:30:42.486966925Z'
|
||||
Status: running
|
||||
Storage:
|
||||
RootFS:
|
||||
Snapshot:
|
||||
Name: overlayfs
|
||||
@ -0,0 +1,8 @@
|
||||
cgroup_driver: systemd
|
||||
containers_running: 4
|
||||
containers_total: 4
|
||||
daemon_config: {}
|
||||
logging_driver: json-file
|
||||
server_version: 29.2.0
|
||||
storage_driver: overlayfs
|
||||
swarm_state: inactive
|
||||
@ -0,0 +1,7 @@
|
||||
# Env key inventory — values REDACTED for security
|
||||
# Source: /home/chester/traefik/.env
|
||||
# Host: heimdall | Captured: 2026-03-12T21:41:19Z
|
||||
#
|
||||
# To restore secrets: ansible-vault encrypt_string '<value>' --name '<KEY>'
|
||||
CLOUDFLARE_DNS_API_TOKEN=<REDACTED>
|
||||
CLOUDFLARE_ZONE_API_TOKEN=<REDACTED>
|
||||
@ -0,0 +1,49 @@
|
||||
# Firewall state on heimdall
|
||||
# Captured: 2026-03-12T21:41:19Z
|
||||
|
||||
## UFW STATUS
|
||||
Status: inactive
|
||||
|
||||
## IPTABLES (reference)
|
||||
Chain INPUT (policy ACCEPT)
|
||||
num target prot opt source destination
|
||||
|
||||
Chain FORWARD (policy DROP)
|
||||
num target prot opt source destination
|
||||
1 DOCKER-USER 0 -- 0.0.0.0/0 0.0.0.0/0
|
||||
2 DOCKER-FORWARD 0 -- 0.0.0.0/0 0.0.0.0/0
|
||||
|
||||
Chain OUTPUT (policy ACCEPT)
|
||||
num target prot opt source destination
|
||||
|
||||
Chain DOCKER (2 references)
|
||||
num target prot opt source destination
|
||||
1 ACCEPT 6 -- 0.0.0.0/0 172.18.0.4 tcp dpt:6379
|
||||
2 ACCEPT 6 -- 0.0.0.0/0 172.18.0.3 tcp dpt:443
|
||||
3 ACCEPT 6 -- 0.0.0.0/0 172.18.0.3 tcp dpt:80
|
||||
4 DROP 0 -- 0.0.0.0/0 0.0.0.0/0
|
||||
5 DROP 0 -- 0.0.0.0/0 0.0.0.0/0
|
||||
|
||||
Chain DOCKER-BRIDGE (1 references)
|
||||
num target prot opt source destination
|
||||
1 DOCKER 0 -- 0.0.0.0/0 0.0.0.0/0
|
||||
2 DOCKER 0 -- 0.0.0.0/0 0.0.0.0/0
|
||||
|
||||
Chain DOCKER-CT (1 references)
|
||||
num target prot opt source destination
|
||||
1 ACCEPT 0 -- 0.0.0.0/0 0.0.0.0/0 ctstate RELATED,ESTABLISHED
|
||||
2 ACCEPT 0 -- 0.0.0.0/0 0.0.0.0/0 ctstate RELATED,ESTABLISHED
|
||||
|
||||
Chain DOCKER-FORWARD (1 references)
|
||||
num target prot opt source destination
|
||||
1 DOCKER-CT 0 -- 0.0.0.0/0 0.0.0.0/0
|
||||
2 DOCKER-INTERNAL 0 -- 0.0.0.0/0 0.0.0.0/0
|
||||
3 DOCKER-BRIDGE 0 -- 0.0.0.0/0 0.0.0.0/0
|
||||
4 ACCEPT 0 -- 0.0.0.0/0 0.0.0.0/0
|
||||
5 ACCEPT 0 -- 0.0.0.0/0 0.0.0.0/0
|
||||
|
||||
Chain DOCKER-INTERNAL (1 references)
|
||||
num target prot opt source destination
|
||||
|
||||
Chain DOCKER-USER (1 references)
|
||||
num target prot opt source destination
|
||||
@ -0,0 +1,36 @@
|
||||
ansible_user: root
|
||||
architecture: x86_64
|
||||
cpu_vcpus: 4
|
||||
default_ipv4:
|
||||
address: 10.0.0.151
|
||||
alias: enp1s0
|
||||
broadcast: 10.0.0.255
|
||||
gateway: 10.0.0.2
|
||||
interface: enp1s0
|
||||
macaddress: 7c:83:34:bf:79:a5
|
||||
mtu: 1500
|
||||
netmask: 255.255.255.0
|
||||
network: 10.0.0.0
|
||||
prefix: '24'
|
||||
type: ether
|
||||
distribution: Ubuntu
|
||||
distribution_release: noble
|
||||
distribution_version: '24.04'
|
||||
fqdn: heimdall
|
||||
hostname: heimdall
|
||||
interfaces:
|
||||
- enp2s0
|
||||
- wlo1
|
||||
- enp1s0
|
||||
- vethe43b71e
|
||||
- br-c451239da54e
|
||||
- lo
|
||||
- veth2088d3d
|
||||
- veth57f15b2
|
||||
- docker0
|
||||
kernel: 6.8.0-100-generic
|
||||
memory_free_mb: 342
|
||||
memory_total_mb: 15767
|
||||
os_family: Debian
|
||||
python_version: 3.12.3
|
||||
uptime_seconds: 1653162
|
||||
@ -0,0 +1,65 @@
|
||||
---
|
||||
---
|
||||
# Heimdall baseline capture manifest
|
||||
# Generated: 2026-03-12T21:41:19Z
|
||||
# Host: heimdall (10.0.0.151)
|
||||
# Review this file before proceeding to heimdall_edge role refactor.
|
||||
|
||||
capture_timestamp: "2026-03-12T21:41:19Z"
|
||||
capture_dir: "/home/chester/homelab/ansible/playbooks/preflight/../../outputs/heimdall-baseline-20260312T214117"
|
||||
|
||||
host:
|
||||
hostname: "heimdall"
|
||||
ip: "10.0.0.151"
|
||||
os: "Ubuntu 24.04"
|
||||
kernel: "6.8.0-100-generic"
|
||||
|
||||
docker:
|
||||
version: "29.2.0"
|
||||
storage_driver: "overlayfs"
|
||||
swarm_state: "inactive"
|
||||
containers_running: 4
|
||||
containers_total: 4
|
||||
|
||||
inventory:
|
||||
containers_found: 4
|
||||
compose_files_found: 2
|
||||
env_files_found: 2
|
||||
|
||||
critical_paths:
|
||||
/etc/docker/daemon.json: false
|
||||
/home/chester/traefik: true
|
||||
/home/chester/traefik/.env: true
|
||||
/home/chester/traefik/docker-compose.yml: true
|
||||
/home/chester/traefik/traefik-data/certs/acme.json: true
|
||||
/home/chester/traefik/traefik-data/dynamic/middleware.yml: true
|
||||
/home/chester/traefik/traefik-data/dynamic/static-backends.yml: true
|
||||
/home/chester/traefik/traefik.yml: true
|
||||
/opt/stacks/heimdall: false
|
||||
/opt/stacks/heimdall/.env: false
|
||||
/opt/stacks/heimdall/docker-compose.yml: false
|
||||
/opt/stacks/heimdall/redis-data: false
|
||||
/opt/stacks/heimdall/runner-data: false
|
||||
/opt/stacks/heimdall/traefik-certs: false
|
||||
/opt/stacks/heimdall/traefik-certs/acme.json: false
|
||||
|
||||
compose_file_paths:
|
||||
- /home/chester/traefik/docker-compose.yml
|
||||
- /home/chester/traefik/docker-compose.yml
|
||||
|
||||
env_file_paths:
|
||||
- /home/chester/traefik/.env
|
||||
- /home/chester/traefik/.env
|
||||
|
||||
containers_running:
|
||||
- node-exporter
|
||||
- traefik
|
||||
- docker-socket-proxy
|
||||
- redis
|
||||
|
||||
validation:
|
||||
compose_files_present: True
|
||||
containers_present: True
|
||||
stack_dir_present: False
|
||||
compose_present: False
|
||||
env_present: False
|
||||
@ -0,0 +1,25 @@
|
||||
---
|
||||
# Docker network and volume inventory
|
||||
# Host: heimdall | Captured: 2026-03-12T21:41:19Z
|
||||
|
||||
networks:
|
||||
- Driver: host
|
||||
Id: b63c150f50197cfb21939a1369d37f0a309118dfb79be11d4c6082d963f8f70a
|
||||
Name: host
|
||||
Scope: local
|
||||
- Driver: bridge
|
||||
Id: c451239da54e830d98844b541d0b707cc63426ce475d5103dc86300c0ebb7160
|
||||
Name: proxy-net
|
||||
Scope: local
|
||||
- Driver: bridge
|
||||
Id: 4f3815cff81bd0c59f62e0151bc58bc0289eca4634f77bf544e1fc3e34c0bab7
|
||||
Name: bridge
|
||||
Scope: local
|
||||
- Driver: 'null'
|
||||
Id: a55e7a3ec6e204eae20086edec67507e3c7ef59f5e383d4b8631d614c657e0d0
|
||||
Name: none
|
||||
Scope: local
|
||||
|
||||
volumes:
|
||||
- Driver: local
|
||||
Name: traefik_redis-data
|
||||
@ -0,0 +1,153 @@
|
||||
UNIT LOAD ACTIVE SUB DESCRIPTION
|
||||
apparmor.service loaded active exited Load AppArmor profiles
|
||||
apport-autoreport.service loaded inactive dead Process error reports when automatic reporting is enabled
|
||||
apport.service loaded active exited automatic crash report generation
|
||||
apt-daily-upgrade.service loaded inactive dead Daily apt upgrade and clean activities
|
||||
apt-daily.service loaded inactive dead Daily apt download activities
|
||||
blk-availability.service loaded active exited Availability of block devices
|
||||
cloud-init-local.service loaded inactive dead Cloud-init: Local Stage (pre-network)
|
||||
console-setup.service loaded active exited Set console font and keymap
|
||||
containerd.service loaded active running containerd container runtime
|
||||
cron.service loaded active running Regular background program processing daemon
|
||||
dbus.service loaded active running D-Bus System Message Bus
|
||||
dm-event.service loaded inactive dead Device-mapper event daemon
|
||||
dmesg.service loaded inactive dead Save initial kernel messages after boot
|
||||
docker.service loaded active running Docker Application Container Engine
|
||||
dpkg-db-backup.service loaded inactive dead Daily dpkg database backup service
|
||||
e2scrub_all.service loaded inactive dead Online ext4 Metadata Check for All Filesystems
|
||||
e2scrub_reap.service loaded inactive dead Remove Stale Online ext4 Metadata Check Snapshots
|
||||
emergency.service loaded inactive dead Emergency Shell
|
||||
finalrd.service loaded active exited Create final runtime dir for shutdown pivot root
|
||||
fstrim.service loaded inactive dead Discard unused blocks on filesystems from /etc/fstab
|
||||
fwupd-refresh.service loaded inactive dead Refresh fwupd metadata and update motd
|
||||
getty-static.service loaded inactive dead getty on tty2-tty6 if dbus and logind are not available
|
||||
getty@tty1.service loaded active running Getty on tty1
|
||||
grub-common.service loaded inactive dead Record successful boot for GRUB
|
||||
grub-initrd-fallback.service loaded inactive dead GRUB failed boot detection
|
||||
initrd-cleanup.service loaded inactive dead Cleaning Up and Shutting Down Daemons
|
||||
initrd-parse-etc.service loaded inactive dead Mountpoints Configured in the Real Root
|
||||
initrd-switch-root.service loaded inactive dead Switch Root
|
||||
initrd-udevadm-cleanup-db.service loaded inactive dead Cleanup udev Database
|
||||
iscsid.service loaded inactive dead iSCSI initiator daemon (iscsid)
|
||||
keyboard-setup.service loaded active exited Set the console keyboard layout
|
||||
kmod-static-nodes.service loaded active exited Create List of Static Device Nodes
|
||||
ldconfig.service loaded inactive dead Rebuild Dynamic Linker Cache
|
||||
logrotate.service loaded inactive dead Rotate log files
|
||||
lvm2-lvmpolld.service loaded inactive dead LVM2 poll daemon
|
||||
lvm2-monitor.service loaded active exited Monitoring of LVM2 mirrors, snapshots etc. using dmeventd or progress polling
|
||||
man-db.service loaded inactive dead Daily man-db regeneration
|
||||
ModemManager.service loaded active running Modem Manager
|
||||
modprobe@configfs.service loaded inactive dead Load Kernel Module configfs
|
||||
modprobe@dm_mod.service loaded inactive dead Load Kernel Module dm_mod
|
||||
modprobe@drm.service loaded inactive dead Load Kernel Module drm
|
||||
modprobe@efi_pstore.service loaded inactive dead Load Kernel Module efi_pstore
|
||||
modprobe@fuse.service loaded inactive dead Load Kernel Module fuse
|
||||
modprobe@loop.service loaded inactive dead Load Kernel Module loop
|
||||
motd-news.service loaded inactive dead Message of the Day
|
||||
multipathd.service loaded active running Device-Mapper Multipath Device Controller
|
||||
netplan-ovs-cleanup.service loaded inactive dead OpenVSwitch configuration for cleanup
|
||||
networkd-dispatcher.service loaded inactive dead Dispatcher daemon for systemd-networkd
|
||||
open-iscsi.service loaded inactive dead Login to default iSCSI targets
|
||||
open-vm-tools.service loaded inactive dead Service for virtual machines hosted on VMware
|
||||
plymouth-quit-wait.service loaded active exited Hold until boot process finishes up
|
||||
plymouth-quit.service loaded active exited Terminate Plymouth Boot Screen
|
||||
plymouth-read-write.service loaded active exited Tell Plymouth To Write Out Runtime Data
|
||||
plymouth-start.service loaded inactive dead Show Plymouth Boot Screen
|
||||
plymouth-switch-root.service loaded inactive dead Plymouth switch root service
|
||||
polkit.service loaded active running Authorization Manager
|
||||
pollinate.service loaded inactive dead Pollinate to seed the pseudo random number generator
|
||||
rc-local.service loaded inactive dead /etc/rc.local Compatibility
|
||||
rescue.service loaded inactive dead Rescue Shell
|
||||
rsyslog.service loaded active running System Logging Service
|
||||
secureboot-db.service loaded inactive dead Secure Boot updates for DB and DBX
|
||||
setvtrgb.service loaded active exited Set console scheme
|
||||
snapd.apparmor.service loaded active exited Load AppArmor profiles managed internally by snapd
|
||||
snapd.autoimport.service loaded inactive dead Auto import assertions from block devices
|
||||
snapd.core-fixup.service loaded inactive dead Automatically repair incorrect owner/permissions on core devices
|
||||
snapd.failure.service loaded inactive dead Failure handling of the snapd snap
|
||||
snapd.recovery-chooser-trigger.service loaded inactive dead Wait for the Ubuntu Core chooser trigger
|
||||
snapd.seeded.service loaded active exited Wait until snapd is fully seeded
|
||||
snapd.service loaded inactive dead Snap Daemon
|
||||
snapd.snap-repair.service loaded inactive dead Automatically fetch and run repair assertions
|
||||
snapd.system-shutdown.service loaded inactive dead Ubuntu core (all-snaps) system shutdown helper setup service
|
||||
ssh.service loaded active running OpenBSD Secure Shell server
|
||||
sysstat-collect.service loaded inactive dead system activity accounting tool
|
||||
sysstat-summary.service loaded inactive dead Generate a daily summary of process accounting
|
||||
sysstat.service loaded active exited Resets System Activity Logs
|
||||
systemd-ask-password-console.service loaded inactive dead Dispatch Password Requests to Console
|
||||
systemd-ask-password-plymouth.service loaded inactive dead Forward Password Requests to Plymouth
|
||||
systemd-ask-password-wall.service loaded inactive dead Forward Password Requests to Wall
|
||||
systemd-battery-check.service loaded inactive dead Check battery level during early boot
|
||||
systemd-binfmt.service loaded active exited Set Up Additional Binary Formats
|
||||
systemd-bsod.service loaded inactive dead Displays emergency message in full screen.
|
||||
systemd-firstboot.service loaded inactive dead First Boot Wizard
|
||||
systemd-fsck-root.service loaded inactive dead File System Check on Root Device
|
||||
systemd-fsck@dev-disk-by\x2duuid-36D5\x2d0248.service loaded active exited File System Check on /dev/disk/by-uuid/36D5-0248
|
||||
systemd-fsck@dev-disk-by\x2duuid-da3c4a6e\x2df851\x2d471f\x2d81e4\x2dcd9b3b26acf1.service loaded active exited File System Check on /dev/disk/by-uuid/da3c4a6e-f851-471f-81e4-cd9b3b26acf1
|
||||
systemd-fsckd.service loaded inactive dead File System Check Daemon to report status
|
||||
systemd-hibernate-resume.service loaded inactive dead Resume from hibernation
|
||||
systemd-hibernate.service loaded inactive dead System Hibernate
|
||||
systemd-hwdb-update.service loaded inactive dead Rebuild Hardware Database
|
||||
systemd-hybrid-sleep.service loaded inactive dead System Hybrid Suspend+Hibernate
|
||||
systemd-initctl.service loaded inactive dead initctl Compatibility Daemon
|
||||
systemd-journal-catalog-update.service loaded inactive dead Rebuild Journal Catalog
|
||||
systemd-journal-flush.service loaded active exited Flush Journal to Persistent Storage
|
||||
systemd-journald.service loaded active running Journal Service
|
||||
systemd-logind.service loaded active running User Login Management
|
||||
systemd-machine-id-commit.service loaded inactive dead Commit a transient machine-id on disk
|
||||
systemd-modules-load.service loaded active exited Load Kernel Modules
|
||||
● systemd-networkd-wait-online.service loaded failed failed Wait for Network to be Configured
|
||||
systemd-networkd.service loaded active running Network Configuration
|
||||
systemd-pcrmachine.service loaded inactive dead TPM2 PCR Machine ID Measurement
|
||||
systemd-pcrphase-initrd.service loaded inactive dead TPM2 PCR Barrier (initrd)
|
||||
systemd-pcrphase-sysinit.service loaded inactive dead TPM2 PCR Barrier (Initialization)
|
||||
systemd-pcrphase.service loaded inactive dead TPM2 PCR Barrier (User)
|
||||
systemd-pstore.service loaded inactive dead Platform Persistent Storage Archival
|
||||
systemd-quotacheck.service loaded inactive dead File System Quota Check
|
||||
systemd-random-seed.service loaded active exited Load/Save OS Random Seed
|
||||
systemd-remount-fs.service loaded active exited Remount Root and Kernel File Systems
|
||||
systemd-repart.service loaded inactive dead Repartition Root Disk
|
||||
systemd-resolved.service loaded active running Network Name Resolution
|
||||
systemd-rfkill.service loaded inactive dead Load/Save RF Kill Switch Status
|
||||
systemd-soft-reboot.service loaded inactive dead Reboot System Userspace
|
||||
systemd-suspend-then-hibernate.service loaded inactive dead System Suspend then Hibernate
|
||||
systemd-suspend.service loaded inactive dead System Suspend
|
||||
systemd-sysctl.service loaded active exited Apply Kernel Variables
|
||||
systemd-sysext.service loaded inactive dead Merge System Extension Images into /usr/ and /opt/
|
||||
systemd-sysusers.service loaded inactive dead Create System Users
|
||||
systemd-timesyncd.service loaded active running Network Time Synchronization
|
||||
systemd-tmpfiles-clean.service loaded inactive dead Cleanup of Temporary Directories
|
||||
systemd-tmpfiles-setup-dev-early.service loaded active exited Create Static Device Nodes in /dev gracefully
|
||||
systemd-tmpfiles-setup-dev.service loaded active exited Create Static Device Nodes in /dev
|
||||
systemd-tmpfiles-setup.service loaded active exited Create Volatile Files and Directories
|
||||
systemd-tpm2-setup-early.service loaded inactive dead TPM2 SRK Setup (Early)
|
||||
systemd-tpm2-setup.service loaded inactive dead TPM2 SRK Setup
|
||||
systemd-udev-settle.service loaded inactive dead Wait for udev To Complete Device Initialization
|
||||
systemd-udev-trigger.service loaded active exited Coldplug All udev Devices
|
||||
systemd-udevd.service loaded active running Rule-based Manager for Device Events and Files
|
||||
systemd-update-done.service loaded inactive dead Update is Completed
|
||||
systemd-update-utmp-runlevel.service loaded inactive dead Record Runlevel Change in UTMP
|
||||
systemd-update-utmp.service loaded active exited Record System Boot/Shutdown in UTMP
|
||||
systemd-user-sessions.service loaded active exited Permit User Sessions
|
||||
thermald.service loaded active running Thermal Daemon Service
|
||||
tpm-udev.service loaded inactive dead Handle dynamically added tpm devices
|
||||
ua-reboot-cmds.service loaded inactive dead Ubuntu Pro reboot cmds
|
||||
ua-timer.service loaded inactive dead Ubuntu Pro Timer for running repeated jobs
|
||||
ubuntu-advantage.service loaded inactive dead Ubuntu Pro Background Auto Attach
|
||||
udisks2.service loaded active running Disk Manager
|
||||
ufw.service loaded active exited Uncomplicated firewall
|
||||
unattended-upgrades.service loaded active running Unattended Upgrades Shutdown
|
||||
update-notifier-download.service loaded inactive dead Download data for packages that failed at package install time
|
||||
update-notifier-motd.service loaded inactive dead Check to see whether there is a new version of Ubuntu available
|
||||
upower.service loaded active running Daemon for power management
|
||||
user-runtime-dir@1000.service loaded active exited User Runtime Directory /run/user/1000
|
||||
user@1000.service loaded active running User Manager for UID 1000
|
||||
uuidd.service loaded inactive dead Daemon for generating UUIDs
|
||||
vgauth.service loaded inactive dead Authentication service for virtual machines hosted on VMware
|
||||
wpa_supplicant.service loaded active running WPA supplicant
|
||||
|
||||
Legend: LOAD → Reflects whether the unit definition was properly loaded.
|
||||
ACTIVE → The high-level unit activation state, i.e. generalization of SUB.
|
||||
SUB → The low-level unit activation state, values depend on unit type.
|
||||
|
||||
146 loaded units listed.
|
||||
@ -0,0 +1,37 @@
|
||||
http:
|
||||
middlewares:
|
||||
# Security headers
|
||||
security-headers:
|
||||
headers:
|
||||
stsSeconds: 63072000
|
||||
stsIncludeSubdomains: true
|
||||
stsPreload: true
|
||||
frameDeny: true
|
||||
contentTypeNosniff: true
|
||||
browserXssFilter: true
|
||||
referrerPolicy: "same-origin"
|
||||
|
||||
# Rate limiting
|
||||
ratelimit-basic:
|
||||
rateLimit:
|
||||
average: 50
|
||||
burst: 100
|
||||
|
||||
# Basic auth for dashboard
|
||||
dashboard-auth:
|
||||
basicAuth:
|
||||
users:
|
||||
- "chester:$apr1$hrRDQ/tR$ZwyxHOCDZjm/55GAs5/Ew1"
|
||||
|
||||
# HTTPS redirect
|
||||
https-redirect:
|
||||
redirectScheme:
|
||||
scheme: https
|
||||
permanent: true
|
||||
|
||||
# Dashboard slash redirect
|
||||
dashboard-slash:
|
||||
redirectregex:
|
||||
regex: ^/dashboard$
|
||||
replacement: /dashboard/
|
||||
permanent: true
|
||||
@ -0,0 +1,74 @@
|
||||
http:
|
||||
# Transport for self-signed certs
|
||||
serversTransports:
|
||||
insecure-transport:
|
||||
insecureSkipVerify: true
|
||||
|
||||
# Static routers for on-prem backends
|
||||
routers:
|
||||
tnas-router:
|
||||
rule: "Host(`tnas.castaldifamily.com`)"
|
||||
entryPoints:
|
||||
- websecure
|
||||
tls:
|
||||
certResolver: cloudflare
|
||||
service: tnas-service
|
||||
middlewares:
|
||||
- security-headers@file
|
||||
|
||||
dsm-router:
|
||||
rule: "Host(`dsm.castaldifamily.com`)"
|
||||
entryPoints:
|
||||
- websecure
|
||||
tls:
|
||||
certResolver: cloudflare
|
||||
service: dsm-service
|
||||
middlewares:
|
||||
- security-headers@file
|
||||
|
||||
watchtower-router:
|
||||
rule: "Host(`watchtower.castaldifamily.com`)"
|
||||
entryPoints:
|
||||
- websecure
|
||||
tls:
|
||||
certResolver: cloudflare
|
||||
service: watchtower-service
|
||||
middlewares:
|
||||
- security-headers@file
|
||||
|
||||
gatus-router:
|
||||
rule: "Host(`status.castaldifamily.com`)"
|
||||
entryPoints:
|
||||
- websecure
|
||||
tls:
|
||||
certResolver: cloudflare
|
||||
service: gatus-service
|
||||
middlewares:
|
||||
- security-headers@file
|
||||
|
||||
# Services (backends)
|
||||
services:
|
||||
tnas-service:
|
||||
loadBalancer:
|
||||
servers:
|
||||
- url: "https://10.0.0.250:5443/tos/#/"
|
||||
serversTransport: insecure-transport
|
||||
|
||||
dsm-service:
|
||||
loadBalancer:
|
||||
servers:
|
||||
- url: "https://10.0.0.249:5001"
|
||||
passHostHeader: true
|
||||
serversTransport: insecure-transport
|
||||
|
||||
watchtower-service:
|
||||
loadBalancer:
|
||||
servers:
|
||||
- url: "https://10.0.0.200:9090"
|
||||
serversTransport: insecure-transport
|
||||
|
||||
gatus-service:
|
||||
loadBalancer:
|
||||
servers:
|
||||
- url: "http://10.0.0.200:8080"
|
||||
serversTransport: insecure-transport
|
||||
@ -0,0 +1,57 @@
|
||||
global:
|
||||
checkNewVersion: false
|
||||
sendAnonymousUsage: false
|
||||
|
||||
log:
|
||||
level: DEBUG
|
||||
format: json
|
||||
|
||||
accessLog:
|
||||
format: json
|
||||
filePath: /var/log/traefik/access.log
|
||||
bufferingSize: 100
|
||||
|
||||
api:
|
||||
dashboard: true
|
||||
insecure: false
|
||||
|
||||
entryPoints:
|
||||
web:
|
||||
address: ":80"
|
||||
http:
|
||||
redirections:
|
||||
entryPoint:
|
||||
to: websecure
|
||||
scheme: https
|
||||
websecure:
|
||||
address: ":443"
|
||||
ping:
|
||||
address: ":8082"
|
||||
|
||||
ping:
|
||||
entryPoint: ping
|
||||
|
||||
providers:
|
||||
docker:
|
||||
endpoint: "tcp://docker-socket-proxy:2375"
|
||||
exposedByDefault: false
|
||||
network: proxy-net
|
||||
redis:
|
||||
endpoints:
|
||||
- redis:6379
|
||||
file:
|
||||
directory: /dynamic
|
||||
watch: true
|
||||
|
||||
certificatesResolvers:
|
||||
cloudflare:
|
||||
acme:
|
||||
email: nathan@castaldifamily.com
|
||||
storage: /certs/acme.json
|
||||
dnsChallenge:
|
||||
provider: cloudflare
|
||||
propagation:
|
||||
delayBeforeChecks: 0
|
||||
resolvers:
|
||||
- 1.1.1.1:53
|
||||
- 8.8.8.8:53
|
||||
@ -0,0 +1,17 @@
|
||||
EXECUTION MODE ENABLED
|
||||
|
||||
Phase 2 execution switch:
|
||||
- replacement_phase2_rebuild_and_rejoin=true
|
||||
|
||||
Phase 3 execution switch:
|
||||
- replacement_phase3_identity_cutover=false
|
||||
|
||||
Phase 4 execution switch:
|
||||
- replacement_phase4_validate_cutover=false
|
||||
|
||||
Manual steps still required around identity cutover:
|
||||
1. If phase 2 enabled, rebuild and rejoin replacement swarm nodes on pve04.
|
||||
2. If phase 3 enabled, update inventory/group_vars source-of-truth with rollback snapshots.
|
||||
3. If phase 4 enabled, validate swarm quorum and optional service endpoints.
|
||||
4. Move network identity 10.0.0.201 to replacement physical host.
|
||||
5. If stable and approved, power off old host.
|
||||
@ -0,0 +1,17 @@
|
||||
EXECUTION MODE ENABLED
|
||||
|
||||
Phase 2 execution switch:
|
||||
- replacement_phase2_rebuild_and_rejoin=true
|
||||
|
||||
Phase 3 execution switch:
|
||||
- replacement_phase3_identity_cutover=false
|
||||
|
||||
Phase 4 execution switch:
|
||||
- replacement_phase4_validate_cutover=false
|
||||
|
||||
Manual steps still required around identity cutover:
|
||||
1. If phase 2 enabled, rebuild and rejoin replacement swarm nodes on pve04.
|
||||
2. If phase 3 enabled, update inventory/group_vars source-of-truth with rollback snapshots.
|
||||
3. If phase 4 enabled, validate swarm quorum and optional service endpoints.
|
||||
4. Move network identity 10.0.0.201 to replacement physical host.
|
||||
5. If stable and approved, power off old host.
|
||||
@ -0,0 +1,17 @@
|
||||
EXECUTION MODE ENABLED
|
||||
|
||||
Phase 2 execution switch:
|
||||
- replacement_phase2_rebuild_and_rejoin=true
|
||||
|
||||
Phase 3 execution switch:
|
||||
- replacement_phase3_identity_cutover=false
|
||||
|
||||
Phase 4 execution switch:
|
||||
- replacement_phase4_validate_cutover=false
|
||||
|
||||
Manual steps still required around identity cutover:
|
||||
1. If phase 2 enabled, rebuild and rejoin replacement swarm nodes on pve04.
|
||||
2. If phase 3 enabled, update inventory/group_vars source-of-truth with rollback snapshots.
|
||||
3. If phase 4 enabled, validate swarm quorum and optional service endpoints.
|
||||
4. Move network identity 10.0.0.201 to replacement physical host.
|
||||
5. If stable and approved, power off old host.
|
||||
@ -0,0 +1,17 @@
|
||||
EXECUTION MODE ENABLED
|
||||
|
||||
Phase 2 execution switch:
|
||||
- replacement_phase2_rebuild_and_rejoin=false
|
||||
|
||||
Phase 3 execution switch:
|
||||
- replacement_phase3_identity_cutover=false
|
||||
|
||||
Phase 4 execution switch:
|
||||
- replacement_phase4_validate_cutover=true
|
||||
|
||||
Manual steps still required around identity cutover:
|
||||
1. If phase 2 enabled, rebuild and rejoin replacement swarm nodes on pve04.
|
||||
2. If phase 3 enabled, update inventory/group_vars source-of-truth with rollback snapshots.
|
||||
3. If phase 4 enabled, validate swarm quorum and optional service endpoints.
|
||||
4. Move network identity 10.0.0.201 to replacement physical host.
|
||||
5. If stable and approved, power off old host.
|
||||
@ -0,0 +1,17 @@
|
||||
Project: node-replacement-2026
|
||||
Validation manager: swarm-manager-2
|
||||
Logical pve01 host: pve01
|
||||
Swarm manager identity: swarm-manager-1
|
||||
Swarm worker identity: swarm-worker-1
|
||||
|
||||
=== docker node ls ===
|
||||
ID HOSTNAME STATUS AVAILABILITY MANAGER STATUS ENGINE VERSION
|
||||
hxcagfwxmrkqoyjo2mgfjeubm swarm-manager-1 Ready Active Reachable 29.3.0
|
||||
lalct6bxzf2nn5cpe68wxmjjh * swarm-manager-2 Ready Active Leader 29.3.0
|
||||
3aqljmk6dj41q6g6e2uac83nc swarm-manager-3 Ready Active Reachable 29.3.0
|
||||
3l735ukunrkbekq72fi0xzg97 swarm-worker-1 Ready Active 29.3.0
|
||||
j3j7o853tn00b38bxo3flbi0l swarm-worker-2 Ready Active 29.3.0
|
||||
54hq74d2ey5yjhtqh9hl5ieo9 swarm-worker-3 Ready Active 29.3.0
|
||||
|
||||
=== endpoint checks ===
|
||||
No endpoint checks configured.
|
||||
@ -0,0 +1,17 @@
|
||||
EXECUTION MODE ENABLED
|
||||
|
||||
Phase 2 execution switch:
|
||||
- replacement_phase2_rebuild_and_rejoin=false
|
||||
|
||||
Phase 3 execution switch:
|
||||
- replacement_phase3_identity_cutover=false
|
||||
|
||||
Phase 4 execution switch:
|
||||
- replacement_phase4_validate_cutover=true
|
||||
|
||||
Manual steps still required around identity cutover:
|
||||
1. If phase 2 enabled, rebuild and rejoin replacement swarm nodes on pve04.
|
||||
2. If phase 3 enabled, update inventory/group_vars source-of-truth with rollback snapshots.
|
||||
3. If phase 4 enabled, validate swarm quorum and optional service endpoints.
|
||||
4. Move network identity 10.0.0.201 to replacement physical host.
|
||||
5. If stable and approved, power off old host.
|
||||
@ -0,0 +1,17 @@
|
||||
Project: node-replacement-2026
|
||||
Validation manager: swarm-manager-3
|
||||
Logical pve01 host: pve01
|
||||
Swarm manager identity: swarm-manager-1
|
||||
Swarm worker identity: swarm-worker-1
|
||||
|
||||
=== docker node ls ===
|
||||
ID HOSTNAME STATUS AVAILABILITY MANAGER STATUS ENGINE VERSION
|
||||
hxcagfwxmrkqoyjo2mgfjeubm swarm-manager-1 Ready Active Reachable 29.3.0
|
||||
lalct6bxzf2nn5cpe68wxmjjh swarm-manager-2 Ready Active Leader 29.3.0
|
||||
3aqljmk6dj41q6g6e2uac83nc * swarm-manager-3 Ready Active Reachable 29.3.0
|
||||
3l735ukunrkbekq72fi0xzg97 swarm-worker-1 Ready Active 29.3.0
|
||||
j3j7o853tn00b38bxo3flbi0l swarm-worker-2 Ready Active 29.3.0
|
||||
54hq74d2ey5yjhtqh9hl5ieo9 swarm-worker-3 Ready Active 29.3.0
|
||||
|
||||
=== endpoint checks ===
|
||||
No endpoint checks configured.
|
||||
@ -0,0 +1,17 @@
|
||||
EXECUTION MODE ENABLED
|
||||
|
||||
Phase 2 execution switch:
|
||||
- replacement_phase2_rebuild_and_rejoin=false
|
||||
|
||||
Phase 3 execution switch:
|
||||
- replacement_phase3_identity_cutover=true
|
||||
|
||||
Phase 4 execution switch:
|
||||
- replacement_phase4_validate_cutover=false
|
||||
|
||||
Manual steps still required around identity cutover:
|
||||
1. If phase 2 enabled, rebuild and rejoin replacement swarm nodes on pve04.
|
||||
2. If phase 3 enabled, update inventory/group_vars source-of-truth with rollback snapshots.
|
||||
3. If phase 4 enabled, validate swarm quorum and optional service endpoints.
|
||||
4. Move network identity 10.0.0.201 to replacement physical host.
|
||||
5. If stable and approved, power off old host.
|
||||
@ -0,0 +1,11 @@
|
||||
Project: node-replacement-apply-20260313
|
||||
Phase: identity cutover source-of-truth update
|
||||
Inventory file: /home/chester/homelab/ansible/playbooks/proxmox/../../inventory/hosts.ini
|
||||
Group vars file: /home/chester/homelab/ansible/playbooks/proxmox/../../group_vars/all.yml
|
||||
Rollback inventory backup: /home/chester/homelab/ansible/playbooks/proxmox/../../outputs/node-replacement/node-replacement-apply-20260313-20260313T131217/rollback/hosts.ini.pre-cutover
|
||||
Rollback group vars backup: /home/chester/homelab/ansible/playbooks/proxmox/../../outputs/node-replacement/node-replacement-apply-20260313-20260313T131217/rollback/all.yml.pre-cutover
|
||||
|
||||
Applied updates:
|
||||
- Removed pve04 from proxmox_cluster inventory: True
|
||||
- Set physical_backing_host for pve01 to pve04
|
||||
- Set replacement_status in pve04 metadata
|
||||
@ -0,0 +1,183 @@
|
||||
# Central YAML Source of Truth for Nathan's Lab (2026)
|
||||
# Edit and commit this file; Ansible playbooks should read this as canonical.
|
||||
lab_name: "nathan-lab-2026"
|
||||
canonical_source: "ansible/group_vars/all.yml"
|
||||
|
||||
networks:
|
||||
main:
|
||||
vlan: 1
|
||||
cidr: "10.0.0.0/24"
|
||||
dhcp_pool: "10.0.0.100-10.0.0.240"
|
||||
gateway: "10.0.0.1"
|
||||
purpose: "Family / wired / main SSID"
|
||||
|
||||
infra:
|
||||
vlan: 10
|
||||
cidr: "10.0.10.0/24"
|
||||
reserved: "10.0.10.2-10.0.10.50"
|
||||
purpose: "Management / Proxmox / NAS / Heimdall mgmt"
|
||||
|
||||
iot:
|
||||
vlan: 50
|
||||
cidr: "10.0.50.0/24"
|
||||
dhcp_pool: "10.0.50.100-10.0.50.199"
|
||||
purpose: "IoT devices (Omada)"
|
||||
|
||||
guest:
|
||||
vlan: 30
|
||||
cidr: "10.0.30.0/24"
|
||||
dhcp_pool: "10.0.30.100-10.0.30.200"
|
||||
purpose: "Guest WiFi (isolated)"
|
||||
|
||||
compute:
|
||||
vlan: 200
|
||||
cidr: "10.0.200.0/24"
|
||||
purpose: "Swarm / AI grid / ephemeral compute"
|
||||
|
||||
lab_hosts:
|
||||
er7212pc:
|
||||
role: gateway
|
||||
current_ip: "10.0.0.2"
|
||||
desired_ip: "10.0.0.2"
|
||||
note: "DHCP + Omada controller"
|
||||
|
||||
pve01:
|
||||
role: proxmox
|
||||
current_ip: "10.0.0.201"
|
||||
desired_ip: "10.0.10.11"
|
||||
|
||||
pve02:
|
||||
role: proxmox
|
||||
current_ip: "10.0.0.202"
|
||||
desired_ip: "10.0.10.12"
|
||||
|
||||
pve03:
|
||||
role: proxmox
|
||||
current_ip: "10.0.0.203"
|
||||
desired_ip: "10.0.10.13"
|
||||
|
||||
pve04:
|
||||
role: proxmox
|
||||
current_ip: "10.0.0.204"
|
||||
desired_ip: "10.0.10.14"
|
||||
|
||||
swarm-manager-1:
|
||||
current_ip: "10.0.0.211"
|
||||
desired_ip: "10.0.200.11"
|
||||
|
||||
swarm-manager-2:
|
||||
current_ip: "10.0.0.212"
|
||||
desired_ip: "10.0.200.12"
|
||||
|
||||
swarm-manager-3:
|
||||
current_ip: "10.0.0.213"
|
||||
desired_ip: "10.0.200.13"
|
||||
|
||||
swarm-worker-1:
|
||||
current_ip: "10.0.0.221"
|
||||
desired_ip: "10.0.200.21"
|
||||
|
||||
swarm-worker-2:
|
||||
current_ip: "10.0.0.222"
|
||||
desired_ip: "10.0.200.22"
|
||||
|
||||
swarm-worker-3:
|
||||
current_ip: "10.0.0.223"
|
||||
desired_ip: "10.0.200.23"
|
||||
|
||||
ai-lenovo:
|
||||
current_ip: "10.0.0.220"
|
||||
desired_ip: "10.0.200.20"
|
||||
|
||||
synology:
|
||||
current_ip: "10.0.0.249"
|
||||
desired_ip: "10.0.10.40"
|
||||
|
||||
terramaster:
|
||||
current_ip: "10.0.0.250"
|
||||
desired_ip: "10.0.10.41"
|
||||
|
||||
waldorf:
|
||||
current_ip: "10.0.0.251"
|
||||
desired_ip: "10.0.200.30"
|
||||
|
||||
watchtower:
|
||||
current_ip: "10.0.0.200"
|
||||
desired_ip: "10.0.10.200"
|
||||
|
||||
heimdall:
|
||||
role: beelink
|
||||
current_ip: null
|
||||
desired_ip:
|
||||
mgmt: "10.0.10.2"
|
||||
lan: "10.0.0.50"
|
||||
|
||||
# === MONITORING INFRASTRUCTURE ===
|
||||
# Environment-specific configuration for monitoring stack
|
||||
monitoring:
|
||||
stack_user: "chester"
|
||||
heimdall_redis: "10.0.0.151:6379"
|
||||
watchtower_ip: "10.0.0.200"
|
||||
grafana_domain: "grafana.castaldifamily.com"
|
||||
uptime_domain: "status.castaldifamily.com"
|
||||
dozzle_domain: "logs.castaldifamily.com"
|
||||
authentik_host: "https://sso.castaldifamily.com"
|
||||
# grafana_admin_password: DEFINE IN VAULT
|
||||
|
||||
# === EDGE ROUTING TOPOLOGY ===
|
||||
# Canonical ingress model: Traefik runs on a dedicated edge host outside Swarm.
|
||||
# Swarm and standalone hosts publish routes through traefik-kop agents.
|
||||
edge_routing:
|
||||
ingress_mode: "external-traefik"
|
||||
edge_host:
|
||||
name: "heimdall"
|
||||
ip: "10.0.0.151"
|
||||
ssh_port: 22
|
||||
http_port: 80
|
||||
https_port: 443
|
||||
integration:
|
||||
# Watchtower-hosted traefik-kop instance (publishes Watchtower container routes)
|
||||
agent_image: "ghcr.io/jittering/traefik-kop:latest"
|
||||
redis_addr: "10.0.0.151:6379"
|
||||
bind_ip: "10.0.0.200" # Watchtower IP — correct for routes originating on Watchtower
|
||||
swarm:
|
||||
# Swarm-hosted traefik-kop instance (publishes Swarm service routes)
|
||||
# bind_ip MUST be a Swarm node IP — the Swarm routing mesh makes published
|
||||
# ports available on ALL nodes, so Traefik routes inbound requests here.
|
||||
bind_ip: "10.0.0.211" # swarm-manager-1; any Swarm node IP is valid via routing mesh
|
||||
proxy_network: "proxy-net" # Swarm overlay network; separate from heimdall's bridge of same name
|
||||
stack_deploy_target: "swarm-manager-1"
|
||||
migration_rules:
|
||||
deploy_traefik_in_swarm: false
|
||||
use_external_proxy_network: true
|
||||
notes:
|
||||
- "Services should attach to swarm overlay proxy-net for east-west traffic."
|
||||
- "Ingress is terminated by external Traefik at 10.0.0.151 via traefik-kop updates."
|
||||
|
||||
# === SERVICE SECRETS (set via: ansible-vault encrypt_string) ===
|
||||
vault_gitea_db_password: !vault |
|
||||
$ANSIBLE_VAULT;1.1;AES256
|
||||
62323135663563386162633134616430633034366465376439663133346634616639376431356165
|
||||
6361376530363938656235623330396530643631616266330a323962373736383339353064633634
|
||||
36636664383530386539366137666632393134366435356634383061643566366335376164656531
|
||||
6464333566326261610a306366346638366439333535393161643066643234653165636636623832
|
||||
3135
|
||||
|
||||
vlan_defaults:
|
||||
dns_domain: "home.lab"
|
||||
ntp_servers:
|
||||
- "10.0.10.2"
|
||||
|
||||
# Plex bootstrap claim token — used only on first server claim.
|
||||
vault_plex_claim: !vault |
|
||||
$ANSIBLE_VAULT;1.1;AES256
|
||||
65626432323737386462666132336161303635633438326432666631383339663835356238343838
|
||||
3533306232623437376263353161633530646533343739300a323730643330386633626661353234
|
||||
31643631346666666431666534613539333835623562306335376534626463633936643838323666
|
||||
6432626262323231660a323965393163366230363838623165643532356438393863346361656162
|
||||
63323966386333323236353861623333623339626538396565643965323562383636
|
||||
|
||||
# Usage notes:
|
||||
# - Treat this file as the single source of truth for IPs and VLANs.
|
||||
# - Ansible playbooks should read `networks` and `lab_hosts` to render configs,
|
||||
# update `inventory/hosts.ini`, and generate DHCP reservation templates.
|
||||
@ -0,0 +1,63 @@
|
||||
# Generated inventory from ../group_vars/all.yml
# NOTE(review): the header says this file is generated; confirm the generator
# emits the same layout, otherwise manual edits here will be overwritten.

# --- Watchtower (local controller) ---
[watchtower]
localhost ansible_connection=local

# --- Proxmox Cluster (management) ---
[proxmox_cluster]
pve01 ansible_host=10.0.0.201
pve02 ansible_host=10.0.0.202
pve03 ansible_host=10.0.0.203
pve04 ansible_host=10.0.0.204

# Connection settings shared by every Proxmox node. They were previously
# repeated on each host line with identical values.
[proxmox_cluster:vars]
ansible_user=root
ansible_port=22
ansible_ssh_private_key_file=/home/chester/.ssh/id_ed25519
ansible_become=true
ansible_python_interpreter=/usr/bin/python3

# --- Swarm Managers ---
[swarm_managers]
swarm-manager-1 ansible_host=10.0.0.211
swarm-manager-2 ansible_host=10.0.0.212
swarm-manager-3 ansible_host=10.0.0.213

# --- Swarm Workers ---
[swarm_workers]
swarm-worker-1 ansible_host=10.0.0.221
swarm-worker-2 ansible_host=10.0.0.222
swarm-worker-3 ansible_host=10.0.0.223

[swarm_hosts:children]
swarm_managers
swarm_workers

[swarm_hosts:vars]
ansible_user=chester
ansible_ssh_private_key_file=/home/chester/.ssh/id_ed25519

# --- AI Grid ---
[ai_grid]
ai-lenovo ansible_host=10.0.0.220

# --- Docker Hosts ---
[docker_hosts]
heimdall ansible_host=10.0.0.151
waldorf ansible_host=10.0.0.251

# --- Storage ---
# File transfers to the NAS hosts are forced over scp.
# ansible_ssh_transfer_method replaces the deprecated ansible_scp_if_ssh switch.
# NOTE(review): presumably the appliances lack a usable SFTP subsystem; confirm.
[storage]
synology ansible_host=10.0.0.249 ansible_ssh_transfer_method=scp
terramaster ansible_host=10.0.0.250 ansible_ssh_transfer_method=scp

# --- Aggregate grouping ---
[ubuntu_lab:children]
swarm_managers
swarm_workers
ai_grid
docker_hosts
storage

[ubuntu_lab:vars]
ansible_user=chester
ansible_ssh_private_key_file=/home/chester/.ssh/id_ed25519
|
||||
@ -0,0 +1,9 @@
|
||||
vault_authentik_postgres_password: !vault |
|
||||
$ANSIBLE_VAULT;1.1;AES256
|
||||
32396365316438323862616536633232356436656366333561383864393932386531323935313463
|
||||
6235313233303938653530313039363530376439343634370a386263326335356330633332633039
|
||||
37373965303236383463396162356534336661396437383365336630363533383462383165366666
|
||||
3532353937336635330a656633356164383639313433326366316334333538613463336239383663
|
||||
37383263353930333039336534373166616633653239393932613937343164383935363139373935
|
||||
63643430303339396262613135373635636363663662663730326130633666303131383532613262
|
||||
663962393933663230333761623239343365
|
||||
10
ansible/ansible-old/outputs_vault_authentik_secret_key.txt
Normal file
10
ansible/ansible-old/outputs_vault_authentik_secret_key.txt
Normal file
@ -0,0 +1,10 @@
|
||||
vault_authentik_secret_key: !vault |
|
||||
$ANSIBLE_VAULT;1.1;AES256
|
||||
63656438336336383936333735303639336131613835313833646331376331346635363062313833
|
||||
3561373665646664393137303533333630336663313366640a343538316162336263393862366235
|
||||
65326239613662376434313539653064666636313037343936356338643663313264366430356639
|
||||
3930316136383166380a636666633737663735306238313534626637656439383664356332396231
|
||||
37326366633861386636326565363338613766643134643830313763646139383763393638633431
|
||||
38623335333566356235366238313436353333663736316234333761646665663865393339656262
|
||||
33383430633139353163663666373532646466663131666539613061326666363033363832323033
|
||||
37623034333065336430
|
||||
@ -0,0 +1,174 @@
|
||||
---
# Deploy a custom Ansible MCP server on Watchtower.
#
# Usage:
#   cd /home/chester/homelab/ansible
#   ansible-playbook -i inventory/hosts.ini playbooks/ai/deploy_ansible_mcp_watchtower.yml
#
# Validate only:
#   ansible-playbook -i inventory/hosts.ini playbooks/ai/deploy_ansible_mcp_watchtower.yml --check
#
# Secret handling:
#   The API token is read from ANSIBLE_MCP_API_TOKEN in the control shell and
#   written to a root-only EnvironmentFile (mcp_env_file). It is deliberately
#   kept out of the systemd unit, which is installed world-readable (0644).

- name: Deploy Ansible MCP server on Watchtower
  hosts: watchtower
  become: true
  gather_facts: true

  vars:
    mcp_service_name: ansible-mcp
    mcp_install_dir: /opt/ansible-mcp
    mcp_state_dir: /var/lib/ansible-mcp
    mcp_user: chester
    mcp_group: chester
    mcp_transport: streamable-http
    mcp_host: "0.0.0.0"
    mcp_port: 8449

    mcp_repo_root: /home/chester/homelab/ansible
    mcp_inventory: inventory/hosts.ini
    mcp_allowed_playbook_dirs: playbooks
    mcp_allowed_playbooks: ""
    mcp_api_token: "{{ lookup('env', 'ANSIBLE_MCP_API_TOKEN') | default('', true) }}"
    # Root-only file that carries the token into the service environment.
    mcp_env_file: "/etc/{{ mcp_service_name }}.env"
    mcp_max_extra_vars_bytes: 16384
    mcp_blocked_extra_vars_keys: "ansible_password,ansible_become_password,vault_password"

    # Full-write mode is enabled by default here to match requested behavior.
    # Keep confirm enforcement enabled in server guardrails.
    mcp_allow_write: true
    mcp_require_confirm_for_write: true

    mcp_default_timeout: 900
    mcp_max_timeout: 3600

    mcp_python_packages:
      - "ansible-core>=2.16,<2.19"
      - "mcp>=1.0.0"

  tasks:
    - name: Assert API token is configured for HTTP transport
      ansible.builtin.assert:
        that:
          - mcp_transport == "stdio" or (mcp_api_token | length) > 0
        fail_msg: >-
          HTTP transport requires ANSIBLE_MCP_API_TOKEN to be set in the control
          shell environment before running this playbook.
        success_msg: "Transport/auth configuration validated."

    # getent fails the play when the key is absent, so this doubles as an assert.
    - name: Assert service account exists
      ansible.builtin.getent:
        database: passwd
        key: "{{ mcp_user }}"

    - name: Ensure installation and state directories exist
      ansible.builtin.file:
        path: "{{ item.path }}"
        state: directory
        owner: "{{ item.owner }}"
        group: "{{ item.group }}"
        mode: "{{ item.mode }}"
      loop:
        - path: "{{ mcp_install_dir }}"
          owner: "{{ mcp_user }}"
          group: "{{ mcp_group }}"
          mode: "0755"
        - path: "{{ mcp_state_dir }}"
          owner: "{{ mcp_user }}"
          group: "{{ mcp_group }}"
          mode: "0750"

    - name: Copy MCP server script
      ansible.builtin.copy:
        src: ../../scripts/ansible_mcp_server.py
        dest: "{{ mcp_install_dir }}/ansible_mcp_server.py"
        owner: "{{ mcp_user }}"
        group: "{{ mcp_group }}"
        mode: "0755"
      notify: Restart ansible mcp service

    # `creates` already makes this idempotent: the command is skipped (ok) when
    # the interpreter exists and reported as changed only when the venv is
    # actually built, so no changed_when override is needed.
    - name: Ensure Python venv exists
      ansible.builtin.command:
        cmd: "python3 -m venv {{ mcp_install_dir }}/.venv"
        creates: "{{ mcp_install_dir }}/.venv/bin/python"

    - name: Install MCP server dependencies in venv
      ansible.builtin.pip:
        name: "{{ mcp_python_packages }}"
        virtualenv: "{{ mcp_install_dir }}/.venv"
        state: present
      notify: Restart ansible mcp service

    # The token lives here instead of in an Environment= line because the unit
    # file below is readable by every local user.
    - name: Install root-only environment file holding the API token
      ansible.builtin.copy:
        dest: "{{ mcp_env_file }}"
        owner: root
        group: root
        mode: "0600"
        content: |
          ANSIBLE_MCP_API_TOKEN={{ mcp_api_token }}
      no_log: true
      notify: Restart ansible mcp service

    - name: Install systemd unit for ansible mcp service
      ansible.builtin.copy:
        dest: "/etc/systemd/system/{{ mcp_service_name }}.service"
        owner: root
        group: root
        mode: "0644"
        content: |
          [Unit]
          Description=Ansible MCP Server
          Wants=network-online.target
          After=network-online.target

          [Service]
          Type=simple
          User={{ mcp_user }}
          Group={{ mcp_group }}
          WorkingDirectory={{ mcp_repo_root }}
          EnvironmentFile={{ mcp_env_file }}
          Environment=ANSIBLE_MCP_REPO_ROOT={{ mcp_repo_root }}
          Environment=ANSIBLE_MCP_INVENTORY={{ mcp_inventory }}
          Environment=ANSIBLE_MCP_ALLOWED_PLAYBOOK_DIRS={{ mcp_allowed_playbook_dirs }}
          Environment=ANSIBLE_MCP_ALLOWED_PLAYBOOKS={{ mcp_allowed_playbooks }}
          Environment=ANSIBLE_MCP_ALLOW_WRITE={{ mcp_allow_write | ternary('true', 'false') }}
          Environment=ANSIBLE_MCP_REQUIRE_CONFIRM={{ mcp_require_confirm_for_write | ternary('true', 'false') }}
          Environment=ANSIBLE_MCP_DEFAULT_TIMEOUT={{ mcp_default_timeout }}
          Environment=ANSIBLE_MCP_MAX_TIMEOUT={{ mcp_max_timeout }}
          Environment=ANSIBLE_MCP_MAX_EXTRA_VARS_BYTES={{ mcp_max_extra_vars_bytes }}
          Environment=ANSIBLE_MCP_BLOCKED_EXTRA_VARS_KEYS={{ mcp_blocked_extra_vars_keys }}
          Environment=ANSIBLE_MCP_STATE_DIR={{ mcp_state_dir }}
          Environment=ANSIBLE_MCP_TRANSPORT={{ mcp_transport }}
          Environment=ANSIBLE_MCP_HOST={{ mcp_host }}
          Environment=ANSIBLE_MCP_PORT={{ mcp_port }}
          ExecStart={{ mcp_install_dir }}/.venv/bin/python {{ mcp_install_dir }}/ansible_mcp_server.py --transport {{ mcp_transport }} --host {{ mcp_host }} --port {{ mcp_port }}
          Restart=on-failure
          RestartSec=3

          [Install]
          WantedBy=multi-user.target
      notify:
        - Reload systemd
        - Restart ansible mcp service

    # Handlers normally run at the end of the play. Flushing here makes systemd
    # load the new or changed unit before the service is started and probed.
    - name: Apply pending systemd reload and service restart
      ansible.builtin.meta: flush_handlers

    - name: Ensure ansible mcp service is enabled and running
      ansible.builtin.systemd:
        name: "{{ mcp_service_name }}"
        enabled: true
        state: started

    # Best-effort probe: failed_when is false, so the result is only reported
    # in the summary below and never fails the play.
    - name: Verify MCP health endpoint
      ansible.builtin.uri:
        url: "http://127.0.0.1:{{ mcp_port }}"
        method: GET
        return_content: true
        status_code: 200
      changed_when: false
      register: _mcp_http_probe
      failed_when: false

    - name: Show deployment summary
      ansible.builtin.debug:
        msg:
          - "Ansible MCP deployed to watchtower"
          - "Service: {{ mcp_service_name }}"
          - "Transport: {{ mcp_transport }}"
          - "Endpoint: {{ mcp_host }}:{{ mcp_port }}"
          - "Repo root: {{ mcp_repo_root }}"
          - "Allow write: {{ mcp_allow_write }}"
          - "Auth enabled: {{ (mcp_api_token | length) > 0 }}"
          - "Require confirm for write: {{ mcp_require_confirm_for_write }}"
          - "Explicit playbook allowlist set: {{ (mcp_allowed_playbooks | length) > 0 }}"
          - "HTTP probe status: {{ _mcp_http_probe.status | default('n/a') }}"

  # Defined in this order so the daemon reload runs before the restart.
  handlers:
    - name: Reload systemd
      ansible.builtin.systemd:
        daemon_reload: true

    - name: Restart ansible mcp service
      ansible.builtin.systemd:
        name: "{{ mcp_service_name }}"
        state: restarted
|
||||
74
ansible/ansible-old/playbooks/ai/test_ollama.yml
Normal file
74
ansible/ansible-old/playbooks/ai/test_ollama.yml
Normal file
@ -0,0 +1,74 @@
|
||||
---
# Smoke test for the Karakeep -> Ollama path:
#   1. the Ollama API answers from the controller,
#   2. the Karakeep container can fetch the same endpoint,
#   3. the container environment carries the expected Ollama/inference settings.
- name: Test Karakeep to Ollama connection
  hosts: localhost
  gather_facts: false

  vars:
    ollama_host: '10.0.0.220'
    ollama_port: 11434
    karakeep_host: '10.0.0.251'
    container_name: 'hoarder-web'

  tasks:
    # --- Controller-side reachability ---
    - name: Check Ollama API is reachable
      register: ollama_check
      changed_when: false
      ansible.builtin.uri:
        method: GET
        url: "http://{{ ollama_host }}:{{ ollama_port }}/api/tags"
        status_code: 200
        return_content: true

    - name: Show available models
      ansible.builtin.debug:
        msg: "Ollama models: {{ ollama_check.json.models | map(attribute='name') | list }}"

    # --- Container-side reachability (runs on the Karakeep Docker host) ---
    - name: Test connectivity from Karakeep container
      delegate_to: "{{ karakeep_host }}"
      vars:
        ansible_user: chester
        ansible_ssh_private_key_file: /home/chester/.ssh/id_ed25519
      register: container_test
      changed_when: false
      community.docker.docker_container_exec:
        container: "{{ container_name }}"
        command: "/bin/sh -c 'wget -qO- http://{{ ollama_host }}:{{ ollama_port }}/api/tags'"

    - name: Verify container can reach Ollama
      ansible.builtin.assert:
        that:
          - "'models' in container_test.stdout"
        fail_msg: "Container cannot reach Ollama"
        success_msg: "Container can reach Ollama"

    # --- Container configuration ---
    - name: Extract Ollama-related environment variables
      delegate_to: "{{ karakeep_host }}"
      vars:
        ansible_user: chester
        ansible_ssh_private_key_file: /home/chester/.ssh/id_ed25519
      register: container_info
      community.docker.docker_container_info:
        name: "{{ container_name }}"

    - name: Show configuration
      ansible.builtin.debug:
        msg: "{{ container_info.container.Config.Env | select('match', '^(OLLAMA|INFERENCE).*') | list }}"

    - name: Verify configuration is correct
      ansible.builtin.assert:
        that:
          - "'OLLAMA_BASE_URL=http://' + ollama_host + ':' + (ollama_port | string) in container_info.container.Config.Env"
          - "'INFERENCE_TEXT_MODEL=llama3.1:8b' in container_info.container.Config.Env"
          - "'INFERENCE_IMAGE_MODEL=llama3.2-vision:11b' in container_info.container.Config.Env"
        fail_msg: "Configuration needs updating"
        success_msg: "Configuration is correct"

    # Only reached when every assert above passed.
    - name: Display validation summary
      ansible.builtin.debug:
        msg:
          - "Validation complete"
          - "Ollama: {{ ollama_host }}:{{ ollama_port }}"
          - "Karakeep: {{ karakeep_host }}"
          - "Container: {{ container_name }}"
          - "Connection: Working"
          - "Config: Valid"
|
||||
152
ansible/ansible-old/playbooks/ai/validate_karakeep.yml
Normal file
152
ansible/ansible-old/playbooks/ai/validate_karakeep.yml
Normal file
@ -0,0 +1,152 @@
|
||||
---
# Play 1: query the Ollama API on each ai_grid host, confirm the required
# models are listed, and run one short text generation as a liveness check.
- name: Validate Ollama service and models
  hosts: ai_grid
  gather_facts: true
  tags:
    - ollama
    - models

  vars:
    ollama_base_url: "http://{{ ansible_host }}:11434"
    required_models:
      - type: 'text'
        name: 'llama3.1:8b'
      - type: 'vision'
        name: 'llama3.2-vision:11b'

  tasks:
    - name: Check Ollama service is responding
      register: ollama_response
      changed_when: false
      ansible.builtin.uri:
        method: GET
        url: "{{ ollama_base_url }}/api/tags"
        status_code: 200
        return_content: true

    - name: Parse available models
      ansible.builtin.set_fact:
        available_models: "{{ ollama_response.json.models | map(attribute='name') | list }}"

    - name: Display available models
      ansible.builtin.debug:
        msg: "Available models: {{ available_models }}"

    # One assert per required model so the failure names the missing one.
    - name: Verify required models are installed
      loop: "{{ required_models }}"
      loop_control:
        label: "{{ item.name }}"
      ansible.builtin.assert:
        that:
          - item.name in available_models
        success_msg: "Model {{ item.name }} ({{ item.type }}) is available"
        fail_msg: "Required model {{ item.name }} ({{ item.type }}) is not installed"

    # Non-streaming request, so the whole answer arrives as one JSON document.
    - name: Test text model inference
      register: text_inference_test
      changed_when: false
      ansible.builtin.uri:
        method: POST
        url: "{{ ollama_base_url }}/api/generate"
        body_format: json
        body:
          model: 'llama3.1:8b'
          prompt: 'Hello'
          stream: false
        timeout: 30
        status_code: 200
        return_content: true

    - name: Verify text model response
      ansible.builtin.assert:
        that:
          - text_inference_test.json.response is defined
          - text_inference_test.json.response | length > 0
        fail_msg: "Text model inference failed"
        success_msg: "Text model inference successful"

    - name: Show Ollama validation summary
      ansible.builtin.debug:
        msg:
          - "Ollama validation passed"
          - "Host: {{ inventory_hostname }} ({{ ansible_host }})"
          - "Models available: {{ available_models | length }}"
          - "Text inference: Working"
|
||||
|
||||
# Play 2: optional checks against the legacy Karakeep container.
#
# Caller-facing switches (pass with -e):
#   test_legacy_karakeep  enable this play (default: false)
#   legacy_host           Docker host running the container (default: 10.0.0.251)
#
# The play reads those switches through internal aliases. Defining a play var
# in terms of itself (`x: "{{ x | default(...) }}"`) makes Ansible raise a
# recursive-template error whenever the value is not supplied as an extra var.
- name: Validate legacy Karakeep integration
  hosts: localhost
  gather_facts: false
  tags: [karakeep, integration, legacy]
  vars:
    karakeep_legacy_enabled: "{{ test_legacy_karakeep | default(false) }}"
    karakeep_legacy_host: "{{ legacy_host | default('10.0.0.251') }}"
    container_name: "hoarder-web"
    ollama_host: "10.0.0.220"
    ollama_port: 11434

  tasks:
    - name: Skip legacy validation when disabled
      ansible.builtin.meta: end_play
      when: not (karakeep_legacy_enabled | bool)

    - name: Check whether Karakeep container is running
      community.docker.docker_container_info:
        name: "{{ container_name }}"
      delegate_to: "{{ karakeep_legacy_host }}"
      vars:
        ansible_user: chester
        ansible_ssh_private_key_file: /home/chester/.ssh/id_ed25519
      register: karakeep_container

    - name: Verify Karakeep container status
      ansible.builtin.assert:
        that:
          - karakeep_container.exists
          - karakeep_container.container.State.Running
          - karakeep_container.container.State.Health.Status == "healthy"
        fail_msg: "Karakeep container is not running or unhealthy"
        success_msg: "Karakeep container is running and healthy"

    # Keep only the OLLAMA*/INFERENCE* entries from the container environment.
    - name: Extract Ollama environment values
      ansible.builtin.set_fact:
        ollama_config: "{{ karakeep_container.container.Config.Env | select('match', '^(OLLAMA|INFERENCE).*') | list }}"

    - name: Verify Karakeep Ollama environment variables
      ansible.builtin.assert:
        that:
          - "'OLLAMA_BASE_URL=http://' + ollama_host + ':' + (ollama_port | string) in ollama_config"
          - "'INFERENCE_TEXT_MODEL=llama3.1:8b' in ollama_config"
          - "'INFERENCE_IMAGE_MODEL=llama3.2-vision:11b' in ollama_config"
        fail_msg: "Ollama environment variables are incorrect"
        success_msg: "Ollama environment variables are correctly configured"

    - name: Test Ollama connectivity from Karakeep container
      community.docker.docker_container_exec:
        container: "{{ container_name }}"
        command: "/bin/sh -c 'wget -qO- http://{{ ollama_host }}:{{ ollama_port }}/api/tags'"
      delegate_to: "{{ karakeep_legacy_host }}"
      vars:
        ansible_user: chester
        ansible_ssh_private_key_file: /home/chester/.ssh/id_ed25519
      register: container_connectivity
      changed_when: false
      failed_when: container_connectivity.rc != 0

    - name: Verify container can reach Ollama API
      ansible.builtin.assert:
        that:
          - "'models' in container_connectivity.stdout"
        success_msg: "Karakeep container can reach Ollama API"
        fail_msg: "Karakeep container cannot reach Ollama API"
|
||||
|
||||
# Play 3: print a fixed closing report on the controller.
# The endpoint and model names below are literal text, not values read back
# from the earlier plays.
- name: Display integration test summary
  hosts: localhost
  gather_facts: false
  tags:
    - summary

  tasks:
    - name: Show final validation report
      ansible.builtin.debug:
        msg:
          - 'Service validation complete'
          - 'Ollama endpoint: http://10.0.0.220:11434'
          - 'Models: llama3.1:8b, llama3.2-vision:11b'
          - 'Legacy Karakeep tested: {{ test_legacy_karakeep | default(false) }}'
|
||||
49
ansible/ansible-old/playbooks/docker/bootstrap_swarm.yml
Normal file
49
ansible/ansible-old/playbooks/docker/bootstrap_swarm.yml
Normal file
@ -0,0 +1,49 @@
|
||||
---
# Bootstrap Docker and Swarm cluster state for all swarm nodes.

# ==================================================
# PRE-PLAY 1: NFS storage mounts come first.
#
# Rationale (as recorded by the original author):
#   - Stack deploys bind-mount /mnt/homelab and /mnt/media. If those are not
#     live NFS mounts, Docker creates an empty local directory instead, which
#     is a silent wrong-state failure.
#   - The storage_mounts role is idempotent via ansible.posix.mount, so
#     re-running it on already-mounted hosts is a no-op.
# ==================================================
- name: Ensure NFS storage mounts are present on all Swarm nodes
  hosts: swarm_hosts
  gather_facts: true
  become: true
  roles:
    - role: storage_mounts
|
||||
|
||||
# ==================================================
# PRE-PLAY 2: put the operational user in the docker group on every node.
#
# Rationale (as recorded by the original author):
#   - The swarm_bootstrap role is driven from `hosts: localhost` through
#     delegate_to, so `--limit swarm-node` silently skips that play. Targeting
#     swarm_hosts directly keeps this step independently runnable.
#   - It must precede the bootstrap play because the docker daemon has to
#     accept socket connections from ansible_user before later docker-cli
#     tasks can succeed.
# ==================================================
- name: Ensure docker group membership for the operational user on all swarm nodes
  hosts: swarm_hosts
  gather_facts: false
  become: true
  tags:
    - docker-users
    - docker-install
  tasks:
    # append: true adds the group without removing the user's existing ones.
    - name: Add ansible user to the docker group
      ansible.builtin.user:
        name: "{{ ansible_user }}"
        append: true
        groups: docker
|
||||
|
||||
# Main play: runs on the controller and hands the work to the swarm_bootstrap
# role, with the shared group_vars loaded explicitly.
- name: Bootstrap Docker Swarm cluster
  hosts: localhost
  gather_facts: false
  vars_files:
    - ../../group_vars/all.yml

  tasks:
    # NOTE(review): a tag on include_role applies to the include statement
    # itself; confirm the role's own tasks carry the tags expected by
    # `--tags swarm-join` runs.
    - name: Run swarm bootstrap role from the primary manager context
      tags:
        - swarm-join
      ansible.builtin.include_role:
        name: swarm_bootstrap
|
||||
186
ansible/ansible-old/playbooks/docker/deploy_authentik.yml
Normal file
186
ansible/ansible-old/playbooks/docker/deploy_authentik.yml
Normal file
@ -0,0 +1,186 @@
|
||||
---
# playbooks/docker/deploy_authentik.yml
#
# Purpose:
#   Deploy Authentik as a Swarm stack pinned to swarm-manager-1 with persistent
#   bind mounts under /mnt/homelab/apps/authentik.
#
# Data protection:
#   This playbook validates all required Authentik data paths before deploy.
#   If paths are missing, deployment fails early to avoid creating empty data
#   roots that could mask or diverge from an existing Authentik installation.
#
# Execution model:
#   The play targets every Swarm manager, but each task is gated on
#   `inventory_hostname == authentik_deploy_target`, so work happens on exactly
#   one manager (edge_routing.swarm.stack_deploy_target, falling back to the
#   first host in the swarm_managers group).
#
# Usage:
#   ansible-playbook -i inventory/hosts.ini playbooks/docker/deploy_authentik.yml
#
#   ansible-playbook -i inventory/hosts.ini playbooks/docker/deploy_authentik.yml \
#     -e "stack_validate_only=true"
#
#   ansible-playbook -i inventory/hosts.ini playbooks/docker/deploy_authentik.yml \
#     -e "authentik_deploy_state=absent"

- name: Deploy Authentik Swarm stack
  hosts: swarm_managers
  become: true
  gather_facts: false
  vars_files:
    - ../../group_vars/all.yml
  vars:
    authentik_deploy_target: "{{ edge_routing.swarm.stack_deploy_target | default(groups['swarm_managers'][0]) }}"

  tasks:
    # --------------------------------------------------
    # STEP 0: Assert required secrets are present
    # --------------------------------------------------

    - name: Assert vault_authentik_secret_key is defined and non-empty
      ansible.builtin.assert:
        that:
          - vault_authentik_secret_key is defined
          - vault_authentik_secret_key | trim | length > 0
        fail_msg: >-
          vault_authentik_secret_key is not defined or is empty.
          Encrypt and store it in group_vars/vault/all.yml with:
          ansible-vault encrypt_string 'your-random-secret' --name 'vault_authentik_secret_key'
      when: inventory_hostname == authentik_deploy_target

    - name: Assert vault_authentik_postgres_password is defined and non-empty
      ansible.builtin.assert:
        that:
          - vault_authentik_postgres_password is defined
          - vault_authentik_postgres_password | trim | length > 0
        fail_msg: >-
          vault_authentik_postgres_password is not defined or is empty.
          Encrypt and store it in group_vars/vault/all.yml with:
          ansible-vault encrypt_string 'your-db-password' --name 'vault_authentik_postgres_password'
      when: inventory_hostname == authentik_deploy_target

    - name: Assert Authentik secrets are not placeholders
      ansible.builtin.assert:
        that:
          - vault_authentik_secret_key not in ['change-me', 'changeme', 'your-random-secret']
          - vault_authentik_postgres_password not in ['change-me', 'changeme', 'your-db-password']
        fail_msg: "Authentik secrets still appear to be placeholders. Set real vault values before deploy."
      when: inventory_hostname == authentik_deploy_target

    # --------------------------------------------------
    # STEP 1: Assert Swarm manager is active
    # --------------------------------------------------

    # Output has the form "<LocalNodeState>|<ControlAvailable>", e.g. "active|true".
    # The {{ "{{" }} ... {{ "}}" }} escapes keep Jinja from consuming the Go template.
    - name: Collect Swarm manager state
      ansible.builtin.command: >
        docker info --format '{{ "{{" }}.Swarm.LocalNodeState{{ "}}" }}|{{ "{{" }}.Swarm.ControlAvailable{{ "}}" }}'
      register: _swarm_info
      changed_when: false
      when: inventory_hostname == authentik_deploy_target

    # Compare the whole string: a substring search for 'active' would also
    # match the 'inactive' node state.
    - name: Assert target is an active Swarm manager
      ansible.builtin.assert:
        that:
          - (_swarm_info.stdout | default('') | trim) == 'active|true'
        fail_msg: >-
          {{ inventory_hostname }} must be an active Swarm manager.
          Current state: {{ _swarm_info.stdout | default('unknown') }}
      when: inventory_hostname == authentik_deploy_target

    # --------------------------------------------------
    # STEP 2: Validate pre-existing persistent data paths
    # --------------------------------------------------

    - name: Stat required Authentik bind-mount paths
      ansible.builtin.stat:
        path: "{{ item }}"
      register: _authentik_path_stat
      loop:
        - /mnt/homelab/apps/authentik
        - /mnt/homelab/apps/authentik/data
        - /mnt/homelab/apps/authentik/data/database
        - /mnt/homelab/apps/authentik/data/redis
        - /mnt/homelab/apps/authentik/data/media
        - /mnt/homelab/apps/authentik/data/config
        - /mnt/homelab/apps/authentik/data/blueprints
      when: inventory_hostname == authentik_deploy_target

    - name: Assert required Authentik paths exist before deploy
      ansible.builtin.assert:
        that:
          - item.stat.exists
          - item.stat.isdir
        fail_msg: >-
          Required Authentik path '{{ item.item }}' is missing on {{ inventory_hostname }}.
          Create/restore this directory first to avoid accidental fresh bootstrap over existing data.
      loop: "{{ _authentik_path_stat.results }}"
      when: inventory_hostname == authentik_deploy_target

    # --------------------------------------------------
    # STEP 3: Deploy Authentik stack
    # --------------------------------------------------

    - name: Deploy Authentik stack
      ansible.builtin.include_role:
        name: swarm_stack_deploy
      vars:
        stack_name: "authentik"
        stack_compose_src: "{{ playbook_dir }}/../../templates/stacks/authentik.stack.yml"
        # authentik_placement_node is resolved from group_vars.
        # NOTE(review): an earlier comment here named swarm-manager-2 while the
        # file header says the stack is pinned to swarm-manager-1; confirm
        # which node group_vars actually sets.
        # Use service-specific state var to avoid self-reference recursion.
        stack_state: "{{ authentik_deploy_state | default('present') }}"
        stack_required_external_networks:
          - proxy-net
        stack_required_directories:
          - /mnt/homelab/apps/authentik
          - /mnt/homelab/apps/authentik/data
          - /mnt/homelab/apps/authentik/data/database
          - /mnt/homelab/apps/authentik/data/redis
          - /mnt/homelab/apps/authentik/data/media
          - /mnt/homelab/apps/authentik/data/config
          - /mnt/homelab/apps/authentik/data/blueprints
      when: inventory_hostname == authentik_deploy_target

    # --------------------------------------------------
    # STEP 4: Wait for service convergence
    # --------------------------------------------------

    # Polls up to 18 times, 10 seconds apart, until the replica column reads 1/1.
    - name: Wait for Authentik server service to converge
      ansible.builtin.command: >
        docker service ls --filter name=authentik_authentik-server --format '{{ "{{" }}.Replicas{{ "}}" }}'
      register: _authentik_server_replicas
      retries: 18
      delay: 10
      until: _authentik_server_replicas.stdout is search('1/1')
      changed_when: false
      when:
        - inventory_hostname == authentik_deploy_target
        - authentik_deploy_state | default('present') == 'present'
        - not ansible_check_mode
      tags: [verify]

    - name: Wait for Authentik worker service to converge
      ansible.builtin.command: >
        docker service ls --filter name=authentik_authentik-worker --format '{{ "{{" }}.Replicas{{ "}}" }}'
      register: _authentik_worker_replicas
      retries: 18
      delay: 10
      until: _authentik_worker_replicas.stdout is search('1/1')
      changed_when: false
      when:
        - inventory_hostname == authentik_deploy_target
        - authentik_deploy_state | default('present') == 'present'
        - not ansible_check_mode
      tags: [verify]

    - name: Report Authentik deployment result
      ansible.builtin.debug:
        msg:
          - "================================================"
          - "Authentik deployment complete."
          - "================================================"
          - "Stack : authentik"
          - "Manager : {{ inventory_hostname }} ({{ ansible_host | default('') }})"
          - "URL : https://sso.castaldifamily.com"
          - "Data root : /mnt/homelab/apps/authentik"
          - "Services : authentik-postgres, authentik-redis, authentik-server, authentik-worker"
          - "================================================"
      when: inventory_hostname == authentik_deploy_target
      tags: [always]
|
||||
@ -0,0 +1,173 @@
|
||||
---
|
||||
# playbooks/docker/deploy_authentik_standalone.yml
|
||||
# Deploy Authentik on a standalone Docker host. target_host must be passed explicitly (e.g. -e "target_host=statler"); the play asserts that it is defined.
|
||||
|
||||
# Standalone (non-Swarm) Authentik deployment. Postgres, Redis, the
# Authentik server and the Authentik worker are started as individual
# containers attached to the same Docker network.
- name: Deploy Authentik on standalone Docker host
  hosts: "{{ target_host | default('statler') }}"
  gather_facts: false
  become: true
  vars_files:
    - ../../group_vars/all.yml

  vars:
    # All data paths are derived from authentik_base_dir.
    authentik_base_dir: "{{ standalone_authentik_base_dir | default('/mnt/homelab/apps/authentik') }}"
    authentik_db_dir: "{{ authentik_base_dir }}/data/database"
    authentik_redis_dir: "{{ authentik_base_dir }}/data/redis"
    authentik_media_dir: "{{ authentik_base_dir }}/data/media"
    authentik_config_dir: "{{ authentik_base_dir }}/data/config"
    authentik_blueprints_dir: "{{ authentik_base_dir }}/data/blueprints"
    authentik_network: proxy-net
    authentik_host_domain: "{{ standalone_authentik_domain | default('sso.castaldifamily.com') }}"
    # Within this play the next two values are only echoed by the summary task.
    authentik_bind_ip: "{{ ansible_host }}"
    authentik_redis_addr: "{{ edge_routing.integration.redis_addr }}"

  tasks:
    # Guard: target_host must be supplied explicitly and must not be one of
    # the listed group-wide patterns. Evaluated once, on the controller.
    - name: Assert target_host is explicit and safe
      run_once: true
      delegate_to: localhost
      ansible.builtin.assert:
        that:
          - target_host is defined
          - target_host | length > 0
          - target_host not in ['all', '*', 'ubuntu_lab', 'docker_hosts', 'swarm_hosts']
        fail_msg: >-
          Invalid target_host scope. Use an explicit host, for example:
          -e "target_host=statler"

    # Guard: both secrets must be defined, non-blank, and must not begin with
    # the '$ANSIBLE_VAULT;' marker (i.e. still be undecrypted vault text).
    - name: Assert Authentik secrets are available and decrypted
      ansible.builtin.assert:
        that:
          - vault_authentik_secret_key is defined
          - vault_authentik_secret_key | trim | length > 0
          - vault_authentik_postgres_password is defined
          - vault_authentik_postgres_password | trim | length > 0
          - vault_authentik_secret_key is not search('^\\$ANSIBLE_VAULT;')
          - vault_authentik_postgres_password is not search('^\\$ANSIBLE_VAULT;')
        fail_msg: >-
          Authentik secrets are unavailable or not decrypted.
          Ensure vault credentials are available before deployment.

    # Base, media, config and blueprint directories are owned by 1000:1000.
    - name: Ensure Authentik app directories exist
      ansible.builtin.file:
        state: directory
        path: "{{ item }}"
        owner: "1000"
        group: "1000"
        mode: "0755"
      loop:
        - "{{ authentik_base_dir }}"
        - "{{ authentik_media_dir }}"
        - "{{ authentik_config_dir }}"
        - "{{ authentik_blueprints_dir }}"

    # Database and Redis directories: no owner/group is set here.
    - name: Ensure Authentik service data directories exist
      ansible.builtin.file:
        state: directory
        path: "{{ item }}"
        mode: "0755"
      loop:
        - "{{ authentik_db_dir }}"
        - "{{ authentik_redis_dir }}"

    # Network shared by all four containers below.
    - name: Ensure Authentik network exists
      community.docker.docker_network:
        state: present
        name: "{{ authentik_network }}"

    # PostgreSQL backend; data is bind-mounted from authentik_db_dir.
    - name: Deploy Authentik Postgres
      community.docker.docker_container:
        name: authentik-postgres
        state: started
        image: docker.io/library/postgres:16-alpine
        pull: always
        restart_policy: unless-stopped
        networks:
          - name: "{{ authentik_network }}"
        volumes:
          - "{{ authentik_db_dir }}:/var/lib/postgresql/data"
        env:
          TZ: America/New_York
          POSTGRES_DB: authentik
          POSTGRES_USER: authentik
          POSTGRES_PASSWORD: "{{ vault_authentik_postgres_password }}"

    # Redis, started with '--save 60 1 --loglevel warning'.
    - name: Deploy Authentik Redis
      community.docker.docker_container:
        name: authentik-redis
        state: started
        image: redis:7-alpine
        pull: always
        restart_policy: unless-stopped
        command:
          - --save
          - "60"
          - "1"
          - --loglevel
          - warning
        networks:
          - name: "{{ authentik_network }}"
        volumes:
          - "{{ authentik_redis_dir }}:/data"

    # Authentik server: publishes port 9000 and carries the Traefik router
    # and service labels for authentik_host_domain.
    - name: Deploy Authentik server with Traefik labels
      community.docker.docker_container:
        name: authentik-server
        state: started
        image: ghcr.io/goauthentik/server:2025.10.1
        pull: always
        restart_policy: unless-stopped
        command:
          - server
        published_ports:
          - "9000:9000"
        networks:
          - name: "{{ authentik_network }}"
        volumes:
          - "{{ authentik_media_dir }}:/media"
          - "{{ authentik_config_dir }}:/config"
          - "{{ authentik_blueprints_dir }}:/blueprints/custom:ro"
        env:
          TZ: America/New_York
          AUTHENTIK_POSTGRESQL__HOST: authentik-postgres
          AUTHENTIK_POSTGRESQL__NAME: authentik
          AUTHENTIK_POSTGRESQL__USER: authentik
          AUTHENTIK_POSTGRESQL__PASSWORD: "{{ vault_authentik_postgres_password }}"
          AUTHENTIK_SECRET_KEY: "{{ vault_authentik_secret_key }}"
          AUTHENTIK_REDIS__HOST: authentik-redis
        labels:
          traefik.enable: "true"
          traefik.http.routers.authentik.rule: "Host(`{{ authentik_host_domain }}`)"
          traefik.http.routers.authentik.entrypoints: websecure
          traefik.http.routers.authentik.tls: "true"
          traefik.http.routers.authentik.tls.certresolver: cloudflare
          traefik.http.services.authentik.loadbalancer.server.port: "9000"

    # Authentik worker: same image and environment as the server, started
    # with the 'worker' command; no published ports or labels.
    - name: Deploy Authentik worker
      community.docker.docker_container:
        name: authentik-worker
        state: started
        image: ghcr.io/goauthentik/server:2025.10.1
        pull: always
        restart_policy: unless-stopped
        command:
          - worker
        networks:
          - name: "{{ authentik_network }}"
        volumes:
          - "{{ authentik_media_dir }}:/media"
          - "{{ authentik_config_dir }}:/config"
        env:
          TZ: America/New_York
          AUTHENTIK_POSTGRESQL__HOST: authentik-postgres
          AUTHENTIK_POSTGRESQL__NAME: authentik
          AUTHENTIK_POSTGRESQL__USER: authentik
          AUTHENTIK_POSTGRESQL__PASSWORD: "{{ vault_authentik_postgres_password }}"
          AUTHENTIK_SECRET_KEY: "{{ vault_authentik_secret_key }}"
          AUTHENTIK_REDIS__HOST: authentik-redis

    # Echo the effective settings of this run.
    - name: Show deployment summary
      ansible.builtin.debug:
        msg:
          - "Standalone Authentik deployed to {{ inventory_hostname }}"
          - "Base dir: {{ authentik_base_dir }}"
          - "Domain: {{ authentik_host_domain }}"
          - "Traefik-kop Redis: {{ authentik_redis_addr }}"
          - "Bind IP: {{ authentik_bind_ip }}"
|
||||
178
ansible/ansible-old/playbooks/docker/deploy_example_stack.yml
Normal file
178
ansible/ansible-old/playbooks/docker/deploy_example_stack.yml
Normal file
@ -0,0 +1,178 @@
|
||||
---
|
||||
# =============================================================================
|
||||
# FUTURE-STACK DEPLOYMENT BLUEPRINT — copy, rename, and fill in TODO items.
|
||||
# This playbook is the minimum viable deploy playbook for any new Swarm stack.
|
||||
#
|
||||
# COPY CHECKLIST:
|
||||
# 1. Rename this file to deploy_<service>.yml
|
||||
# 2. Search for TODO and fill in every occurrence
|
||||
# 3. Run validate-only first:
|
||||
# ansible-playbook -i inventory/hosts.ini playbooks/docker/deploy_<service>.yml \
|
||||
# -e "stack_validate_only=true"
|
||||
# 4. Run full deploy and verify convergence
|
||||
# 5. Run deploy a second time and confirm "changed=0" (idempotency proof)
|
||||
# =============================================================================
|
||||
#
|
||||
# IDEMPOTENCY CONTRACT (required for all new stacks):
|
||||
# - All required secrets MUST be asserted before any Swarm state is touched.
|
||||
# - All required bind-mount paths MUST be statted and asserted before deploy.
|
||||
# - All command/shell tasks MUST declare changed_when.
|
||||
# - validate-only mode MUST work without any Swarm mutations.
|
||||
# - Deploy MUST be replay-safe: running twice produces no unintended changes.
|
||||
#
|
||||
# Usage:
|
||||
# Normal deploy:
|
||||
# ansible-playbook -i inventory/hosts.ini playbooks/docker/deploy_<service>.yml
|
||||
#
|
||||
# Validate only (no Swarm changes):
|
||||
# ansible-playbook -i inventory/hosts.ini playbooks/docker/deploy_<service>.yml \
|
||||
# -e "stack_validate_only=true"
|
||||
#
|
||||
# Tear down:
|
||||
# ansible-playbook -i inventory/hosts.ini playbooks/docker/deploy_<service>.yml \
|
||||
# -e "<service>_deploy_state=absent"
|
||||
|
||||
# TODO: set the play name and stack name.
|
||||
# Blueprint play: every task is limited to a single host (_deploy_target)
# even though the play targets the whole swarm_managers group.
- name: Deploy <service> Swarm stack
  hosts: swarm_managers
  become: true
  gather_facts: false
  vars_files:
    - ../../group_vars/all.yml
  vars:
    # TODO: set the deploy target. Default: first Swarm manager.
    _deploy_target: "{{ groups['swarm_managers'][0] }}"

  tasks:

    # --------------------------------------------------
    # STEP 0: Assert required secrets are present
    # WHY: Fail before any Swarm state is touched. An empty/placeholder secret
    # causes a silent misconfiguration that is hard to diagnose at runtime.
    # --------------------------------------------------

    # TODO: add one assert block per required vault variable.
    # Remove this block entirely if the stack has no secrets.
    - name: Assert vault_<service>_secret is defined and non-empty
      ansible.builtin.assert:
        that:
          - vault_example_secret is defined
          - vault_example_secret | trim | length > 0
          - vault_example_secret not in ['change-me', 'changeme', 'TODO']
        fail_msg: >-
          vault_example_secret is not defined, empty, or still a placeholder.
          Encrypt a real value with:
          ansible-vault encrypt_string 'value' --name 'vault_example_secret'
          then add it to group_vars/vault/all.yml.
      when: inventory_hostname == _deploy_target

    # --------------------------------------------------
    # STEP 1: Assert Swarm manager is active
    # WHY: Exact equality is used because a substring/regex search for
    # 'active' also matches 'inactive'. The format string below yields
    # 'active|true' for a healthy manager and nothing else valid.
    # --------------------------------------------------

    # Read-only query: '<LocalNodeState>|<ControlAvailable>'.
    - name: Collect Swarm manager state
      ansible.builtin.command: >
        docker info --format '{{ "{{" }}.Swarm.LocalNodeState{{ "}}" }}|{{ "{{" }}.Swarm.ControlAvailable{{ "}}" }}'
      register: _swarm_info
      changed_when: false
      when: inventory_hostname == _deploy_target

    - name: Assert target is an active Swarm manager
      ansible.builtin.assert:
        that:
          - _swarm_info.stdout == 'active|true'
        fail_msg: >-
          {{ inventory_hostname }} must be an active Swarm manager.
          Expected 'active|true', got '{{ _swarm_info.stdout | default('unknown') }}'.
      when: inventory_hostname == _deploy_target

    # --------------------------------------------------
    # STEP 2: Validate required bind-mount paths
    # WHY: A missing path causes the service to start against an empty/wrong
    # directory. Pre-existence assertion protects against accidental fresh
    # bootstrap over existing data.
    # TODO: add/remove paths to match the stack's volume mounts.
    # IMPORTANT: do NOT create missing paths here; require the operator to
    # provision or restore them first (data safety).
    # --------------------------------------------------

    - name: Stat required bind-mount paths
      ansible.builtin.stat:
        path: "{{ item }}"
      register: _path_stat
      loop:
        - /mnt/homelab/apps/example/data  # TODO: adjust per service
      when: inventory_hostname == _deploy_target

    # Iterates over the stat results above; item.item is the original path.
    - name: Assert required paths exist before deploy
      ansible.builtin.assert:
        that:
          - item.stat.exists
          - item.stat.isdir
        fail_msg: >-
          Required path '{{ item.item }}' is missing on {{ inventory_hostname }}.
          Create or restore this directory before deploying.
      loop: "{{ _path_stat.results }}"
      when: inventory_hostname == _deploy_target

    # --------------------------------------------------
    # STEP 3: Deploy stack via shared role
    # WHY swarm_stack_deploy: handles template render, YAML syntax validation,
    # external-network pre-check, bind-mount directory creation, and
    # idempotent docker stack deploy with correct changed semantics.
    # --------------------------------------------------

    - name: Deploy <service> stack
      ansible.builtin.include_role:
        name: swarm_stack_deploy
      vars:
        stack_name: "example"  # TODO: change to service name
        stack_compose_src: "{{ playbook_dir }}/../../templates/stacks/example.service.stack.yml"  # TODO: change path
        # WHY <service>_deploy_state (not stack_state): using stack_state here
        # creates a Jinja2 self-reference loop inside the role. Use a
        # service-specific var that defaults cleanly.
        stack_state: "{{ example_deploy_state | default('present') }}"  # TODO: rename var
        stack_required_external_networks:
          - proxy-net
        # OPTIONAL: directories the role should CREATE if absent (non-data dirs).
        # Do NOT list data directories here — assert their existence in STEP 2.
        stack_required_directories: []
      when: inventory_hostname == _deploy_target

    # --------------------------------------------------
    # STEP 4: Wait for service convergence
    # WHY: Confirms the scheduler placed and started the task successfully.
    # changed_when: false — querying replica count is read-only.
    # TODO: adjust filter name and replica count to match stack_name.
    # --------------------------------------------------

    - name: Wait for <service> to converge
      ansible.builtin.command: >
        docker service ls --filter name=example_example-app --format '{{ "{{" }}.Replicas{{ "}}" }}'
      register: _replicas
      retries: 12
      delay: 10
      # Anchored match instead of search(): an unanchored search('1/1') would
      # also accept values such as '11/1' or '1/10' (same substring pitfall as
      # 'active' vs 'inactive' in STEP 1).
      until: _replicas.stdout is match('1/1( |$)')
      changed_when: false
      when:
        - inventory_hostname == _deploy_target
        - example_deploy_state | default('present') == 'present'
        - not ansible_check_mode
      tags: [verify]

    - name: Report deployment result
      ansible.builtin.debug:
        msg:
          - "================================================"
          - "<service> deployment complete."  # TODO: rename
          - "================================================"
          - "Stack : example"  # TODO: rename
          - "Manager : {{ inventory_hostname }} ({{ ansible_host | default('') }})"
          - "URL : https://example.castaldifamily.com"  # TODO: change
          - "Data : /mnt/homelab/apps/example"  # TODO: change
          - "================================================"
      when: inventory_hostname == _deploy_target
      tags: [always]
|
||||
158
ansible/ansible-old/playbooks/docker/deploy_gitea.yml
Normal file
158
ansible/ansible-old/playbooks/docker/deploy_gitea.yml
Normal file
@ -0,0 +1,158 @@
|
||||
---
|
||||
# playbooks/docker/deploy_gitea.yml
|
||||
#
|
||||
# Purpose:
|
||||
# Deploy Gitea as a Swarm stack pinned to swarm-manager-1, with a dedicated
|
||||
# Postgres sidecar and persistent bind mounts under /mnt/homelab/apps/gitea.
|
||||
#
|
||||
# Data protection:
|
||||
# Preflight checks require all data paths to exist before deploy.
|
||||
# If paths are missing, deployment fails early to avoid creating an empty
|
||||
# data root over an existing Gitea installation.
|
||||
#
|
||||
# Usage:
|
||||
# ansible-playbook -i inventory/hosts.ini playbooks/docker/deploy_gitea.yml
|
||||
#
|
||||
# Validate only (no Swarm mutations):
|
||||
# ansible-playbook -i inventory/hosts.ini playbooks/docker/deploy_gitea.yml \
|
||||
# -e "stack_validate_only=true"
|
||||
#
|
||||
# Tear down:
|
||||
# ansible-playbook -i inventory/hosts.ini playbooks/docker/deploy_gitea.yml \
|
||||
# -e "gitea_deploy_state=absent"
|
||||
|
||||
# Every task is limited to gitea_deploy_target even though the play targets
# the whole swarm_managers group.
- name: Deploy Gitea Swarm stack
  hosts: swarm_managers
  become: true
  gather_facts: false
  vars_files:
    - ../../group_vars/all.yml
  vars:
    # Falls back to the first Swarm manager when no explicit target is set.
    gitea_deploy_target: "{{ edge_routing.swarm.stack_deploy_target | default(groups['swarm_managers'][0]) }}"

  tasks:

    # --------------------------------------------------
    # STEP 0: Assert required secrets are present
    # --------------------------------------------------

    - name: Assert vault_gitea_db_password is defined and non-empty
      ansible.builtin.assert:
        that:
          - vault_gitea_db_password is defined
          - vault_gitea_db_password | trim | length > 0
        fail_msg: >-
          vault_gitea_db_password is not defined or is empty.
          Encrypt and store it in group_vars/vault/all.yml with:
          ansible-vault encrypt_string 'your-db-password' --name 'vault_gitea_db_password'
      when: inventory_hostname == gitea_deploy_target

    - name: Assert vault_gitea_db_password is not a placeholder
      ansible.builtin.assert:
        that:
          - vault_gitea_db_password not in ['change-me', 'changeme', 'your-db-password']
        fail_msg: "vault_gitea_db_password still appears to be a placeholder. Set a real vault value before deploy."
      when: inventory_hostname == gitea_deploy_target

    # --------------------------------------------------
    # STEP 1: Assert Swarm manager is active
    # WHY exact equality: search('active') matches 'inactive' as a substring.
    # The format string yields 'active|true' only for a healthy manager.
    # --------------------------------------------------

    # Read-only query: '<LocalNodeState>|<ControlAvailable>'.
    - name: Collect Swarm manager state
      ansible.builtin.command: >
        docker info --format '{{ "{{" }}.Swarm.LocalNodeState{{ "}}" }}|{{ "{{" }}.Swarm.ControlAvailable{{ "}}" }}'
      register: _swarm_info
      changed_when: false
      when: inventory_hostname == gitea_deploy_target

    - name: Assert target is an active Swarm manager
      ansible.builtin.assert:
        that:
          - _swarm_info.stdout == 'active|true'
        fail_msg: >-
          {{ inventory_hostname }} must be an active Swarm manager.
          Expected 'active|true', got '{{ _swarm_info.stdout | default('unknown') }}'.
      when: inventory_hostname == gitea_deploy_target

    # --------------------------------------------------
    # STEP 2: Validate pre-existing persistent data paths
    # WHY: Missing paths cause Gitea to bootstrap a fresh install over existing
    # data. The operator must create or restore paths before deploying.
    # --------------------------------------------------

    - name: Stat required Gitea bind-mount paths
      ansible.builtin.stat:
        path: "{{ item }}"
      register: _gitea_path_stat
      loop:
        - /mnt/homelab/apps/gitea
        - /mnt/homelab/apps/gitea/data
        - /mnt/homelab/apps/gitea/data/db
      when: inventory_hostname == gitea_deploy_target

    # Iterates over the stat results above; item.item is the original path.
    - name: Assert required Gitea paths exist before deploy
      ansible.builtin.assert:
        that:
          - item.stat.exists
          - item.stat.isdir
        fail_msg: >-
          Required Gitea path '{{ item.item }}' is missing on {{ inventory_hostname }}.
          Create or restore this directory first to protect existing data.
      loop: "{{ _gitea_path_stat.results }}"
      when: inventory_hostname == gitea_deploy_target

    # --------------------------------------------------
    # STEP 3: Deploy Gitea stack
    # --------------------------------------------------

    - name: Deploy Gitea stack
      ansible.builtin.include_role:
        name: swarm_stack_deploy
      vars:
        stack_name: "gitea"
        stack_compose_src: "{{ playbook_dir }}/../../templates/stacks/gitea.stack.yml"
        # WHY gitea_deploy_state (not stack_state): using stack_state directly
        # creates a Jinja2 self-reference loop inside the role.
        stack_state: "{{ gitea_deploy_state | default('present') }}"
        stack_required_external_networks:
          - proxy-net
        # Same three paths that STEP 2 already asserted to exist.
        stack_required_directories:
          - /mnt/homelab/apps/gitea
          - /mnt/homelab/apps/gitea/data
          - /mnt/homelab/apps/gitea/data/db
      when: inventory_hostname == gitea_deploy_target

    # --------------------------------------------------
    # STEP 4: Wait for service convergence
    # --------------------------------------------------

    - name: Wait for Gitea server service to converge
      ansible.builtin.command: >
        docker service ls --filter name=gitea_server --format '{{ "{{" }}.Replicas{{ "}}" }}'
      register: _gitea_replicas
      retries: 18
      delay: 10
      # Anchored match instead of search(): an unanchored search('1/1') would
      # also accept values such as '11/1' or '1/10' (same substring pitfall as
      # 'active' vs 'inactive' in STEP 1).
      until: _gitea_replicas.stdout is match('1/1( |$)')
      changed_when: false
      when:
        - inventory_hostname == gitea_deploy_target
        - gitea_deploy_state | default('present') == 'present'
        - not ansible_check_mode
      tags: [verify]

    - name: Report Gitea deployment result
      ansible.builtin.debug:
        msg:
          - "================================================"
          - "Gitea deployment complete."
          - "================================================"
          - "Stack : gitea"
          - "Manager : {{ inventory_hostname }} ({{ ansible_host | default('') }})"
          - "URL : https://git.castaldifamily.com"
          - "Data root : /mnt/homelab/apps/gitea"
          - "Services : gitea_server, gitea_gitea-db"
          - "================================================"
      when: inventory_hostname == gitea_deploy_target
      tags: [always]
|
||||
235
ansible/ansible-old/playbooks/docker/deploy_plex.yml
Normal file
235
ansible/ansible-old/playbooks/docker/deploy_plex.yml
Normal file
@ -0,0 +1,235 @@
|
||||
---
|
||||
# playbooks/docker/deploy_plex.yml
|
||||
#
|
||||
# Purpose:
|
||||
# Deploy Plex Media Server as a Swarm stack, pinned to swarm-manager-1 which
|
||||
# hosts the media volumes and hardware transcoding devices.
|
||||
#
|
||||
# Architecture:
|
||||
# Plex listens on port 32400. Traefik on Heimdall routes inbound HTTPS for
|
||||
# plex.castaldifamily.com via traefik-kop, which reads deploy.labels from
|
||||
# the Swarm service and publishes routes into Redis.
|
||||
# Media is served from bind-mounted host paths; config persists under
|
||||
# /mnt/homelab/apps/plex.
|
||||
#
|
||||
# Pre-requisites:
|
||||
# - Swarm must be active; swarm-manager-1 (10.0.0.211) must be reachable.
|
||||
# - proxy-net overlay must exist (deploy_traefik_kop.yml must have run).
|
||||
# - traefik-kop must be running on Swarm.
|
||||
# - vault_plex_claim must be present in group_vars/vault/all.yml:
|
||||
# ansible-vault encrypt_string 'claim-XXXX' --name 'vault_plex_claim'
|
||||
# - Media paths on swarm-manager-1 must be mounted:
|
||||
# /mnt/media/tvshows
|
||||
# /mnt/media/movies
|
||||
# - community.docker collection installed:
|
||||
# ansible-galaxy collection install -r requirements.yml
|
||||
#
|
||||
# Usage:
|
||||
# Normal deploy:
|
||||
# ansible-playbook -i inventory/hosts.ini playbooks/docker/deploy_plex.yml
|
||||
#
|
||||
# Validate only (preflight and syntax checks — no changes applied to Swarm):
|
||||
# ansible-playbook -i inventory/hosts.ini playbooks/docker/deploy_plex.yml \
|
||||
# -e "stack_validate_only=true"
|
||||
#
|
||||
# Tear down:
|
||||
# ansible-playbook -i inventory/hosts.ini playbooks/docker/deploy_plex.yml \
|
||||
# -e "plex_deploy_state=absent"
|
||||
#
|
||||
# Verification after deploy:
|
||||
# docker stack services plex
|
||||
# docker service ps plex_plex
|
||||
# docker exec redis redis-cli keys 'traefik/*plex*'
|
||||
# curl -sf https://plex.castaldifamily.com/web/index.html | head -5
|
||||
|
||||
# Every task is limited to the first host of the swarm_managers group even
# though the play targets the whole group.
- name: Deploy Plex Media Server Swarm stack
  hosts: swarm_managers
  become: true
  gather_facts: false
  vars_files:
    - ../../group_vars/all.yml

  tasks:
    # --------------------------------------------------
    # STEP 0: Assert required secrets are present
    # WHY: If vault_plex_claim is missing or still holds the placeholder value,
    # the stack template renders with an empty PLEX_CLAIM and Plex starts
    # unclaimed — a silent failure. Catching it here produces a clear,
    # actionable error before any Swarm state is touched.
    # --------------------------------------------------

    - name: Assert vault_plex_claim is defined and non-empty
      ansible.builtin.assert:
        that:
          - vault_plex_claim is defined
          # trim first so a whitespace-only value is rejected as empty.
          - vault_plex_claim | trim | length > 0
        fail_msg: >-
          vault_plex_claim is not defined or is empty.
          Encrypt your Plex claim token with:
          ansible-vault encrypt_string 'claim-XXXX' --name 'vault_plex_claim'
          then add the result to group_vars/vault/all.yml.
      when: inventory_hostname == groups['swarm_managers'][0]

    - name: Assert vault_plex_claim is not the placeholder literal
      ansible.builtin.assert:
        that:
          - vault_plex_claim != 'claim-XXXX'
        fail_msg: >-
          vault_plex_claim contains the placeholder value 'claim-XXXX'.
          Replace it with a real token from https://www.plex.tv/claim/
      when: inventory_hostname == groups['swarm_managers'][0]

    # --------------------------------------------------
    # STEP 1: Assert Swarm is active and reachable
    # WHY: Fail fast before touching the stack; the role also validates this
    # but an early assert here produces a cleaner error message.
    # --------------------------------------------------

    # Read-only query: '<LocalNodeState>|<ControlAvailable>'.
    - name: Collect Swarm manager state
      ansible.builtin.command: >
        docker info --format '{{ "{{" }}.Swarm.LocalNodeState{{ "}}" }}|{{ "{{" }}.Swarm.ControlAvailable{{ "}}" }}'
      register: _swarm_info
      changed_when: false
      when: inventory_hostname == groups['swarm_managers'][0]

    - name: Assert target is an active Swarm manager
      ansible.builtin.assert:
        that:
          # WHY exact equality: search('active') matches 'inactive' as a substring.
          # The format string yields 'active|true' only for a healthy manager.
          - _swarm_info.stdout == 'active|true'
        fail_msg: >-
          {{ inventory_hostname }} must be an active Swarm manager.
          Expected 'active|true', got '{{ _swarm_info.stdout | default('unknown') }}'.
      when: inventory_hostname == groups['swarm_managers'][0]

    # --------------------------------------------------
    # STEP 1b: Validate Docker Engine version and hardware device availability
    # WHY: Device passthrough requires Docker >= 20.10. Missing devices fall
    # back to CPU transcoding silently — warn here for operator visibility.
    # These checks are NON-BLOCKING: deploy proceeds regardless of result.
    # --------------------------------------------------

    - name: Get Docker Engine version on placement node
      ansible.builtin.command: docker info --format '{{ "{{" }}.ServerVersion{{ "}}" }}'
      register: _docker_ver
      changed_when: false
      when: inventory_hostname == groups['swarm_managers'][0]

    # debug only: prints a warning, never fails the play.
    - name: Warn if Docker Engine is below 20.10 (device passthrough may fail)
      ansible.builtin.debug:
        msg: >-
          WARNING: Docker Engine {{ _docker_ver.stdout }} may not support Swarm
          device passthrough. Required: >= 20.10. Hardware transcoding may be
          unavailable; CPU transcoding will be used as fallback.
      when:
        - inventory_hostname == groups['swarm_managers'][0]
        - _docker_ver.stdout is version('20.10', '<')

    - name: Stat GPU device nodes on placement node
      ansible.builtin.stat:
        path: "{{ item }}"
      register: _device_stat
      loop:
        - /dev/renderD128
        - /dev/dri
      when: inventory_hostname == groups['swarm_managers'][0]

    # debug only: one warning per device path that the stat above did not find.
    - name: Warn on missing GPU device nodes (CPU fallback will be used)
      ansible.builtin.debug:
        msg: >-
          WARNING: Device {{ item.item }} not present on {{ inventory_hostname }}.
          Plex will fall back to CPU transcoding.
      loop: "{{ _device_stat.results }}"
      when:
        - inventory_hostname == groups['swarm_managers'][0]
        - not item.stat.exists

    # --------------------------------------------------
    # STEP 2: Verify media bind-mount paths exist on placement node
    # WHY: A missing media path causes Plex to start but serve no content.
    # Catch this before deploy to prevent a misleading "success" state.
    # --------------------------------------------------

    - name: Stat required media paths on placement node
      ansible.builtin.stat:
        path: "{{ item }}"
      register: _media_path_stat
      loop:
        - /mnt/media/tvshows
        - /mnt/media/movies
      when: inventory_hostname == groups['swarm_managers'][0]

    # Iterates over the stat results above; item.item is the original path.
    - name: Assert media paths are present
      ansible.builtin.assert:
        that:
          - item.stat.exists
        fail_msg: >-
          Required media path '{{ item.item }}' does not exist on
          {{ inventory_hostname }}. Mount or create the path before deploying Plex.
      loop: "{{ _media_path_stat.results }}"
      when: inventory_hostname == groups['swarm_managers'][0]

    # --------------------------------------------------
    # STEP 3: Deploy Plex stack
    # WHY swarm_stack_deploy role: handles template render, compose config
    # validation, external-network pre-check, directory creation, and
    # idempotent docker stack deploy with prune and registry auth.
    # --------------------------------------------------

    - name: Deploy Plex stack
      ansible.builtin.include_role:
        name: swarm_stack_deploy
      vars:
        stack_name: "plex"
        stack_compose_src: "{{ playbook_dir }}/../../templates/stacks/plex.stack.yml"
        # WHY plex_deploy_state (not stack_state): using stack_state here would
        # create a Jinja2 self-reference loop — the role stores stack_state as
        # a template string, then any evaluation of stack_state recurses into
        # itself. plex_deploy_state is never internally defined, so
        # | default('present') always resolves cleanly.
        stack_state: "{{ plex_deploy_state | default('present') }}"
        stack_required_external_networks:
          - proxy-net
        stack_required_directories:
          - /mnt/homelab/apps/plex/data
      when: inventory_hostname == groups['swarm_managers'][0]

    # --------------------------------------------------
    # STEP 4: Wait for service to reach desired replica count
    # WHY: Confirms the scheduler placed and started the task successfully,
    # rather than leaving the caller to check manually.
    # --------------------------------------------------

    - name: Wait for Plex service to converge
      ansible.builtin.command: >
        docker service ls --filter name=plex_plex --format '{{ "{{" }}.Replicas{{ "}}" }}'
      register: _plex_replicas
      retries: 12
      delay: 10
      # Anchored match instead of search(): an unanchored search('1/1') would
      # also accept values such as '11/1' or '1/10' (same substring pitfall as
      # 'active' vs 'inactive' in STEP 1).
      until: _plex_replicas.stdout is match('1/1( |$)')
      changed_when: false
      when:
        - inventory_hostname == groups['swarm_managers'][0]
        - plex_deploy_state | default('present') == 'present'
        - not ansible_check_mode
      tags: [verify]

    - name: Report deployment result
      ansible.builtin.debug:
        msg:
          - "================================================"
          - "Plex deployment complete."
          - "================================================"
          - "Stack : plex"
          - "Manager : {{ inventory_hostname }} ({{ ansible_host | default('') }})"
          - "Port : 32400"
          - "URL : https://plex.castaldifamily.com"
          - "Config : /mnt/homelab/apps/plex/data"
          - "Media : /mnt/media/tvshows, /mnt/media/movies"
          - "------------------------------------------------"
          - "Verify route keys in Traefik Redis:"
          - " docker exec redis redis-cli keys 'traefik/*plex*'"
          - "================================================"
      when: inventory_hostname == groups['swarm_managers'][0]
      tags: [always]
|
||||
448
ansible/ansible-old/playbooks/docker/deploy_plex_standalone.yml
Normal file
448
ansible/ansible-old/playbooks/docker/deploy_plex_standalone.yml
Normal file
@ -0,0 +1,448 @@
|
||||
---
|
||||
# playbooks/docker/deploy_plex_standalone.yml
|
||||
#
|
||||
# Purpose:
|
||||
# Deploy the full Plex media stack on a standalone Docker host (statler).
|
||||
# Includes: Plex, Radarr, Sonarr, SABnzbd, Overseerr, Wizarr, and their
|
||||
# Authentik proxy outposts.
|
||||
#
|
||||
# Architecture:
|
||||
# All containers share the proxy-net bridge network. Traefik-kop on statler
|
||||
# reads container labels and publishes routes to Heimdall's Redis, where
|
||||
# the external Traefik picks them up.
|
||||
# Plex config is served from the TNAS share at /mnt/homelab/apps/plex/data.
|
||||
# Media (TV/Movies/Downloads) is served from /mnt/media (TNAS Volume2).
|
||||
# Service configs (Radarr, Sonarr, etc.) are served from /mnt/homelab/apps.
|
||||
#
|
||||
# Pre-requisites:
|
||||
# - NFS shares mounted on target host (mount_nfs_shares.yml must have run):
|
||||
# /mnt/homelab (TNAS Volume1/appdata)
|
||||
# /mnt/media (TNAS Volume2/media)
|
||||
# - traefik-kop-agent must be running on the target host.
|
||||
# - vault_plex_claim and vault_authentik_token_* must be present and decrypted.
|
||||
#
|
||||
# Usage:
|
||||
# ansible-playbook -i inventory/hosts.ini playbooks/docker/deploy_plex_standalone.yml \
|
||||
# -e "target_host=statler"
|
||||
#
|
||||
# Tear down the whole stack (plex_deploy_state is applied to every container task):
|
||||
# ansible-playbook ... -e "target_host=statler plex_deploy_state=absent"
|
||||
#
|
||||
# Verification after deploy:
|
||||
# docker ps on statler
|
||||
# curl http://10.0.0.210:32400/identity
|
||||
# redis-cli -h 10.0.0.151 keys 'traefik/*sonarr*'
|
||||
|
||||
- name: Deploy Plex media stack on standalone Docker host
  # target_host selects the inventory host; the play falls back to 'statler'.
  hosts: "{{ target_host | default('statler') }}"
  gather_facts: false
  become: true
  vars_files:
    - ../../group_vars/all.yml

  vars:
    # Docker network shared by every container in this play.
    plex_network: "proxy-net"

    # Media library paths.
    media_base: "/mnt/media"
    plex_tv_dir: "/mnt/media/tvshows"
    plex_movies_dir: "/mnt/media/movies"

    # Per-service configuration directories.
    plex_config_dir: "/mnt/homelab/apps/plex/data"
    sabnzbd_config_dir: "/mnt/homelab/apps/sabnzbd/data"
    sonarr_config_dir: "/mnt/homelab/apps/sonarr/data"
    radarr_config_dir: "/mnt/homelab/apps/radarr/data"
    overseerr_config_dir: "/mnt/homelab/apps/overseerr/data"
    wizarr_config_dir: "/mnt/homelab/apps/wizarr/data/database"
|
||||
|
||||
tasks:
|
||||
# --------------------------------------------------
|
||||
# STEP 0: Safety assertions
|
||||
# --------------------------------------------------
|
||||
|
||||
- name: Assert target_host is explicit and safe
  # Evaluated once on the control node: target_host must be passed explicitly
  # and must not be one of the listed broad patterns or groups.
  run_once: true
  delegate_to: localhost
  ansible.builtin.assert:
    fail_msg: >-
      Invalid target_host scope. Use an explicit host, e.g.:
      -e "target_host=statler"
    that:
      - target_host is defined
      - target_host | length > 0
      - target_host not in ['all', '*', 'ubuntu_lab', 'docker_hosts', 'swarm_hosts']
|
||||
|
||||
- name: Assert required secrets are available and decrypted
  # Each secret must be defined, non-blank, and must not still begin with the
  # $ANSIBLE_VAULT; header (i.e. it must not be raw vault ciphertext).
  ansible.builtin.assert:
    fail_msg: >-
      One or more required secrets are unavailable or not decrypted.
      Required: vault_plex_claim, vault_authentik_token_sonarr,
      vault_authentik_token_radarr, vault_authentik_token_sabnzbd.
    that:
      # Plex claim token
      - vault_plex_claim is defined
      - vault_plex_claim | trim | length > 0
      - vault_plex_claim is not search('^\$ANSIBLE_VAULT;')
      # Authentik outpost token: Sonarr
      - vault_authentik_token_sonarr is defined
      - vault_authentik_token_sonarr | trim | length > 0
      - vault_authentik_token_sonarr is not search('^\$ANSIBLE_VAULT;')
      # Authentik outpost token: Radarr
      - vault_authentik_token_radarr is defined
      - vault_authentik_token_radarr | trim | length > 0
      - vault_authentik_token_radarr is not search('^\$ANSIBLE_VAULT;')
      # Authentik outpost token: SABnzbd
      - vault_authentik_token_sabnzbd is defined
      - vault_authentik_token_sabnzbd | trim | length > 0
      - vault_authentik_token_sabnzbd is not search('^\$ANSIBLE_VAULT;')
|
||||
|
||||
- name: Assert TNAS Plex config directory is mounted and accessible
  # Only collects stat data; the assertion that follows evaluates it.
  register: _plex_config_stat
  ansible.builtin.stat:
    path: "{{ plex_config_dir }}"
|
||||
|
||||
- name: Fail if TNAS Plex config path does not exist
  # Stops the play unless plex_config_dir exists and is a directory.
  ansible.builtin.assert:
    fail_msg: >-
      {{ plex_config_dir }} does not exist or is not a directory.
      Ensure the TNAS NFS share is mounted: run mount_nfs_shares.yml first.
    that:
      - _plex_config_stat.stat.exists
      - _plex_config_stat.stat.isdir
|
||||
|
||||
- name: Assert media NFS shares are mounted
  # Stats the TV and movies directories; results are checked by the next task.
  loop:
    - "{{ plex_tv_dir }}"
    - "{{ plex_movies_dir }}"
  register: _media_stat
  ansible.builtin.stat:
    path: "{{ item }}"
|
||||
|
||||
- name: Fail if media paths are not mounted
  # One assertion per stat result; item.item is the path that was checked.
  loop: "{{ _media_stat.results }}"
  ansible.builtin.assert:
    fail_msg: >-
      Media path {{ item.item }} is not accessible on {{ inventory_hostname }}.
      Ensure /mnt/media NFS share is mounted: run mount_nfs_shares.yml first.
    that:
      - item.stat.exists
      - item.stat.isdir
|
||||
|
||||
# --------------------------------------------------
|
||||
# STEP 1: Ensure proxy-net bridge network exists
|
||||
# --------------------------------------------------
|
||||
|
||||
- name: Ensure proxy-net bridge network exists
  # Creates the bridge network named by plex_network if it is missing.
  community.docker.docker_network:
    state: present
    driver: bridge
    name: "{{ plex_network }}"
|
||||
|
||||
# --------------------------------------------------
|
||||
# STEP 2: Ensure service config directories exist on appdata mount
|
||||
# WHY these dirs are on /mnt/homelab: shared appdata policy for statler
|
||||
# services while keeping explicit paths in deployment automation.
|
||||
# --------------------------------------------------
|
||||
|
||||
- name: Ensure local service config directories exist
  # Creates each config directory owned by 1000:1000, the same IDs the
  # containers in this play receive as PUID/PGID.
  loop:
    - "{{ sabnzbd_config_dir }}"
    - "{{ sonarr_config_dir }}"
    - "{{ radarr_config_dir }}"
    - "{{ overseerr_config_dir }}"
    - "{{ wizarr_config_dir }}"
  ansible.builtin.file:
    state: directory
    path: "{{ item }}"
    mode: '0755'
    owner: "1000"
    group: "1000"
|
||||
|
||||
# --------------------------------------------------
|
||||
# STEP 3: Plex
|
||||
# --------------------------------------------------
|
||||
|
||||
- name: Deploy Plex Media Server
  community.docker.docker_container:
    name: plex
    # Container state follows plex_deploy_state (default: started).
    state: "{{ plex_deploy_state | default('started') }}"
    image: lscr.io/linuxserver/plex:latest
    pull: always
    restart_policy: unless-stopped
    # Resource limits
    memory: 4g
    cpus: 2.0
    networks:
      - name: "{{ plex_network }}"
    published_ports:
      - "32400:32400"
    volumes:
      - "{{ plex_config_dir }}:/config"
      - "{{ plex_tv_dir }}:/tv"
      - "{{ plex_movies_dir }}:/movies"
    env:
      TZ: America/New_York
      PUID: "1000"
      PGID: "1000"
      VERSION: docker
      PLEX_CLAIM: "{{ vault_plex_claim }}"
|
||||
|
||||
# --------------------------------------------------
|
||||
# STEP 4: SABnzbd + outpost
|
||||
# --------------------------------------------------
|
||||
|
||||
- name: Deploy SABnzbd
  community.docker.docker_container:
    name: sabnzbd
    state: "{{ plex_deploy_state | default('started') }}"
    image: lscr.io/linuxserver/sabnzbd:4.5.5-ls239
    pull: always
    restart_policy: unless-stopped
    # Resource limits
    memory: 1g
    cpus: 0.5
    networks:
      - name: "{{ plex_network }}"
    # Host port 8155 maps to container port 8080.
    published_ports:
      - "8155:8080"
    volumes:
      - "{{ sabnzbd_config_dir }}:/config"
      - "{{ media_base }}/incoming/downloads-sab/complete:/downloads"
      - "{{ media_base }}/incoming/downloads-sab/incomplete:/incomplete-downloads"
      - "{{ media_base }}/incoming/downloads-sab/history:/history"
    env:
      TZ: America/New_York
      PUID: "1000"
      PGID: "1000"
    labels:
      homepage.name: SABnzbd
      homepage.description: Usenet downloader
      homepage.icon: si:sabnzbd
      homepage.url: https://sab.castaldifamily.com
|
||||
|
||||
- name: Deploy Authentik outpost for SABnzbd
  community.docker.docker_container:
    name: authentik-outpost-sabnzbd
    state: "{{ plex_deploy_state | default('started') }}"
    image: ghcr.io/goauthentik/proxy:2025.10.3
    pull: always
    restart_policy: unless-stopped
    # Resource limits
    memory: 256m
    cpus: 0.25
    networks:
      - name: "{{ plex_network }}"
    # Host ports 9004/9447 map to container ports 9000/9443.
    published_ports:
      - "9004:9000"
      - "9447:9443"
    env:
      AUTHENTIK_HOST: https://sso.castaldifamily.com
      AUTHENTIK_HOST_BROWSER: https://sso.castaldifamily.com
      AUTHENTIK_INSECURE: "false"
      AUTHENTIK_TOKEN: "{{ vault_authentik_token_sabnzbd }}"
    labels:
      traefik.enable: "true"
      traefik.http.routers.sabnzbd.rule: "Host(`sab.castaldifamily.com`)"
      traefik.http.routers.sabnzbd.entrypoints: websecure
      traefik.http.routers.sabnzbd.tls: "true"
      traefik.http.routers.sabnzbd.tls.certresolver: cloudflare
      # Same value as the host-published port above.
      traefik.http.services.sabnzbd.loadbalancer.server.port: "9004"
|
||||
|
||||
# --------------------------------------------------
|
||||
# STEP 5: Sonarr + outpost
|
||||
# --------------------------------------------------
|
||||
|
||||
- name: Deploy Sonarr
  community.docker.docker_container:
    name: sonarr
    state: "{{ plex_deploy_state | default('started') }}"
    image: lscr.io/linuxserver/sonarr:4.0.16.2944-ls300
    pull: always
    restart_policy: unless-stopped
    # Resource limits
    memory: 1g
    cpus: 0.5
    networks:
      - name: "{{ plex_network }}"
    published_ports:
      - "8989:8989"
    volumes:
      - "{{ sonarr_config_dir }}:/config"
      - "{{ plex_tv_dir }}:/tv"
      - "{{ media_base }}/incoming/downloads-sab/complete/sonarr:/downloads/sonarr"
    env:
      TZ: America/New_York
      PUID: "1000"
      PGID: "1000"
    labels:
      homepage.name: Sonarr
      homepage.description: TV Shows
      homepage.icon: si:sonarr
      homepage.url: https://sonarr.castaldifamily.com
|
||||
|
||||
- name: Deploy Authentik outpost for Sonarr
  community.docker.docker_container:
    name: authentik-outpost-sonarr
    state: "{{ plex_deploy_state | default('started') }}"
    image: ghcr.io/goauthentik/proxy:2025.10.3
    pull: always
    restart_policy: unless-stopped
    # Resource limits
    memory: 256m
    cpus: 0.25
    networks:
      - name: "{{ plex_network }}"
    # Host ports 9001/9444 map to container ports 9000/9443.
    published_ports:
      - "9001:9000"
      - "9444:9443"
    env:
      AUTHENTIK_HOST: https://sso.castaldifamily.com
      AUTHENTIK_HOST_BROWSER: https://sso.castaldifamily.com
      AUTHENTIK_INSECURE: "false"
      AUTHENTIK_TOKEN: "{{ vault_authentik_token_sonarr }}"
    labels:
      traefik.enable: "true"
      traefik.http.routers.sonarr.rule: "Host(`sonarr.castaldifamily.com`)"
      traefik.http.routers.sonarr.entrypoints: websecure
      traefik.http.routers.sonarr.tls: "true"
      traefik.http.routers.sonarr.tls.certresolver: cloudflare
      # Same value as the host-published port above.
      traefik.http.services.sonarr.loadbalancer.server.port: "9001"
|
||||
|
||||
# --------------------------------------------------
|
||||
# STEP 6: Radarr + outpost
|
||||
# --------------------------------------------------
|
||||
|
||||
- name: Deploy Radarr
  community.docker.docker_container:
    name: radarr
    state: "{{ plex_deploy_state | default('started') }}"
    image: lscr.io/linuxserver/radarr:6.0.4.10291-ls289
    pull: always
    restart_policy: unless-stopped
    # Resource limits
    memory: 1g
    cpus: 0.5
    networks:
      - name: "{{ plex_network }}"
    published_ports:
      - "7878:7878"
    volumes:
      - "{{ radarr_config_dir }}:/config"
      - "{{ plex_movies_dir }}:/movies"
      - "{{ media_base }}/incoming/downloads-sab/complete/radarr:/downloads/radarr"
    env:
      TZ: America/New_York
      PUID: "1000"
      PGID: "1000"
    labels:
      homepage.name: Radarr
      homepage.description: Movies & shows
      homepage.icon: si:radarr
      homepage.url: https://radarr.castaldifamily.com
|
||||
|
||||
- name: Deploy Authentik outpost for Radarr
  community.docker.docker_container:
    name: authentik-outpost-radarr
    state: "{{ plex_deploy_state | default('started') }}"
    image: ghcr.io/goauthentik/proxy:2025.10.3
    pull: always
    restart_policy: unless-stopped
    # Resource limits
    memory: 256m
    cpus: 0.25
    networks:
      - name: "{{ plex_network }}"
    # Host ports 9002/9445 map to container ports 9000/9443.
    published_ports:
      - "9002:9000"
      - "9445:9443"
    env:
      AUTHENTIK_HOST: https://sso.castaldifamily.com
      AUTHENTIK_HOST_BROWSER: https://sso.castaldifamily.com
      AUTHENTIK_INSECURE: "false"
      AUTHENTIK_TOKEN: "{{ vault_authentik_token_radarr }}"
      # NOTE(review): these two variables are set only on this outpost, not on
      # the SABnzbd or Sonarr outposts. Confirm the difference is intentional.
      AUTHENTIK_INSECURE_SKIP_VERIFY: "false"
      TRUST_PROXY_HEADERS: "true"
    labels:
      traefik.enable: "true"
      traefik.http.routers.radarr.rule: "Host(`radarr.castaldifamily.com`)"
      traefik.http.routers.radarr.entrypoints: websecure
      traefik.http.routers.radarr.tls: "true"
      traefik.http.routers.radarr.tls.certresolver: cloudflare
      # Same value as the host-published port above.
      traefik.http.services.radarr.loadbalancer.server.port: "9002"
|
||||
|
||||
# --------------------------------------------------
|
||||
# STEP 7: Overseerr
|
||||
# --------------------------------------------------
|
||||
|
||||
- name: Deploy Overseerr
  community.docker.docker_container:
    name: overseerr
    state: "{{ plex_deploy_state | default('started') }}"
    image: lscr.io/linuxserver/overseerr:1.34.0
    pull: always
    restart_policy: unless-stopped
    # Resource limits
    memory: 512m
    cpus: 0.2
    networks:
      - name: "{{ plex_network }}"
    # Host port 8150 maps to container port 5055.
    published_ports:
      - "8150:5055"
    volumes:
      - "{{ overseerr_config_dir }}:/config"
    env:
      TZ: America/New_York
      PUID: "1000"
      PGID: "1000"
    labels:
      # Routing
      traefik.enable: "true"
      traefik.http.routers.overseerr.rule: "Host(`overseerr.castaldifamily.com`)"
      traefik.http.routers.overseerr.entrypoints: websecure
      traefik.http.routers.overseerr.tls: "true"
      traefik.http.routers.overseerr.tls.certresolver: cloudflare
      traefik.http.routers.overseerr.service: overseerr
      traefik.http.services.overseerr.loadbalancer.server.port: "8150"
      # Dashboard metadata
      homepage.name: Overseerr
      homepage.description: Media request management
      homepage.icon: si:overseerr
      homepage.url: https://overseerr.castaldifamily.com
|
||||
|
||||
# --------------------------------------------------
|
||||
# STEP 8: Wizarr
|
||||
# NOTE: homelab_status=broken in source-compose. Deploying as-is; SSO
|
||||
# integration requires a dedicated Authentik outpost token (not yet
|
||||
# configured). DISABLE_BUILTIN_AUTH=True means the web UI will be
|
||||
# unprotected until the outpost is wired up.
|
||||
# --------------------------------------------------
|
||||
|
||||
- name: Deploy Wizarr
  community.docker.docker_container:
    name: wizarr
    state: "{{ plex_deploy_state | default('started') }}"
    image: ghcr.io/wizarrrr/wizarr:v2025.12.0
    pull: always
    restart_policy: unless-stopped
    # Resource limits
    memory: 512m
    cpus: 0.2
    networks:
      - name: "{{ plex_network }}"
    # Host port 8157 maps to container port 5690.
    published_ports:
      - "8157:5690"
    volumes:
      - "{{ wizarr_config_dir }}:/data/database"
    env:
      TZ: America/New_York
      PUID: "1000"
      PGID: "1000"
      DISABLE_BUILTIN_AUTH: "True"
    labels:
      # Routing
      traefik.enable: "true"
      traefik.http.routers.wizarr.rule: "Host(`wizarr.castaldifamily.com`)"
      traefik.http.routers.wizarr.entrypoints: websecure
      traefik.http.routers.wizarr.tls: "true"
      traefik.http.routers.wizarr.tls.certresolver: cloudflare
      traefik.http.routers.wizarr.service: wizarr
      traefik.http.services.wizarr.loadbalancer.server.port: "8157"
      # Dashboard metadata
      homepage.name: Wizarr
      homepage.description: Media management
      homepage.icon: si:wizarr
      homepage.url: https://wizarr.castaldifamily.com
|
||||
|
||||
# --------------------------------------------------
|
||||
# STEP 9: Summary
|
||||
# --------------------------------------------------
|
||||
|
||||
- name: Show deployment summary
  # Informational only: echoes the paths, network and service names used above.
  ansible.builtin.debug:
    msg:
      - "Plex media stack deployed to {{ inventory_hostname }}"
      - "Plex config : {{ plex_config_dir }} (TNAS)"
      - "Media : {{ media_base }} (TNAS)"
      - "Network : {{ plex_network }}"
      - "Services : plex, sabnzbd, sonarr, radarr, overseerr, wizarr"
      - "Outposts : sabnzbd (9004), sonarr (9001), radarr (9002)"
|
||||
20
ansible/ansible-old/playbooks/docker/deploy_swarm_stack.yml
Normal file
20
ansible/ansible-old/playbooks/docker/deploy_swarm_stack.yml
Normal file
@ -0,0 +1,20 @@
|
||||
---
|
||||
# Generic playbook to deploy one Swarm stack from a repo-tracked compose file.
|
||||
# Usage example:
|
||||
# ansible-playbook -i inventory/hosts.ini playbooks/docker/deploy_swarm_stack.yml \
|
||||
# -e "stack_name=gitea" \
|
||||
# -e "stack_compose_src=/home/chester/homelab/ansible/templates/stacks/gitea.stack.yml" \
|
||||
# -e "stack_required_directories=['/mnt/appdata/gitea']"
|
||||
|
||||
- name: Deploy one stack from source-controlled compose
  hosts: swarm_managers
  gather_facts: false
  become: false
  vars_files:
    - ../../group_vars/all.yml

  tasks:
    # The play targets every manager, but only the first host in the
    # swarm_managers group actually includes the role.
    - name: Deploy from primary manager only
      when:
        - inventory_hostname == groups['swarm_managers'][0]
      ansible.builtin.include_role:
        name: swarm_stack_deploy
|
||||
160
ansible/ansible-old/playbooks/docker/deploy_traefik_kop.yml
Normal file
160
ansible/ansible-old/playbooks/docker/deploy_traefik_kop.yml
Normal file
@ -0,0 +1,160 @@
|
||||
---
|
||||
# playbooks/docker/deploy_traefik_kop.yml
|
||||
#
|
||||
# Purpose:
|
||||
# Deploy the traefik-kop Swarm service, which bridges Swarm service labels
|
||||
# to Traefik routing via Redis. Once deployed, any Swarm service labelled
|
||||
# with traefik.enable=true will have its routes published automatically.
|
||||
#
|
||||
# Architecture:
|
||||
# Swarm services → traefik-kop → Redis (10.0.0.151:6379) → Traefik (heimdall)
|
||||
# traefik-kop reads Docker service state on the Swarm manager and writes
|
||||
# routing rules to Redis. Traefik's redis provider picks them up in real time.
|
||||
#
|
||||
# Pre-requisites:
|
||||
# - Swarm must be active and swarm-manager-1 (10.0.0.211) must be reachable
|
||||
# - Redis on Heimdall (10.0.0.151:6379) must be running
|
||||
# - community.docker collection installed: ansible-galaxy collection install community.docker
|
||||
#
|
||||
# Usage:
|
||||
# ansible-playbook -i inventory/hosts.ini playbooks/docker/deploy_traefik_kop.yml
|
||||
#
|
||||
# Dry-run (no changes to Swarm):
|
||||
# ansible-playbook -i inventory/hosts.ini playbooks/docker/deploy_traefik_kop.yml --check
|
||||
#
|
||||
# Tear down:
|
||||
# ansible-playbook -i inventory/hosts.ini playbooks/docker/deploy_traefik_kop.yml \
|
||||
# -e "stack_state=absent"
|
||||
#
|
||||
# Labelling Swarm services for auto-discovery:
|
||||
# After this deploys, Swarm services only need these labels (under deploy.labels):
|
||||
#
|
||||
# deploy:
|
||||
# labels:
|
||||
# - "traefik.enable=true"
|
||||
# - "traefik.http.routers.<name>.rule=Host(`<domain>.castaldifamily.com`)"
|
||||
# - "traefik.http.routers.<name>.entrypoints=websecure"
|
||||
# - "traefik.http.routers.<name>.tls.certresolver=cloudflare"
|
||||
# - "traefik.http.services.<name>.loadbalancer.server.port=<port>"
|
||||
#
|
||||
# NOTE: Use deploy.labels (not top-level labels) for Swarm services.
|
||||
# Top-level labels apply to the service's containers; deploy.labels apply
|
||||
# to the Swarm service — which is what traefik-kop reads.
|
||||
|
||||
- name: Deploy traefik-kop Swarm stack
  hosts: swarm_managers
  gather_facts: false
  become: false
  vars_files:
    - ../../group_vars/all.yml
  vars:
    # Desired stack state; override with -e "stack_state=absent" to tear down.
    traefik_kop_stack_state: "{{ stack_state | default('present') }}"
|
||||
|
||||
tasks:
|
||||
# --------------------------------------------------
|
||||
# STEP 1: Assert Swarm is active and reachable
|
||||
# --------------------------------------------------
|
||||
|
||||
- name: Verify target is an active Swarm manager
  # Read-only probe. stdout has the form "<LocalNodeState>|<ControlAvailable>",
  # for example "active|true".
  ansible.builtin.command: >
    docker info --format '{{ "{{" }}.Swarm.LocalNodeState{{ "}}" }}|{{ "{{" }}.Swarm.ControlAvailable{{ "}}" }}'
  register: _swarm_info
  changed_when: false
  # Run even under --check: the command changes nothing, and the assertion
  # that follows needs its stdout. Without this the documented dry-run fails.
  check_mode: false
  when: inventory_hostname == groups['swarm_managers'][0]
|
||||
|
||||
- name: Assert Swarm manager pre-conditions
  # Expects the probe output "<LocalNodeState>|<ControlAvailable>" to be
  # exactly "active|true".
  ansible.builtin.assert:
    that:
      # Exact comparison: a substring search for 'active' also matches
      # 'inactive'. default('') yields the fail_msg below instead of an
      # undefined-variable error when the probe produced no stdout.
      - (_swarm_info.stdout | default('') | trim) == 'active|true'
    fail_msg: >-
      {{ inventory_hostname }} must be an active Swarm manager.
      Current state: {{ _swarm_info.stdout | default('unknown') }}
  when: inventory_hostname == groups['swarm_managers'][0]
|
||||
|
||||
# --------------------------------------------------
|
||||
# STEP 2: Ensure proxy-net overlay network exists
|
||||
# WHY: The traefik-kop stack declares proxy-net as an external overlay.
|
||||
# Future Swarm services join this network to be discoverable by kop.
|
||||
# This network is separate from the bridge of the same name on Heimdall.
|
||||
# WHY attachable: allows standalone containers to join for debugging.
|
||||
# --------------------------------------------------
|
||||
|
||||
- name: Ensure proxy-net overlay network exists on Swarm
  # Creates the attachable overlay network on the first swarm manager only.
  when:
    - inventory_hostname == groups['swarm_managers'][0]
  tags:
    - network
  community.docker.docker_network:
    state: present
    driver: overlay
    attachable: true
    name: "{{ edge_routing.swarm.proxy_network }}"
|
||||
|
||||
# --------------------------------------------------
|
||||
# STEP 3: Verify Redis is reachable from manager
|
||||
# WHY: Fail fast before deploying — if kop can't reach Redis, the
|
||||
# container will start but immediately fail to publish routes.
|
||||
# --------------------------------------------------
|
||||
|
||||
- name: Verify Redis on Heimdall is reachable from Swarm manager
  # Waits up to 10 seconds for TCP port 6379 on the edge host to accept
  # connections, checked from the first swarm manager.
  when:
    - inventory_hostname == groups['swarm_managers'][0]
  tags:
    - preflight
  ansible.builtin.wait_for:
    state: started
    host: "{{ edge_routing.edge_host.ip }}"
    port: 6379
    timeout: 10
|
||||
|
||||
# --------------------------------------------------
|
||||
# STEP 4: Deploy traefik-kop stack
|
||||
# WHY swarm_stack_deploy role: handles template render, compose validation,
|
||||
# docker stack deploy idempotently, and external network pre-checks.
|
||||
# --------------------------------------------------
|
||||
|
||||
- name: Deploy traefik-kop stack
  # Hands the stack definition to the swarm_stack_deploy role on the first
  # swarm manager.
  when:
    - inventory_hostname == groups['swarm_managers'][0]
  tags:
    - deploy
  ansible.builtin.include_role:
    name: swarm_stack_deploy
  vars:
    stack_name: "traefik-kop"
    # NOTE(review): stack_state resolves through traefik_kop_stack_state, which
    # itself reads stack_state. Confirm this does not raise a recursive-template
    # error when stack_state is not supplied with -e.
    stack_state: "{{ traefik_kop_stack_state }}"
    stack_compose_src: "{{ playbook_dir }}/../../templates/stacks/traefik-kop.stack.yml"
    stack_required_directories: []
    stack_required_external_networks:
      - "{{ edge_routing.swarm.proxy_network }}"
|
||||
|
||||
# --------------------------------------------------
|
||||
# STEP 5: Verify the service is running
|
||||
# --------------------------------------------------
|
||||
|
||||
- name: Wait for traefik-kop service to converge
  # Polls the Replicas column for traefik-kop_traefik-kop until it contains
  # 1/1. Skipped in check mode and when the stack state is not 'present'.
  when:
    - inventory_hostname == groups['swarm_managers'][0]
    - traefik_kop_stack_state == 'present'
    - not ansible_check_mode
  ansible.builtin.command:
    cmd: >-
      docker service ls --filter name=traefik-kop_traefik-kop --format '{{ "{{" }}.Replicas{{ "}}" }}'
  register: _kop_replicas
  until: _kop_replicas.stdout is search('1/1')
  retries: 6
  delay: 5
  changed_when: false  # read-only query
  tags:
    - verify
|
||||
|
||||
- name: Report deployment result
  # Prints a summary banner built from the edge_routing settings; only the
  # first swarm manager reports.
  when:
    - inventory_hostname == groups['swarm_managers'][0]
  tags:
    - always
  ansible.builtin.debug:
    msg:
      - "================================================"
      - "traefik-kop deployment complete."
      - "================================================"
      - "Stack : traefik-kop"
      - "Manager : {{ inventory_hostname }} ({{ ansible_host | default('') }})"
      - "Redis : {{ edge_routing.integration.redis_addr }}"
      - "Bind IP : {{ edge_routing.swarm.bind_ip }}"
      - "Network : {{ edge_routing.swarm.proxy_network }} (overlay)"
      - "------------------------------------------------"
      - "To verify routes in Redis, run on Heimdall:"
      - " docker exec redis redis-cli keys 'traefik/*'"
      - "================================================"
|
||||
181
ansible/ansible-old/playbooks/docker/heimdall_audit.yml
Normal file
181
ansible/ansible-old/playbooks/docker/heimdall_audit.yml
Normal file
@ -0,0 +1,181 @@
|
||||
---
|
||||
# playbooks/docker/heimdall_audit.yml
|
||||
# Read-only OS and stack health audit for the Heimdall edge router.
|
||||
# Safe to schedule. Makes no changes to any host.
|
||||
#
|
||||
# What this asserts:
|
||||
# OS: kernel, distro, swap, swappiness, bridge netfilter, ip_forward
|
||||
# Docker: log rotation configured
|
||||
# Stack: traefik, redis, docker-socket-proxy containers are running
|
||||
#
|
||||
# Usage:
|
||||
# ansible-playbook -i inventory/hosts.ini playbooks/docker/heimdall_audit.yml
|
||||
#
|
||||
# Output:
|
||||
# outputs/heimdall_audit_<timestamp>.md (repo root)
|
||||
|
||||
- name: "Play 1: Gather Heimdall state"
  # Facts are gathered because the stash task reads ansible_kernel,
  # ansible_distribution* and ansible_swaptotal_mb.
  hosts: heimdall
  gather_facts: true
  become: true
|
||||
|
||||
tasks:
|
||||
- name: Read sysctl values
  # Reads each listed kernel parameter. If sysctl fails the shell fallback
  # prints 0, so an unreadable key is recorded as 0.
  loop:
    - vm.swappiness
    - net.bridge.bridge-nf-call-iptables
    - net.bridge.bridge-nf-call-ip6tables
    - net.ipv4.ip_forward
  ansible.builtin.shell:
    cmd: "sysctl -n {{ item }} 2>/dev/null || echo 0"
  register: sysctl_raw
  check_mode: false  # read-only, so it also runs under --check
  changed_when: false
|
||||
|
||||
- name: Read Docker daemon.json
  # Best-effort read: a failing cat does not fail the task.
  ansible.builtin.command:
    cmd: cat /etc/docker/daemon.json
  register: daemon_json_content
  check_mode: false  # read-only, so it also runs under --check
  failed_when: false
  changed_when: false
|
||||
|
||||
- name: Get running container names
  # Lists one running container name per line; command failures are ignored.
  ansible.builtin.command:
    cmd: >-
      docker ps --format '{{ '{{' }}.Names{{ '}}' }}'
  register: running_containers
  check_mode: false  # read-only, so it also runs under --check
  failed_when: false
  changed_when: false
|
||||
|
||||
- name: Stash audit facts
  # Collects everything the reporting play needs into one host fact so it can
  # be read through hostvars.
  ansible.builtin.set_fact:
    heimdall_audit:
      # Gathered facts
      kernel: "{{ ansible_kernel }}"
      distro: "{{ ansible_distribution }}"
      distro_version: "{{ ansible_distribution_version }}"
      swap_mb: "{{ ansible_swaptotal_mb }}"
      # Sysctl readings, looked up by the loop item that produced them
      swappiness: "{{ (sysctl_raw.results | selectattr('item', 'equalto', 'vm.swappiness') | first).stdout | trim }}"
      bridge_iptables: "{{ (sysctl_raw.results | selectattr('item', 'equalto', 'net.bridge.bridge-nf-call-iptables') | first).stdout | trim }}"
      bridge_ip6tables: "{{ (sysctl_raw.results | selectattr('item', 'equalto', 'net.bridge.bridge-nf-call-ip6tables') | first).stdout | trim }}"
      ip_forward: "{{ (sysctl_raw.results | selectattr('item', 'equalto', 'net.ipv4.ip_forward') | first).stdout | trim }}"
      # True when the daemon.json text contains 'max-size'
      log_rotation_configured: "{{ 'max-size' in (daemon_json_content.stdout | default('{}')) }}"
      # Container list and name-pattern matches against it
      running_containers: "{{ running_containers.stdout_lines | default([]) }}"
      traefik_running: "{{ running_containers.stdout_lines | default([]) | select('search', 'traefik') | list | length > 0 }}"
      redis_running: "{{ running_containers.stdout_lines | default([]) | select('search', 'redis') | list | length > 0 }}"
      socket_proxy_running: "{{ running_containers.stdout_lines | default([]) | select('search', 'socket-proxy|socketproxy|docker-socket') | list | length > 0 }}"
|
||||
|
||||
|
||||
- name: "Play 2: Assertions and drift report"
  hosts: localhost
  gather_facts: false

  vars:
    # Shorthand for the facts stashed on heimdall by Play 1.
    h: "{{ hostvars['heimdall']['heimdall_audit'] }}"
    # NOTE(review): play vars are templated lazily, so this lookup may run
    # again on each use. Confirm the file name and the "Generated:" line
    # cannot end up with different timestamps.
    audit_timestamp: "{{ lookup('pipe', 'date +%Y%m%dT%H%M%S') }}"
    report_path: "{{ playbook_dir }}/../../../outputs/heimdall_audit_{{ audit_timestamp }}.md"
|
||||
|
||||
tasks:
|
||||
- name: Ensure outputs directory exists
  # Directory that receives the Markdown report written by the next task.
  ansible.builtin.file:
    state: directory
    mode: '0755'
    path: "{{ playbook_dir }}/../../../outputs"
|
||||
|
||||
- name: Write drift report
  # Renders the stashed audit facts (h) as a Markdown report at report_path.
  ansible.builtin.copy:
    mode: '0644'
    dest: "{{ report_path }}"
    content: |
      # Heimdall Edge Router Audit Report

      Generated: {{ audit_timestamp }}

      ## System

      | Property | Value |
      |----------|-------|
      | Kernel | `{{ h.kernel }}` |
      | Distro | {{ h.distro }} {{ h.distro_version }} |
      | Swap | {{ h.swap_mb }}MB |

      ## Sysctl

      | Parameter | Value | Expected |
      |-----------|-------|----------|
      | vm.swappiness | {{ h.swappiness }} | 0 |
      | net.bridge.bridge-nf-call-iptables | {{ h.bridge_iptables }} | 1 |
      | net.bridge.bridge-nf-call-ip6tables | {{ h.bridge_ip6tables }} | 1 |
      | net.ipv4.ip_forward | {{ h.ip_forward }} | 1 |

      ## Docker

      | Check | Status |
      |-------|--------|
      | Log rotation configured | {{ '✅' if h.log_rotation_configured | bool else '❌' }} |

      ## Stack Health

      | Container | Status |
      |-----------|--------|
      | traefik | {{ '✅ running' if h.traefik_running | bool else '❌ not running' }} |
      | redis | {{ '✅ running' if h.redis_running | bool else '❌ not running' }} |
      | docker-socket-proxy | {{ '✅ running' if h.socket_proxy_running | bool else '❌ not running' }} |

      ## Running Containers

      {% for c in h.running_containers %}
      - {{ c }}
      {% endfor %}
|
||||
|
||||
- name: Assert swap is disabled
  # Passes only when the recorded total swap is 0 MB.
  ansible.builtin.assert:
    success_msg: "✅ Heimdall: swap disabled"
    fail_msg: "❌ Swap enabled: {{ h.swap_mb }}MB — run heimdall_baseline.yml --tags storage"
    that:
      - h.swap_mb | int == 0
|
||||
|
||||
- name: Assert vm.swappiness=0
  # Passes only when the recorded vm.swappiness value is 0.
  ansible.builtin.assert:
    success_msg: "✅ Heimdall: vm.swappiness=0"
    fail_msg: "❌ vm.swappiness={{ h.swappiness }} — run heimdall_baseline.yml --tags sysctl"
    that:
      - h.swappiness | int == 0
|
||||
|
||||
- name: Assert bridge netfilter enabled
  # Both the IPv4 and IPv6 bridge-nf-call settings must read 1.
  ansible.builtin.assert:
    success_msg: "✅ Heimdall: bridge netfilter enabled"
    fail_msg: >-
      ❌ Bridge netfilter not fully enabled:
      bridge-nf-call-iptables={{ h.bridge_iptables }}
      bridge-nf-call-ip6tables={{ h.bridge_ip6tables }}
      Run heimdall_baseline.yml --tags sysctl.
    that:
      - h.bridge_iptables | int == 1
      - h.bridge_ip6tables | int == 1
|
||||
|
||||
- name: Assert ip_forward enabled
  # Passes only when the recorded net.ipv4.ip_forward value is 1.
  ansible.builtin.assert:
    success_msg: "✅ Heimdall: ip_forward=1"
    fail_msg: "❌ net.ipv4.ip_forward={{ h.ip_forward }} — run heimdall_baseline.yml --tags sysctl"
    that:
      - h.ip_forward | int == 1
|
||||
|
||||
- name: Assert Docker log rotation configured
  # Uses the flag Play 1 derived from the presence of 'max-size' in daemon.json.
  ansible.builtin.assert:
    success_msg: "✅ Heimdall: Docker log rotation configured"
    fail_msg: "❌ Docker log rotation not configured — run heimdall_baseline.yml --tags docker"
    that:
      - h.log_rotation_configured | bool
|
||||
|
||||
- name: Assert Traefik container is running
  # Uses the flag Play 1 derived from the running container names.
  ansible.builtin.assert:
    success_msg: "✅ Heimdall: Traefik running"
    fail_msg: "❌ Traefik container is not running — check: docker ps -a | grep traefik"
    that:
      - h.traefik_running | bool
|
||||
|
||||
- name: Assert Redis container is running
  # Uses the flag Play 1 derived from the running container names.
  ansible.builtin.assert:
    success_msg: "✅ Heimdall: Redis running"
    fail_msg: "❌ Redis container is not running — check: docker ps -a | grep redis"
    that:
      - h.redis_running | bool
|
||||
|
||||
- name: Assert docker-socket-proxy container is running
  # Uses the flag Play 1 derived from the running container names.
  ansible.builtin.assert:
    success_msg: "✅ Heimdall: docker-socket-proxy running"
    fail_msg: "❌ docker-socket-proxy container is not running — check: docker ps -a | grep socket"
    that:
      - h.socket_proxy_running | bool
|
||||
156
ansible/ansible-old/playbooks/docker/heimdall_baseline.yml
Normal file
156
ansible/ansible-old/playbooks/docker/heimdall_baseline.yml
Normal file
@ -0,0 +1,156 @@
|
||||
---
|
||||
# playbooks/docker/heimdall_baseline.yml
|
||||
# Idempotent OS baseline enforcement for the Heimdall edge router host.
|
||||
#
|
||||
# ─────────────────────────────────────────────────────────────────────────────
|
||||
# PURPOSE: Ongoing OS drift enforcement — safe to run any time, safe to schedule.
|
||||
# Does NOT upgrade packages. Does NOT reboot.
|
||||
# Does NOT touch the Traefik/Redis application stack.
|
||||
# For the application stack: use playbooks/self-heal/heimdall.yml
|
||||
# For OS updates: use playbooks/docker/heimdall_update.yml
|
||||
# For audit: use playbooks/docker/heimdall_audit.yml
|
||||
# ─────────────────────────────────────────────────────────────────────────────
|
||||
#
|
||||
# What this enforces (all idempotent):
|
||||
# 0. Packages: Required system packages present (docker-ce, nfs-common, etc.)
|
||||
# 1. Storage: Swap disabled (swapoff + fstab + zram masked)
|
||||
# 2. Sysctl: vm.swappiness=0, bridge netfilter, ip_forward
|
||||
# 3. Docker: /etc/docker/daemon.json with log rotation
|
||||
#
|
||||
# Usage:
|
||||
# ansible-playbook -i inventory/hosts.ini playbooks/docker/heimdall_baseline.yml
|
||||
#
|
||||
# # Dry-run:
|
||||
# ansible-playbook -i inventory/hosts.ini playbooks/docker/heimdall_baseline.yml --check --diff
|
||||
#
|
||||
# # Target a specific section:
|
||||
# ansible-playbook -i inventory/hosts.ini playbooks/docker/heimdall_baseline.yml --tags sysctl
|
||||
|
||||
- name: Heimdall OS baseline enforcement
|
||||
hosts: heimdall
|
||||
become: true
|
||||
|
||||
vars:
|
||||
lab_user: "{{ lab_ansible_user | default('chester') }}"
|
||||
|
||||
handlers:
|
||||
- name: Restart Docker
|
||||
ansible.builtin.service:
|
||||
name: docker
|
||||
state: restarted
|
||||
|
||||
tasks:
|
||||
# Section 0: make sure the OS packages this host depends on are installed.
# state: present only installs what is missing; it never upgrades a package
# that is already there.
- name: "0. Packages: ensure required system packages are present"
  tags: [packages, baseline]
  ansible.builtin.apt:
    name:
      - docker-ce
      - docker-ce-cli
      - containerd.io
      - nfs-common
      - curl
      - htop
      - ca-certificates
    state: present
    update_cache: true
    # Refresh the package index only when it is older than one hour, so
    # repeated or scheduled runs do not hit the mirrors every time.
    # Same value as the package section of swarm_baseline.yml.
    cache_valid_time: 3600
|
||||
|
||||
- name: "1. Storage: disable swap"
|
||||
tags: [storage, baseline]
|
||||
block:
|
||||
- name: Disable swap immediately (covers traditional + zram)
|
||||
ansible.builtin.command: swapoff -a
|
||||
when: ansible_swaptotal_mb > 0
|
||||
changed_when: ansible_swaptotal_mb > 0
|
||||
|
||||
- name: Comment out swap entries in /etc/fstab
|
||||
ansible.builtin.replace:
|
||||
path: /etc/fstab
|
||||
regexp: '^([^#].*\s+swap\s+.*)$'
|
||||
replace: '# \1'
|
||||
|
||||
- name: Remove zram-generator config to prevent zram swap at boot
|
||||
ansible.builtin.copy:
|
||||
dest: /etc/systemd/zram-generator.conf
|
||||
owner: root
|
||||
group: root
|
||||
mode: '0644'
|
||||
content: |
|
||||
# Managed by Ansible — heimdall_baseline.yml
|
||||
# Empty config disables zram swap on Ubuntu 24.04.
|
||||
|
||||
- name: Stop and mask systemd-zram-generator service if present
|
||||
ansible.builtin.systemd:
|
||||
name: systemd-zram-generator
|
||||
state: stopped
|
||||
enabled: false
|
||||
masked: true
|
||||
failed_when: false
|
||||
|
||||
# Best-effort: turn off any zram device that is currently active swap.
# Never fails the play; reports "changed" only when a device was actually
# switched off.
- name: Swapoff zram devices explicitly
  ansible.builtin.shell: |
    for dev in /dev/zram*; do
      # An unmatched glob stays literal, so skip anything that is not a block device.
      [ -b "$dev" ] || continue
      # Skip devices that are not listed as active swap.
      grep -q "^${dev}[[:space:]]" /proc/swaps || continue
      # A failing swapoff is tolerated; only successes are echoed.
      if swapoff "$dev" 2>/dev/null; then
        echo "swapoff ${dev}"
      fi
    done
  register: zram_swapoff
  # One stdout line is printed per device that was disabled.
  changed_when: zram_swapoff.stdout | length > 0
|
||||
|
||||
- name: "2. Sysctl: Docker networking parameters"
|
||||
tags: [sysctl, baseline]
|
||||
block:
|
||||
- name: Ensure br_netfilter module is loaded
|
||||
community.general.modprobe:
|
||||
name: br_netfilter
|
||||
state: present
|
||||
|
||||
- name: Persist br_netfilter module load at boot
|
||||
ansible.builtin.copy:
|
||||
dest: /etc/modules-load.d/br_netfilter.conf
|
||||
content: "br_netfilter\n"
|
||||
owner: root
|
||||
group: root
|
||||
mode: '0644'
|
||||
|
||||
- name: Apply and persist sysctl parameters
|
||||
ansible.posix.sysctl:
|
||||
name: "{{ item.key }}"
|
||||
value: "{{ item.value }}"
|
||||
sysctl_file: /etc/sysctl.d/90-heimdall.conf
|
||||
state: present
|
||||
reload: true
|
||||
loop:
|
||||
- { key: vm.swappiness, value: "0" }
|
||||
- { key: net.bridge.bridge-nf-call-iptables, value: "1" }
|
||||
- { key: net.bridge.bridge-nf-call-ip6tables, value: "1" }
|
||||
- { key: net.ipv4.ip_forward, value: "1" }
|
||||
|
||||
- name: "3. Docker: daemon configuration and log rotation"
|
||||
tags: [docker, baseline]
|
||||
block:
|
||||
- name: Ensure /etc/docker directory exists
|
||||
ansible.builtin.file:
|
||||
path: /etc/docker
|
||||
state: directory
|
||||
owner: root
|
||||
group: root
|
||||
mode: '0755'
|
||||
|
||||
- name: Deploy Docker daemon.json with log rotation
|
||||
ansible.builtin.copy:
|
||||
dest: /etc/docker/daemon.json
|
||||
owner: root
|
||||
group: root
|
||||
mode: '0644'
|
||||
content: |
|
||||
{
|
||||
"log-driver": "json-file",
|
||||
"log-opts": {
|
||||
"max-size": "10m",
|
||||
"max-file": "3"
|
||||
}
|
||||
}
|
||||
notify: Restart Docker
|
||||
|
||||
- name: Ensure '{{ lab_user }}' is in the docker group
|
||||
ansible.builtin.user:
|
||||
name: "{{ lab_user }}"
|
||||
groups: docker
|
||||
append: true
|
||||
81
ansible/ansible-old/playbooks/docker/heimdall_update.yml
Normal file
81
ansible/ansible-old/playbooks/docker/heimdall_update.yml
Normal file
@ -0,0 +1,81 @@
|
||||
---
|
||||
# playbooks/docker/heimdall_update.yml
|
||||
# OS package update for the Heimdall edge router host.
|
||||
#
|
||||
# ─────────────────────────────────────────────────────────────────────────────
|
||||
# ⚠️ HUMAN-TRIGGERED ONLY — do not automate or schedule.
|
||||
# Heimdall is a standalone Docker host (not in Swarm) — no drain needed.
|
||||
# Reboot will take Traefik/edge routing offline briefly.
|
||||
# ─────────────────────────────────────────────────────────────────────────────
|
||||
#
|
||||
# What this does:
|
||||
# 1. Runs apt dist-upgrade
|
||||
# 2. Reboots if a newer kernel was installed and waits for return
|
||||
# 3. Verifies Docker is back up before completing
|
||||
#
|
||||
# Usage:
|
||||
# ansible-playbook -i inventory/hosts.ini playbooks/docker/heimdall_update.yml
|
||||
#
|
||||
# # Dry-run:
|
||||
# ansible-playbook -i inventory/hosts.ini playbooks/docker/heimdall_update.yml --check
|
||||
#
|
||||
# # Update packages but skip reboot even if kernel changed:
|
||||
# ansible-playbook -i inventory/hosts.ini playbooks/docker/heimdall_update.yml --skip-tags reboot
|
||||
|
||||
- name: Heimdall OS update
|
||||
hosts: heimdall
|
||||
become: true
|
||||
|
||||
tasks:
|
||||
- name: Update apt cache
|
||||
ansible.builtin.apt:
|
||||
update_cache: true
|
||||
cache_valid_time: 0
|
||||
|
||||
- name: Run apt dist-upgrade
|
||||
ansible.builtin.apt:
|
||||
upgrade: dist
|
||||
update_cache: false
|
||||
register: dist_upgrade_result
|
||||
tags: [update]
|
||||
|
||||
# Compare the highest-versioned kernel image in /boot with the running kernel.
# Prints "reboot_needed" on stdout when they differ, nothing otherwise.
# Read-only, so it also runs in --check mode (check_mode: false).
- name: Check if a newer kernel is installed but not yet booted
  ansible.builtin.shell:
    cmd: |
      # Abort on any failing pipeline stage. Without this, a failing `ls`
      # leaves LATEST empty, which differs from RUNNING and would request
      # a reboot for no reason.
      set -eo pipefail
      LATEST=$(ls /boot/vmlinuz-* | sort -V | tail -1 | sed 's|/boot/vmlinuz-||')
      RUNNING=$(uname -r)
      if [ "$LATEST" != "$RUNNING" ]; then echo "reboot_needed"; fi
    # `set -o pipefail` is not available in every /bin/sh.
    executable: /bin/bash
  register: reboot_check
  changed_when: false
  check_mode: false
  tags: [reboot]
|
||||
|
||||
- name: Reboot if a newer kernel is installed
|
||||
ansible.builtin.reboot:
|
||||
msg: "Rebooting into updated kernel — initiated by heimdall_update.yml"
|
||||
reboot_timeout: 300
|
||||
when: reboot_check.stdout | trim == 'reboot_needed'
|
||||
tags: [reboot]
|
||||
|
||||
- name: Wait for Heimdall to return post-reboot
|
||||
ansible.builtin.wait_for_connection:
|
||||
delay: 10
|
||||
timeout: 300
|
||||
when: reboot_check.stdout | trim == 'reboot_needed'
|
||||
tags: [reboot]
|
||||
|
||||
- name: Wait for Docker daemon to be ready after reboot
|
||||
ansible.builtin.command: docker info
|
||||
register: docker_ready
|
||||
until: docker_ready.rc == 0
|
||||
retries: 18
|
||||
delay: 10
|
||||
changed_when: false
|
||||
check_mode: false
|
||||
when: reboot_check.stdout | trim == 'reboot_needed'
|
||||
tags: [reboot]
|
||||
|
||||
# Final one-line summary of what the update run did.
- name: Report result
  vars:
    # reboot_check is registered by a task tagged 'reboot'. With
    # --skip-tags reboot that task never runs and the variable is undefined,
    # so fall back to the marker value 'skipped' instead of failing.
    # An empty stdout (check ran, no newer kernel) is left as-is.
    kernel_check: "{{ (reboot_check | default({})).stdout | default('skipped') | trim }}"
  ansible.builtin.debug:
    msg: >-
      ✅ Heimdall updated.
      {{ 'Rebooted into new kernel.' if kernel_check == 'reboot_needed'
      else 'Kernel check skipped — no reboot attempted.' if kernel_check == 'skipped'
      else 'No kernel change — reboot not required.' }}
|
||||
113
ansible/ansible-old/playbooks/docker/install_portainer.yml
Normal file
113
ansible/ansible-old/playbooks/docker/install_portainer.yml
Normal file
@ -0,0 +1,113 @@
|
||||
---
|
||||
- name: Install Portainer server
|
||||
hosts: watchtower
|
||||
become: true
|
||||
gather_facts: true
|
||||
vars:
|
||||
portainer_version: "latest"
|
||||
portainer_data_dir: "/opt/portainer/data"
|
||||
portainer_http_port: 9000
|
||||
portainer_https_port: 9443
|
||||
|
||||
tasks:
|
||||
- name: Ensure Portainer data directory exists
|
||||
ansible.builtin.file:
|
||||
path: "{{ portainer_data_dir }}"
|
||||
state: directory
|
||||
mode: '0755'
|
||||
|
||||
- name: Deploy Portainer server container
|
||||
community.docker.docker_container:
|
||||
name: portainer
|
||||
image: "portainer/portainer-ce:{{ portainer_version }}"
|
||||
state: started
|
||||
restart_policy: always
|
||||
recreate: false
|
||||
pull: true
|
||||
ports:
|
||||
- "{{ portainer_http_port }}:9000"
|
||||
- "{{ portainer_https_port }}:9443"
|
||||
volumes:
|
||||
- "/var/run/docker.sock:/var/run/docker.sock"
|
||||
- "{{ portainer_data_dir }}:/data"
|
||||
|
||||
- name: Wait for Portainer server to become reachable
|
||||
ansible.builtin.wait_for:
|
||||
port: "{{ portainer_http_port }}"
|
||||
delay: 5
|
||||
timeout: 60
|
||||
state: started
|
||||
|
||||
- name: Show Portainer server endpoints
|
||||
ansible.builtin.debug:
|
||||
msg:
|
||||
- "Portainer server is running on {{ inventory_hostname }}"
|
||||
- "HTTP: http://{{ ansible_default_ipv4.address }}:{{ portainer_http_port }}"
|
||||
- "HTTPS: https://{{ ansible_default_ipv4.address }}:{{ portainer_https_port }}"
|
||||
|
||||
- name: Deploy Portainer agent service
|
||||
hosts: swarm_managers[0]
|
||||
become: true
|
||||
gather_facts: false
|
||||
vars:
|
||||
portainer_agent_version: "2.33.6"
|
||||
portainer_agent_port: 9001
|
||||
portainer_agent_network: "portainer_agent_network"
|
||||
|
||||
tasks:
|
||||
- name: Ensure Portainer overlay network exists
|
||||
community.docker.docker_network:
|
||||
name: "{{ portainer_agent_network }}"
|
||||
driver: overlay
|
||||
attachable: true
|
||||
state: present
|
||||
|
||||
- name: Deploy Portainer agent as global swarm service
|
||||
community.docker.docker_swarm_service:
|
||||
name: portainer_agent
|
||||
image: "portainer/agent:{{ portainer_agent_version }}"
|
||||
state: present
|
||||
mode: global
|
||||
publish:
|
||||
- published_port: "{{ portainer_agent_port }}"
|
||||
target_port: 9001
|
||||
protocol: tcp
|
||||
networks:
|
||||
- name: "{{ portainer_agent_network }}"
|
||||
constraints:
|
||||
- node.platform.os == linux
|
||||
mounts:
|
||||
- source: /var/run/docker.sock
|
||||
target: /var/run/docker.sock
|
||||
type: bind
|
||||
- source: /var/lib/docker/volumes
|
||||
target: /var/lib/docker/volumes
|
||||
type: bind
|
||||
- source: /
|
||||
target: /host
|
||||
type: bind
|
||||
|
||||
- name: Show Portainer agent deployment status
|
||||
ansible.builtin.command: docker service ls --filter name=portainer_agent
|
||||
register: portainer_agent_status
|
||||
changed_when: false
|
||||
|
||||
- name: Display Portainer agent summary
|
||||
ansible.builtin.debug:
|
||||
msg:
|
||||
- "Portainer agent service is deployed"
|
||||
- "Network: {{ portainer_agent_network }}"
|
||||
- "Status: {{ portainer_agent_status.stdout }}"
|
||||
|
||||
- name: Display Portainer installation summary
|
||||
hosts: watchtower
|
||||
gather_facts: true
|
||||
|
||||
tasks:
|
||||
- name: Show post-install summary
|
||||
ansible.builtin.debug:
|
||||
msg:
|
||||
- "Portainer installation complete"
|
||||
- "Server URL: http://{{ ansible_default_ipv4.address }}:9000"
|
||||
- "HTTPS URL: https://{{ ansible_default_ipv4.address }}:9443"
|
||||
- "Add Swarm environment in Portainer using any manager IP on port 9001"
|
||||
158
ansible/ansible-old/playbooks/docker/manage_containers.yml
Normal file
158
ansible/ansible-old/playbooks/docker/manage_containers.yml
Normal file
@ -0,0 +1,158 @@
|
||||
---
|
||||
- name: Manage Docker environment
|
||||
hosts: docker_hosts
|
||||
become: true
|
||||
vars:
|
||||
docker_users:
|
||||
- chester
|
||||
docker_daemon_options:
|
||||
log-driver: "json-file"
|
||||
log-opts:
|
||||
max-size: "10m"
|
||||
max-file: "3"
|
||||
storage-driver: "overlay2"
|
||||
docker_cleanup_enabled: false
|
||||
docker_cleanup_older_than_days: 30
|
||||
|
||||
tasks:
|
||||
- name: Install Docker prerequisite packages
|
||||
ansible.builtin.apt:
|
||||
name:
|
||||
- apt-transport-https
|
||||
- ca-certificates
|
||||
- curl
|
||||
- gnupg
|
||||
- lsb-release
|
||||
- python3-pip
|
||||
- python3-docker
|
||||
state: present
|
||||
update_cache: true
|
||||
|
||||
- name: Add Docker apt signing key
|
||||
ansible.builtin.apt_key:
|
||||
url: "https://download.docker.com/linux/ubuntu/gpg"
|
||||
state: present
|
||||
|
||||
- name: Add Docker apt repository
|
||||
ansible.builtin.apt_repository:
|
||||
repo: "deb [arch=amd64] https://download.docker.com/linux/ubuntu {{ ansible_distribution_release }} stable"
|
||||
state: present
|
||||
|
||||
- name: Install Docker Engine packages
|
||||
ansible.builtin.apt:
|
||||
name:
|
||||
- docker-ce
|
||||
- docker-ce-cli
|
||||
- containerd.io
|
||||
- docker-buildx-plugin
|
||||
- docker-compose-plugin
|
||||
state: present
|
||||
update_cache: true
|
||||
|
||||
- name: Ensure Docker service is enabled and started
|
||||
ansible.builtin.systemd:
|
||||
name: docker
|
||||
state: started
|
||||
enabled: true
|
||||
|
||||
- name: Configure Docker daemon options
|
||||
ansible.builtin.copy:
|
||||
content: "{{ docker_daemon_options | to_nice_json }}"
|
||||
dest: /etc/docker/daemon.json
|
||||
mode: '0644'
|
||||
notify: Restart Docker
|
||||
|
||||
- name: Add configured users to docker group
|
||||
ansible.builtin.user:
|
||||
name: "{{ item }}"
|
||||
groups: docker
|
||||
append: true
|
||||
loop: "{{ docker_users }}"
|
||||
|
||||
- name: Ensure Docker networks directory exists
|
||||
ansible.builtin.file:
|
||||
path: /etc/docker/networks
|
||||
state: directory
|
||||
mode: '0755'
|
||||
|
||||
- name: Gather Docker host information
|
||||
community.docker.docker_host_info:
|
||||
register: docker_info
|
||||
|
||||
- name: Show Docker version
|
||||
ansible.builtin.debug:
|
||||
msg: "Docker version {{ docker_info.host_info.ServerVersion }}"
|
||||
|
||||
- name: Ensure required Docker networks exist
|
||||
community.docker.docker_network:
|
||||
name: "{{ item }}"
|
||||
state: present
|
||||
loop:
|
||||
- backend
|
||||
- frontend
|
||||
|
||||
- name: Check Docker disk usage
|
||||
ansible.builtin.command: docker system df
|
||||
register: docker_disk_usage
|
||||
changed_when: false
|
||||
|
||||
- name: Show Docker disk usage output
|
||||
ansible.builtin.debug:
|
||||
var: docker_disk_usage.stdout_lines
|
||||
|
||||
- name: Check for unhealthy containers
|
||||
ansible.builtin.command: docker ps --filter health=unhealthy --format '{{"{{.Names}}\t{{.Status}}"}}'
|
||||
register: unhealthy_containers
|
||||
changed_when: false
|
||||
failed_when: false
|
||||
|
||||
- name: Report unhealthy containers
|
||||
ansible.builtin.debug:
|
||||
msg: "Unhealthy containers detected: {{ unhealthy_containers.stdout_lines }}"
|
||||
when: unhealthy_containers.stdout | length > 0
|
||||
|
||||
- name: Prune Docker resources when cleanup is enabled
|
||||
community.docker.docker_prune:
|
||||
containers: true
|
||||
images: true
|
||||
images_filters:
|
||||
until: "{{ docker_cleanup_older_than_days * 24 }}h"
|
||||
networks: true
|
||||
volumes: true
|
||||
when: docker_cleanup_enabled
|
||||
register: docker_prune_result
|
||||
|
||||
- name: Show Docker cleanup results
|
||||
ansible.builtin.debug:
|
||||
var: docker_prune_result
|
||||
when: docker_cleanup_enabled
|
||||
|
||||
- name: Create Docker backup directory
|
||||
ansible.builtin.file:
|
||||
path: /opt/docker-backups
|
||||
state: directory
|
||||
mode: '0750'
|
||||
|
||||
- name: Find docker-compose files
|
||||
ansible.builtin.find:
|
||||
paths:
|
||||
- /opt
|
||||
- /home
|
||||
patterns: "docker-compose*.yml"
|
||||
recurse: true
|
||||
register: compose_files
|
||||
|
||||
# Copy every compose file found by the preceding find task into
# /opt/docker-backups, suffixed with today's date.
- name: Back up docker-compose files
  ansible.builtin.copy:
    src: "{{ item.path }}"
    # Flatten the full source path into the backup file name
    # (/opt/app/docker-compose.yml -> opt_app_docker-compose.yml.<date>).
    # Using only the basename made files with the same name in different
    # directories overwrite each other's backup.
    dest: "/opt/docker-backups/{{ item.path | regex_replace('^/+', '') | replace('/', '_') }}.{{ ansible_date_time.date }}"
    remote_src: true
    mode: '0644'
  # Looping over an empty list is a no-op, so no extra length guard is needed.
  loop: "{{ compose_files.files }}"
  loop_control:
    # Show only the path per item instead of the whole find result entry.
    label: "{{ item.path }}"
|
||||
|
||||
handlers:
|
||||
- name: Restart Docker
|
||||
ansible.builtin.systemd:
|
||||
name: docker
|
||||
state: restarted
|
||||
201
ansible/ansible-old/playbooks/docker/swarm_audit.yml
Normal file
201
ansible/ansible-old/playbooks/docker/swarm_audit.yml
Normal file
@ -0,0 +1,201 @@
|
||||
---
|
||||
# playbooks/docker/swarm_audit.yml
|
||||
# Read-only cross-node consistency audit for the Docker Swarm cluster.
|
||||
# Safe to schedule. Makes no changes to any host.
|
||||
#
|
||||
# What this does:
|
||||
# Play 1 — Gathers key state from all swarm_hosts nodes (kernel, distro,
|
||||
# swap, sysctl, daemon.json, Docker Swarm role)
|
||||
# Play 2 — Asserts consistency across all 6 nodes and writes a markdown
|
||||
# drift report to outputs/swarm_audit_<timestamp>.md
|
||||
#
|
||||
# Usage:
|
||||
# ansible-playbook -i inventory/hosts.ini playbooks/docker/swarm_audit.yml
|
||||
#
|
||||
# Output:
|
||||
# outputs/swarm_audit_<timestamp>.md (repo root)
|
||||
|
||||
- name: "Play 1: Gather Swarm node state"
|
||||
hosts: swarm_hosts
|
||||
become: true
|
||||
gather_facts: true
|
||||
|
||||
tasks:
|
||||
- name: Read sysctl values for audit
|
||||
ansible.builtin.shell: "sysctl -n {{ item }} 2>/dev/null || echo 0"
|
||||
register: sysctl_raw
|
||||
loop:
|
||||
- vm.swappiness
|
||||
- net.bridge.bridge-nf-call-iptables
|
||||
- net.bridge.bridge-nf-call-ip6tables
|
||||
- net.ipv4.ip_forward
|
||||
changed_when: false
|
||||
check_mode: false
|
||||
|
||||
- name: Read Docker daemon.json
|
||||
ansible.builtin.command: cat /etc/docker/daemon.json
|
||||
register: daemon_json_content
|
||||
changed_when: false
|
||||
failed_when: false
|
||||
check_mode: false
|
||||
|
||||
# Query the local Docker daemon for its swarm membership.
# stdout has the form "<LocalNodeState>:<ControlAvailable>".
# Read-only; failures are tolerated so one broken node does not stop the audit.
- name: Get Docker Swarm node role
  # No pipes, redirects or expansions are needed, so run docker directly
  # via command/argv instead of through a shell.
  ansible.builtin.command:
    argv:
      - docker
      - info
      - --format
      # The quoted braces keep Jinja from evaluating docker's Go template.
      - "{{ '{{' }}.Swarm.LocalNodeState{{ '}}' }}:{{ '{{' }}.Swarm.ControlAvailable{{ '}}' }}"
  register: docker_swarm_info
  changed_when: false
  failed_when: false
  check_mode: false
|
||||
|
||||
# Collect everything gathered on this node into one `swarm_audit` fact so the
# localhost play can read it through hostvars.
- name: Stash per-node audit facts
  vars:
    # docker info output split on ':'. The docker info task is allowed to
    # fail, in which case stdout is empty and this list has fewer than two
    # elements, so each index below is guarded with default().
    swarm_info_fields: "{{ (docker_swarm_info.stdout | default('')).split(':') }}"
  ansible.builtin.set_fact:
    swarm_audit:
      kernel: "{{ ansible_kernel }}"
      distro: "{{ ansible_distribution }}"
      distro_version: "{{ ansible_distribution_version }}"
      swap_mb: "{{ ansible_swaptotal_mb }}"
      # Each sysctl value is picked out of the looped result by its key name.
      swappiness: "{{ (sysctl_raw.results | selectattr('item', 'equalto', 'vm.swappiness') | first).stdout | trim }}"
      bridge_iptables: "{{ (sysctl_raw.results | selectattr('item', 'equalto', 'net.bridge.bridge-nf-call-iptables') | first).stdout | trim }}"
      bridge_ip6tables: "{{ (sysctl_raw.results | selectattr('item', 'equalto', 'net.bridge.bridge-nf-call-ip6tables') | first).stdout | trim }}"
      ip_forward: "{{ (sysctl_raw.results | selectattr('item', 'equalto', 'net.ipv4.ip_forward') | first).stdout | trim }}"
      daemon_json: "{{ daemon_json_content.stdout | default('{}') }}"
      # Plain substring check on the raw file text, not a JSON parse.
      log_rotation_configured: "{{ 'max-size' in (daemon_json_content.stdout | default('{}')) }}"
      swarm_local_state: "{{ swarm_info_fields[0] | default('') | trim }}"
      # A missing second field means "not a manager" rather than a templating error.
      swarm_is_manager: "{{ swarm_info_fields[1] | default('') | trim | lower == 'true' }}"
|
||||
|
||||
|
||||
- name: "Play 2: Cross-node consistency assertions and drift report"
|
||||
hosts: localhost
|
||||
gather_facts: false
|
||||
|
||||
vars:
|
||||
swarm_nodes: "{{ groups['swarm_hosts'] }}"
|
||||
managers: "{{ groups['swarm_managers'] }}"
|
||||
workers: "{{ groups['swarm_workers'] }}"
|
||||
audit_timestamp: "{{ lookup('pipe', 'date +%Y%m%dT%H%M%S') }}"
|
||||
report_path: "{{ playbook_dir }}/../../../outputs/swarm_audit_{{ audit_timestamp }}.md"
|
||||
|
||||
tasks:
|
||||
- name: Ensure outputs directory exists
|
||||
ansible.builtin.file:
|
||||
path: "{{ playbook_dir }}/../../../outputs"
|
||||
state: directory
|
||||
mode: '0755'
|
||||
|
||||
- name: Write drift report
|
||||
ansible.builtin.copy:
|
||||
dest: "{{ report_path }}"
|
||||
mode: '0644'
|
||||
content: |
|
||||
# Swarm Cluster Audit Report
|
||||
|
||||
Generated: {{ audit_timestamp }}
|
||||
Nodes audited: {{ swarm_nodes | join(', ') }}
|
||||
|
||||
## Node Summary
|
||||
|
||||
| Node | Role | Kernel | Distro | Swap | Swappiness | Bridge IPTables | IP Forward | Log Rotation |
|
||||
|------|------|--------|--------|------|------------|-----------------|------------|--------------|
|
||||
{% for node in swarm_nodes %}
|
||||
| {{ node }} | {{ 'Manager' if hostvars[node]['swarm_audit']['swarm_is_manager'] | bool else 'Worker' }} | `{{ hostvars[node]['swarm_audit']['kernel'] }}` | {{ hostvars[node]['swarm_audit']['distro'] }} {{ hostvars[node]['swarm_audit']['distro_version'] }} | {{ hostvars[node]['swarm_audit']['swap_mb'] }}MB | {{ hostvars[node]['swarm_audit']['swappiness'] }} | {{ hostvars[node]['swarm_audit']['bridge_iptables'] }} | {{ hostvars[node]['swarm_audit']['ip_forward'] }} | {{ '✅' if hostvars[node]['swarm_audit']['log_rotation_configured'] | bool else '❌' }} |
|
||||
{% endfor %}
|
||||
|
||||
## Swarm Role Mapping
|
||||
|
||||
| Node | Inventory Role | Docker ControlAvailable |
|
||||
|------|----------------|------------------------|
|
||||
{% for node in managers %}
|
||||
| {{ node }} | Manager | {{ '✅ true' if hostvars[node]['swarm_audit']['swarm_is_manager'] | bool else '❌ false (DRIFT!)' }} |
|
||||
{% endfor %}
|
||||
{% for node in workers %}
|
||||
| {{ node }} | Worker | {{ '❌ true (UNEXPECTED!)' if hostvars[node]['swarm_audit']['swarm_is_manager'] | bool else '✅ false' }} |
|
||||
{% endfor %}
|
||||
|
||||
## Docker Swarm State
|
||||
|
||||
| Node | LocalNodeState |
|
||||
|------|----------------|
|
||||
{% for node in swarm_nodes %}
|
||||
| {{ node }} | {{ hostvars[node]['swarm_audit']['swarm_local_state'] }} |
|
||||
{% endfor %}
|
||||
|
||||
- name: Assert kernel consistency across all nodes
|
||||
ansible.builtin.assert:
|
||||
that:
|
||||
- hostvars[item]['swarm_audit']['kernel'] == hostvars[swarm_nodes[0]]['swarm_audit']['kernel']
|
||||
fail_msg: >-
|
||||
❌ Kernel drift: {{ item }} has {{ hostvars[item]['swarm_audit']['kernel'] }}
|
||||
but {{ swarm_nodes[0] }} has {{ hostvars[swarm_nodes[0]]['swarm_audit']['kernel'] }}
|
||||
success_msg: "✅ {{ item }}: kernel {{ hostvars[item]['swarm_audit']['kernel'] }}"
|
||||
loop: "{{ swarm_nodes }}"
|
||||
|
||||
- name: Assert distro version consistency across all nodes
|
||||
ansible.builtin.assert:
|
||||
that:
|
||||
- hostvars[item]['swarm_audit']['distro_version'] == hostvars[swarm_nodes[0]]['swarm_audit']['distro_version']
|
||||
fail_msg: >-
|
||||
❌ Distro version drift: {{ item }} has {{ hostvars[item]['swarm_audit']['distro_version'] }}
|
||||
but {{ swarm_nodes[0] }} has {{ hostvars[swarm_nodes[0]]['swarm_audit']['distro_version'] }}
|
||||
success_msg: "✅ {{ item }}: distro {{ hostvars[item]['swarm_audit']['distro'] }} {{ hostvars[item]['swarm_audit']['distro_version'] }}"
|
||||
loop: "{{ swarm_nodes }}"
|
||||
|
||||
- name: Assert swap is disabled on all nodes
|
||||
ansible.builtin.assert:
|
||||
that:
|
||||
- hostvars[item]['swarm_audit']['swap_mb'] | int == 0
|
||||
fail_msg: "❌ Swap is enabled on {{ item }}: {{ hostvars[item]['swarm_audit']['swap_mb'] }}MB — run swarm_baseline.yml --tags storage"
|
||||
success_msg: "✅ {{ item }}: swap disabled"
|
||||
loop: "{{ swarm_nodes }}"
|
||||
|
||||
- name: Assert vm.swappiness=0 on all nodes
|
||||
ansible.builtin.assert:
|
||||
that:
|
||||
- hostvars[item]['swarm_audit']['swappiness'] | int == 0
|
||||
fail_msg: "❌ vm.swappiness={{ hostvars[item]['swarm_audit']['swappiness'] }} on {{ item }} — run swarm_baseline.yml --tags sysctl"
|
||||
success_msg: "✅ {{ item }}: vm.swappiness=0"
|
||||
loop: "{{ swarm_nodes }}"
|
||||
|
||||
- name: Assert bridge netfilter is enabled on all nodes
|
||||
ansible.builtin.assert:
|
||||
that:
|
||||
- hostvars[item]['swarm_audit']['bridge_iptables'] | int == 1
|
||||
- hostvars[item]['swarm_audit']['bridge_ip6tables'] | int == 1
|
||||
fail_msg: >-
|
||||
❌ Bridge netfilter not fully enabled on {{ item }}:
|
||||
bridge-nf-call-iptables={{ hostvars[item]['swarm_audit']['bridge_iptables'] }}
|
||||
bridge-nf-call-ip6tables={{ hostvars[item]['swarm_audit']['bridge_ip6tables'] }}
|
||||
Run swarm_baseline.yml --tags sysctl to fix.
|
||||
success_msg: "✅ {{ item }}: bridge netfilter enabled"
|
||||
loop: "{{ swarm_nodes }}"
|
||||
|
||||
- name: Assert ip_forward is enabled on all nodes
|
||||
ansible.builtin.assert:
|
||||
that:
|
||||
- hostvars[item]['swarm_audit']['ip_forward'] | int == 1
|
||||
fail_msg: "❌ net.ipv4.ip_forward={{ hostvars[item]['swarm_audit']['ip_forward'] }} on {{ item }} — run swarm_baseline.yml --tags sysctl"
|
||||
success_msg: "✅ {{ item }}: ip_forward=1"
|
||||
loop: "{{ swarm_nodes }}"
|
||||
|
||||
- name: Assert Docker log rotation configured on all nodes
|
||||
ansible.builtin.assert:
|
||||
that:
|
||||
- hostvars[item]['swarm_audit']['log_rotation_configured'] | bool
|
||||
fail_msg: "❌ Docker log rotation not configured on {{ item }} — run swarm_baseline.yml --tags docker"
|
||||
success_msg: "✅ {{ item }}: Docker log rotation configured"
|
||||
loop: "{{ swarm_nodes }}"
|
||||
|
||||
- name: Assert swarm_managers are Docker managers
|
||||
ansible.builtin.assert:
|
||||
that:
|
||||
- hostvars[item]['swarm_audit']['swarm_is_manager'] | bool
|
||||
fail_msg: "❌ {{ item }} is in swarm_managers inventory group but Docker reports it is NOT a manager"
|
||||
success_msg: "✅ {{ item }}: confirmed Docker manager"
|
||||
loop: "{{ managers }}"
|
||||
|
||||
- name: Assert swarm_workers are not Docker managers
|
||||
ansible.builtin.assert:
|
||||
that:
|
||||
- not (hostvars[item]['swarm_audit']['swarm_is_manager'] | bool)
|
||||
fail_msg: "❌ {{ item }} is in swarm_workers inventory group but Docker reports it is a Manager"
|
||||
success_msg: "✅ {{ item }}: confirmed Docker worker"
|
||||
loop: "{{ workers }}"
|
||||
188
ansible/ansible-old/playbooks/docker/swarm_baseline.yml
Normal file
188
ansible/ansible-old/playbooks/docker/swarm_baseline.yml
Normal file
@ -0,0 +1,188 @@
|
||||
---
|
||||
# playbooks/docker/swarm_baseline.yml
|
||||
# Idempotent Ubuntu/Docker Swarm node baseline enforcement.
|
||||
#
|
||||
# ─────────────────────────────────────────────────────────────────────────────
|
||||
# PURPOSE: Ongoing drift enforcement — safe to run any time, safe to schedule.
|
||||
# Does NOT upgrade packages. Does NOT reboot.
|
||||
# For rolling OS updates: use playbooks/docker/swarm_update.yml
|
||||
# For cross-node consistency audit: use playbooks/docker/swarm_audit.yml
|
||||
# ─────────────────────────────────────────────────────────────────────────────
|
||||
#
|
||||
# What this enforces (all idempotent):
|
||||
# 0. Identity: Operational user, SSH key, passwordless sudo, docker group
|
||||
# 1. Packages: Required packages present (docker-ce, nfs-common, curl, htop)
|
||||
# 2. Storage: Swap disabled (swapoff -a + fstab commented)
|
||||
# 3. Sysctl: vm.swappiness=0, bridge netfilter, ip_forward (Docker requirements)
|
||||
# 4. Docker: /etc/docker/daemon.json with log rotation
|
||||
#
|
||||
# Usage:
|
||||
# # All swarm nodes:
|
||||
# ansible-playbook -i inventory/hosts.ini playbooks/docker/swarm_baseline.yml
|
||||
#
|
||||
# # Single node:
|
||||
# ansible-playbook -i inventory/hosts.ini playbooks/docker/swarm_baseline.yml --limit swarm-manager-1
|
||||
#
|
||||
# # Dry-run:
|
||||
# ansible-playbook -i inventory/hosts.ini playbooks/docker/swarm_baseline.yml --check --diff
|
||||
#
|
||||
# # Target a specific section only:
|
||||
# ansible-playbook -i inventory/hosts.ini playbooks/docker/swarm_baseline.yml --tags sysctl
|
||||
# ansible-playbook -i inventory/hosts.ini playbooks/docker/swarm_baseline.yml --tags docker
|
||||
|
||||
- name: Swarm node baseline enforcement
|
||||
hosts: swarm_hosts
|
||||
become: true
|
||||
|
||||
vars:
|
||||
lab_user: "{{ lab_ansible_user | default('chester') }}"
|
||||
controller_ssh_pubkey_candidates:
|
||||
- "{{ lookup('env', 'HOME') }}/.ssh/id_ed25519_homelab.pub"
|
||||
- "{{ lookup('env', 'HOME') }}/.ssh/id_ed25519.pub"
|
||||
|
||||
handlers:
|
||||
- name: Restart Docker
|
||||
ansible.builtin.service:
|
||||
name: docker
|
||||
state: restarted
|
||||
|
||||
tasks:
|
||||
- name: "0. Identity: ensure user '{{ lab_user }}' is configured"
|
||||
tags: [identity, baseline]
|
||||
block:
|
||||
- name: "Ensure group '{{ lab_user }}' exists"
|
||||
ansible.builtin.group:
|
||||
name: "{{ lab_user }}"
|
||||
state: present
|
||||
|
||||
# This identity section runs before the package section that installs
# docker-ce, so nothing earlier in the play guarantees a `docker` group.
# Create it here so the supplementary-group assignment below cannot fail
# on a missing group.
- name: Ensure docker group exists
  ansible.builtin.group:
    name: docker
    state: present
    system: true

# Operational account: own primary group, sudo + docker as supplementary
# groups (appended, existing memberships are kept), bash shell, and a locked
# password.
- name: "Ensure user '{{ lab_user }}' exists with sudo and docker access"
  ansible.builtin.user:
    name: "{{ lab_user }}"
    group: "{{ lab_user }}"
    groups:
      - sudo
      - docker
    append: true
    shell: /bin/bash
    password: '!'
    password_lock: true
|
||||
|
||||
- name: Locate SSH public key on control machine
|
||||
ansible.builtin.set_fact:
|
||||
controller_ssh_pubkey_path: >-
|
||||
{{ lookup('ansible.builtin.first_found', {'files': controller_ssh_pubkey_candidates, 'skip': true}) }}
|
||||
delegate_to: localhost
|
||||
become: false
|
||||
|
||||
- name: Fail early if SSH public key is missing
|
||||
ansible.builtin.fail:
|
||||
msg: >-
|
||||
SSH public key not found on the control machine.
|
||||
Checked: {{ controller_ssh_pubkey_candidates | join(', ') }}
|
||||
Generate one with: ssh-keygen -t ed25519 -f ~/.ssh/id_ed25519
|
||||
when: controller_ssh_pubkey_path | default('') | length == 0
|
||||
|
||||
- name: "Deploy SSH key to {{ lab_user }}"
|
||||
ansible.posix.authorized_key:
|
||||
user: "{{ lab_user }}"
|
||||
state: present
|
||||
key: "{{ lookup('file', controller_ssh_pubkey_path) }}"
|
||||
|
||||
- name: "Grant '{{ lab_user }}' passwordless sudo"
|
||||
ansible.builtin.copy:
|
||||
dest: "/etc/sudoers.d/{{ lab_user }}"
|
||||
content: "{{ lab_user }} ALL=(ALL) NOPASSWD: ALL\n"
|
||||
mode: '0440'
|
||||
owner: root
|
||||
group: root
|
||||
validate: '/usr/sbin/visudo -cf %s'
|
||||
|
||||
- name: "1. Packages: ensure required packages are present"
|
||||
tags: [packages, baseline]
|
||||
block:
|
||||
- name: Update apt cache
|
||||
ansible.builtin.apt:
|
||||
update_cache: true
|
||||
cache_valid_time: 3600
|
||||
|
||||
- name: Ensure required packages present
|
||||
ansible.builtin.apt:
|
||||
name:
|
||||
- docker-ce
|
||||
- docker-ce-cli
|
||||
- containerd.io
|
||||
- nfs-common
|
||||
- curl
|
||||
- htop
|
||||
- ca-certificates
|
||||
state: present
|
||||
|
||||
- name: "2. Storage: disable swap"
|
||||
tags: [storage, baseline]
|
||||
block:
|
||||
- name: Disable swap immediately
|
||||
ansible.builtin.command: swapoff -a
|
||||
when: ansible_swaptotal_mb > 0
|
||||
changed_when: ansible_swaptotal_mb > 0
|
||||
|
||||
- name: Comment out swap entries in /etc/fstab
|
||||
ansible.builtin.replace:
|
||||
path: /etc/fstab
|
||||
regexp: '^([^#].*\s+swap\s+.*)$'
|
||||
replace: '# \1'
|
||||
|
||||
- name: "3. Sysctl: apply Docker Swarm networking parameters"
|
||||
tags: [sysctl, baseline]
|
||||
block:
|
||||
- name: Ensure br_netfilter module is loaded
|
||||
community.general.modprobe:
|
||||
name: br_netfilter
|
||||
state: present
|
||||
|
||||
- name: Persist br_netfilter module load at boot
|
||||
ansible.builtin.copy:
|
||||
dest: /etc/modules-load.d/br_netfilter.conf
|
||||
content: "br_netfilter\n"
|
||||
owner: root
|
||||
group: root
|
||||
mode: '0644'
|
||||
|
||||
- name: Apply and persist sysctl parameters for swarm
|
||||
ansible.posix.sysctl:
|
||||
name: "{{ item.key }}"
|
||||
value: "{{ item.value }}"
|
||||
sysctl_file: /etc/sysctl.d/90-swarm.conf
|
||||
state: present
|
||||
reload: true
|
||||
loop:
|
||||
- { key: vm.swappiness, value: "0" }
|
||||
- { key: net.bridge.bridge-nf-call-iptables, value: "1" }
|
||||
- { key: net.bridge.bridge-nf-call-ip6tables, value: "1" }
|
||||
- { key: net.ipv4.ip_forward, value: "1" }
|
||||
|
||||
- name: "4. Docker: daemon configuration and log rotation"
|
||||
tags: [docker, baseline]
|
||||
block:
|
||||
- name: Ensure /etc/docker directory exists
|
||||
ansible.builtin.file:
|
||||
path: /etc/docker
|
||||
state: directory
|
||||
owner: root
|
||||
group: root
|
||||
mode: '0755'
|
||||
|
||||
- name: Deploy Docker daemon.json with log rotation
|
||||
ansible.builtin.copy:
|
||||
dest: /etc/docker/daemon.json
|
||||
owner: root
|
||||
group: root
|
||||
mode: '0644'
|
||||
content: |
|
||||
{
|
||||
"log-driver": "json-file",
|
||||
"log-opts": {
|
||||
"max-size": "10m",
|
||||
"max-file": "3"
|
||||
}
|
||||
}
|
||||
notify: Restart Docker
|
||||
110
ansible/ansible-old/playbooks/docker/swarm_preflight.yml
Normal file
110
ansible/ansible-old/playbooks/docker/swarm_preflight.yml
Normal file
@ -0,0 +1,110 @@
|
||||
---
|
||||
# ansible/playbooks/docker/swarm_preflight.yml
|
||||
#
|
||||
# Swarm Foundation Pre-flight
|
||||
# ===========================
|
||||
# Addresses all four hard prerequisites before any service can be deployed to
|
||||
# the swarm. Run this once after swarm_bootstrap and before swarm_stack_deploy.
|
||||
#
|
||||
# Prerequisites satisfied:
|
||||
# 1. NFS mounts — /mnt/homelab + /mnt/media mounted on every node
|
||||
# 2. proxy-net — overlay network present on the swarm (172.20.0.0/24)
|
||||
# 3. Node labels — role=manager / role=worker applied to every node
|
||||
# 4. /opt/stacks — deploy root created on every node (owned by lab user)
|
||||
#
|
||||
# Usage:
|
||||
# # Dry-run (safe, no changes):
|
||||
# ansible-playbook -i inventory/hosts.ini playbooks/docker/swarm_preflight.yml --check
|
||||
#
|
||||
# # Live run:
|
||||
# ansible-playbook -i inventory/hosts.ini playbooks/docker/swarm_preflight.yml
|
||||
#
|
||||
# # Single-concern run:
|
||||
# ansible-playbook -i inventory/hosts.ini playbooks/docker/swarm_preflight.yml --tags storage
|
||||
# ansible-playbook -i inventory/hosts.ini playbooks/docker/swarm_preflight.yml --tags network
|
||||
# ansible-playbook -i inventory/hosts.ini playbooks/docker/swarm_preflight.yml --tags labels
|
||||
# ansible-playbook -i inventory/hosts.ini playbooks/docker/swarm_preflight.yml --tags stacks_root
|
||||
#
|
||||
# Verification (post-run):
|
||||
# ansible swarm_hosts -i inventory/hosts.ini -m command -a "findmnt /mnt/homelab"
|
||||
# ansible swarm_hosts -i inventory/hosts.ini -m stat -a "path=/opt/stacks"
|
||||
# docker node ls --format '{{ "{{" }}.Hostname{{ "}}" }}\t{{ "{{" }}.Labels{{ "}}" }}'
|
||||
# docker network inspect proxy-net
|
||||
|
||||
###############################################################################
|
||||
# PLAY 1 — Storage: NFS mounts + /opt/stacks on every swarm node #
|
||||
###############################################################################
|
||||
# Play 1: mount shared storage (storage_mounts role) and create the
# /opt/stacks deploy root on every swarm node.
#
# There are deliberately NO play-level tags here. Play-level tags are
# inherited by every role and task in the play, so tagging the play with
# both "storage" and "stacks_root" made `--tags stacks_root` also run the
# storage_mounts role (and `--tags storage` also create /opt/stacks).
# Tagging only the role and the task keeps single-concern runs
# single-concern.
- name: "Swarm pre-flight | Storage"
  hosts: swarm_hosts
  become: true
  gather_facts: false

  vars:
    # Owner of the deploy root; falls back to 'chester' when
    # lab_ansible_user is not defined in inventory/group_vars.
    lab_user: "{{ lab_ansible_user | default('chester') }}"

  roles:
    - role: storage_mounts
      tags: [storage]

  tasks:
    - name: "Create /opt/stacks deploy root"
      ansible.builtin.file:
        path: /opt/stacks
        state: directory
        owner: "{{ lab_user }}"
        group: "{{ lab_user }}"
        mode: "0755"
      tags: [stacks_root]
|
||||
|
||||
###############################################################################
|
||||
# PLAY 2 — Network: ensure proxy-net overlay exists (run from one manager) #
|
||||
###############################################################################
|
||||
# Play 2: runs the swarm_overlay_network role on the first host of the
# swarm_managers group only.
- name: "Swarm pre-flight | proxy-net overlay network"
  hosts: "swarm_managers[0]"
  gather_facts: false
  become: false
  tags:
    - network

  roles:
    - role: swarm_overlay_network
      tags:
        - network
|
||||
|
||||
###############################################################################
|
||||
# PLAY 3 — Labels: apply role=manager / role=worker to every swarm node #
|
||||
###############################################################################
|
||||
# Play 3: from the first manager, label every node in the swarm_managers
# group with role=manager and every node in swarm_workers with role=worker,
# then print the resulting labels.
# Inventory hostnames are passed to `docker node update` as node names.
- name: "Swarm pre-flight | Node labels"
  hosts: swarm_managers[0]
  become: false
  gather_facts: false
  tags: [labels]

  tasks:
    # docker node update is idempotent — labels are additive and
    # re-applying the same label does not change cluster state.
    - name: "Apply role=manager label to manager nodes"
      ansible.builtin.command: >-
        docker node update --label-add role=manager {{ item }}
      loop: "{{ groups['swarm_managers'] }}"
      changed_when: false
      tags: [labels]

    # default([]) keeps a manager-only inventory (no swarm_workers group)
    # from failing with an undefined-variable error.
    - name: "Apply role=worker label to worker nodes"
      ansible.builtin.command: >-
        docker node update --label-add role=worker {{ item }}
      loop: "{{ groups['swarm_workers'] | default([]) }}"
      changed_when: false
      tags: [labels]

    # Read-only, so it also runs under --check and the dry-run shows the
    # labels currently on the cluster. "$node" is quoted to avoid word
    # splitting on unusual hostnames.
    - name: "Show node label summary"
      ansible.builtin.shell: >-
        for node in $(docker node ls --format "{{ '{{' }}.Hostname{{ '}}' }}"); do
        echo "$node $(docker node inspect "$node" --format '{{ '{{' }}json .Spec.Labels{{ '}}' }}')";
        done
      register: swarm_node_summary
      changed_when: false
      check_mode: false
      tags: [labels]

    - name: "Print node label summary"
      ansible.builtin.debug:
        msg: "{{ swarm_node_summary.stdout_lines }}"
      tags: [labels]
|
||||
170
ansible/ansible-old/playbooks/docker/swarm_update.yml
Normal file
170
ansible/ansible-old/playbooks/docker/swarm_update.yml
Normal file
@ -0,0 +1,170 @@
|
||||
---
|
||||
# playbooks/docker/swarm_update.yml
|
||||
# Rolling Docker Swarm node OS update with drain-before-reboot.
|
||||
#
|
||||
# ─────────────────────────────────────────────────────────────────────────────
|
||||
# ⚠️ HUMAN-TRIGGERED ONLY — do not automate or schedule.
|
||||
# serial: 1 ensures one node is updated at a time.
|
||||
# Each node is drained before update and re-activated after reboot.
|
||||
# ─────────────────────────────────────────────────────────────────────────────
|
||||
#
|
||||
# What this does per node:
|
||||
# 1. Pre-checks that Docker Swarm is healthy on the node
|
||||
# 2. Drains the node (tasks migrate to remaining nodes)
|
||||
# 3. Runs apt dist-upgrade
|
||||
# 4. Reboots if a newer kernel was installed
|
||||
# 5. Waits for the node and Docker daemon to return online
|
||||
# 6. Re-activates the node in the swarm
|
||||
# 7. Asserts node is Ready + Active before proceeding to the next node
|
||||
#
|
||||
# NOTE: drain/restore commands are delegated to a healthy manager.
|
||||
# When updating swarm-manager-1, delegation falls back to swarm-manager-2.
|
||||
# Assumes inventory_hostname matches the Docker Swarm node name (VM hostname).
|
||||
#
|
||||
# Usage:
|
||||
# # All nodes (rolling — managers first, then workers):
|
||||
# ansible-playbook -i inventory/hosts.ini playbooks/docker/swarm_update.yml
|
||||
#
|
||||
# # Single node:
|
||||
# ansible-playbook -i inventory/hosts.ini playbooks/docker/swarm_update.yml --limit swarm-worker-1
|
||||
#
|
||||
# # Dry-run (confirms serial order and reboot conditions without modifying):
|
||||
# ansible-playbook -i inventory/hosts.ini playbooks/docker/swarm_update.yml --check
|
||||
#
|
||||
# # Update packages but skip reboot even if kernel changed:
|
||||
# ansible-playbook -i inventory/hosts.ini playbooks/docker/swarm_update.yml --skip-tags reboot
|
||||
|
||||
- name: Rolling Swarm node update
|
||||
hosts: swarm_hosts
|
||||
become: true
|
||||
serial: 1
|
||||
|
||||
vars:
  # Delegate swarm CLI commands to a manager other than the node being
  # updated: the first member of the swarm_managers group that is not the
  # current host. With swarm-manager-1 listed first in that group this
  # resolves exactly as the previous hard-coded rule did (manager-2 while
  # updating manager-1, manager-1 otherwise), but it no longer depends on
  # those literal hostnames.
  # NOTE(review): assumes swarm_managers has at least two members when a
  # manager is being updated — confirm against inventory.
  swarm_delegate: >-
    {{ groups['swarm_managers'] | reject('equalto', inventory_hostname) | first }}
|
||||
|
||||
tasks:
|
||||
# Refuses to continue unless this node reports Swarm.LocalNodeState=active.
# The check is read-only, so it is forced to run in check mode too.
- name: "Pre-flight: verify Swarm is healthy before touching this node"
  block:
    # No pipes, redirects or expansions are needed, so the command module
    # is used instead of shell.
    - name: Check Docker Swarm state on this node
      ansible.builtin.command: >-
        docker info --format '{{ '{{' }}.Swarm.LocalNodeState{{ '}}' }}'
      register: swarm_pre
      changed_when: false
      check_mode: false

    - name: Fail if node is not an active swarm member
      ansible.builtin.assert:
        that:
          - swarm_pre.stdout | trim == 'active'
        fail_msg: >-
          ⛔ {{ inventory_hostname }} reports Swarm.LocalNodeState={{ swarm_pre.stdout | trim }}.
          Expected 'active'. Resolve swarm health before proceeding.
        success_msg: "✅ {{ inventory_hostname }} is an active swarm member — safe to drain"
|
||||
|
||||
# Sets the node to drain from the delegate manager, then polls until no
# task with desired-state=running is left on it (18 tries, 10 s apart).
# Skipped entirely in check mode.
- name: "Drain: migrate tasks off {{ inventory_hostname }}"
  tags: [drain]
  when: not ansible_check_mode
  block:
    - name: Set node availability to drain
      ansible.builtin.command: >-
        docker node update --availability drain {{ inventory_hostname }}
      delegate_to: "{{ swarm_delegate }}"
      become: false
      changed_when: true

    # Previously `... 2>/dev/null | wc -l`: a failing `docker node ps`
    # printed nothing, counted as 0 and looked like a finished evacuation.
    # Now a non-zero exit is retried and finally fails the task; success
    # means rc == 0 AND no task IDs were printed.
    - name: Wait for running tasks to evacuate
      ansible.builtin.command: >-
        docker node ps {{ inventory_hostname }} --filter desired-state=running -q
      delegate_to: "{{ swarm_delegate }}"
      become: false
      register: running_tasks
      until: >-
        (running_tasks.rc | default(1)) == 0
        and (running_tasks.stdout_lines | default([]) | length) == 0
      retries: 18
      delay: 10
      changed_when: false
|
||||
|
||||
- name: "Update packages"
|
||||
block:
|
||||
- name: Update apt cache
|
||||
ansible.builtin.apt:
|
||||
update_cache: true
|
||||
cache_valid_time: 0
|
||||
|
||||
- name: Run apt dist-upgrade
|
||||
ansible.builtin.apt:
|
||||
upgrade: dist
|
||||
update_cache: false
|
||||
register: dist_upgrade_result
|
||||
tags: [update]
|
||||
|
||||
- name: Check if a newer kernel is installed but not yet booted
|
||||
ansible.builtin.shell: |
|
||||
LATEST=$(ls /boot/vmlinuz-* | sort -V | tail -1 | sed 's|/boot/vmlinuz-||')
|
||||
RUNNING=$(uname -r)
|
||||
if [ "$LATEST" != "$RUNNING" ]; then echo "reboot_needed"; fi
|
||||
register: reboot_check
|
||||
changed_when: false
|
||||
check_mode: false
|
||||
tags: [reboot]
|
||||
|
||||
- name: Reboot if a newer kernel is installed
|
||||
ansible.builtin.reboot:
|
||||
msg: "Rebooting into updated kernel — initiated by swarm_update.yml"
|
||||
reboot_timeout: 600
|
||||
when: reboot_check.stdout | trim == 'reboot_needed'
|
||||
tags: [reboot]
|
||||
|
||||
- name: Wait for node to return post-reboot
|
||||
ansible.builtin.wait_for_connection:
|
||||
delay: 10
|
||||
timeout: 600
|
||||
when: reboot_check.stdout | trim == 'reboot_needed'
|
||||
tags: [reboot]
|
||||
|
||||
- name: Wait for Docker daemon to be ready after reboot
|
||||
ansible.builtin.command: docker info
|
||||
register: docker_ready
|
||||
until: docker_ready.rc == 0
|
||||
retries: 18
|
||||
delay: 10
|
||||
changed_when: false
|
||||
check_mode: false
|
||||
when: reboot_check.stdout | trim == 'reboot_needed'
|
||||
tags: [reboot]
|
||||
|
||||
# Puts the node back to availability=active from the delegate manager,
# waits until it reports state "ready" and availability "active", and
# asserts that result before the play moves on to the next node.
# Skipped entirely in check mode.
- name: "Restore: re-activate {{ inventory_hostname }} in the swarm"
  tags: [drain]
  when: not ansible_check_mode
  block:
    - name: Set node availability back to active
      ansible.builtin.command: >-
        docker node update --availability active {{ inventory_hostname }}
      delegate_to: "{{ swarm_delegate }}"
      become: false
      changed_when: true

    # Inspect exactly this node instead of `docker node ls --filter name=`:
    # the name filter also matches partial hostnames (swarm-worker-1 matches
    # swarm-worker-10), and substring tests on the table could be satisfied
    # by another node's row. Output is "<state> <availability>".
    - name: Wait for node to be Ready and Active
      ansible.builtin.command: >-
        docker node inspect {{ inventory_hostname }}
        --format '{{ "{{" }}.Status.State{{ "}}" }} {{ "{{" }}.Spec.Availability{{ "}}" }}'
      delegate_to: "{{ swarm_delegate }}"
      become: false
      register: node_ls
      until: (node_ls.stdout | default('') | trim | lower) == 'ready active'
      retries: 12
      delay: 10
      changed_when: false

    # Kept inside the guarded block so it is never evaluated when the wait
    # task above was skipped (check mode, --skip-tags drain) and node_ls
    # holds no real output.
    - name: Confirm node status after update
      ansible.builtin.assert:
        that:
          - (node_ls.stdout | trim | lower) == 'ready active'
        fail_msg: >-
          ⛔ {{ inventory_hostname }} is not Ready+Active after update.
          Investigate before proceeding to the next node.
          docker node inspect reported (state availability):
          {{ node_ls.stdout }}
        success_msg: "✅ {{ inventory_hostname }} updated — Ready + Active. Proceeding."
|
||||
28
ansible/ansible-old/playbooks/generate_inventory.yml
Normal file
28
ansible/ansible-old/playbooks/generate_inventory.yml
Normal file
@ -0,0 +1,28 @@
|
||||
---
|
||||
# Generate `ansible/inventory/hosts.ini` from the central YAML SoT
|
||||
# Run locally: `ansible-playbook ansible/playbooks/generate_inventory.yml --connection=local`
|
||||
# Runs scripts/generate_inventory.py against the central vars file and
# installs the result as the inventory, keeping a backup of the old file.
- name: Generate inventory from central source of truth
  hosts: localhost
  connection: local
  gather_facts: false
  vars:
    # Both paths are relative to this playbook's directory.
    # ('sod_file' is the existing variable name; kept so -e overrides
    # keep working.)
    sod_file: "../group_vars/all.yml"
    inventory_dest: "../inventory/hosts.ini"

  tasks:
    - name: Generate and install the inventory via a private scratch file
      block:
        # A unique scratch file replaces the fixed /tmp/generated_hosts.ini,
        # so concurrent runs cannot clobber each other and the path is not
        # predictable. Created even in check mode so later tasks can
        # reference it.
        - name: Create scratch file for the generated inventory
          ansible.builtin.tempfile:
            state: file
            prefix: generated_hosts.
            suffix: .ini
          register: generated_inventory
          changed_when: false
          check_mode: false

        - name: Generate inventory file using local script
          ansible.builtin.command:
            argv:
              - python3
              - ../scripts/generate_inventory.py
              - --sot
              - "{{ sod_file }}"
              - --out
              - "{{ generated_inventory.path }}"
            chdir: "{{ playbook_dir }}"
          changed_when: false

        # The destination is anchored to playbook_dir (the generator above
        # already runs there via chdir). A bare relative dest would resolve
        # against whatever directory ansible-playbook was started from.
        # An absolute inventory_dest override is passed through unchanged.
        - name: Install generated inventory with backup
          ansible.builtin.copy:
            src: "{{ generated_inventory.path }}"
            dest: "{{ [playbook_dir, inventory_dest] | path_join }}"
            mode: '0644'
            backup: true

      always:
        - name: Remove scratch file
          ansible.builtin.file:
            path: "{{ generated_inventory.path }}"
            state: absent
          when: generated_inventory.path is defined
          changed_when: false
          check_mode: false

    - name: Show result path
      ansible.builtin.debug:
        msg: "Wrote inventory to {{ inventory_dest }} (backup created if present)"
|
||||
@ -0,0 +1,441 @@
|
||||
---
|
||||
# playbooks/monitoring/deploy_swarm_monitoring.yml
|
||||
# Complete observability stack deployment for Docker Swarm cluster + standalone hosts
|
||||
#
|
||||
# === ARCHITECTURE OVERVIEW ===
|
||||
# This playbook deploys a three-tier monitoring solution:
|
||||
#
|
||||
# TIER 1: Data Collection (Swarm Nodes + Standalone Docker Hosts)
|
||||
# - node-exporter: Host metrics (CPU, RAM, disk, network) on swarm nodes and standalone hosts
|
||||
# - cAdvisor: Container metrics (per-container resource usage) on swarm nodes only
|
||||
#
|
||||
# TIER 2: Aggregation & Storage (Watchtower)
|
||||
# - Prometheus: Metrics time-series database
|
||||
# - Loki: Log aggregation and indexing
|
||||
#
|
||||
# TIER 3: Visualization & Alerting (Watchtower)
|
||||
# - Grafana: Dashboards and data exploration
|
||||
# - Uptime Kuma: HTTP health checks
|
||||
# - Dozzle: Real-time log viewer
|
||||
#
|
||||
# === PREREQUISITES ===
|
||||
# - Docker Swarm cluster is initialized and running
|
||||
# - All nodes are accessible via SSH
|
||||
# - Docker is installed on all nodes (swarm + standalone hosts)
|
||||
# - Authentik token is set in group_vars (for Dozzle auth)
|
||||
#
|
||||
# === USAGE ===
|
||||
# Deploy full stack (swarm nodes, standalone hosts, and watchtower):
|
||||
# ansible-playbook -i inventory/hosts.ini playbooks/monitoring/deploy_swarm_monitoring.yml
|
||||
#
|
||||
# Deploy only to swarm nodes:
|
||||
# ansible-playbook -i inventory/hosts.ini playbooks/monitoring/deploy_swarm_monitoring.yml --tags swarm
|
||||
#
|
||||
# Deploy only to standalone docker hosts:
|
||||
# ansible-playbook -i inventory/hosts.ini playbooks/monitoring/deploy_swarm_monitoring.yml --tags docker-hosts
|
||||
#
|
||||
# Deploy only watchtower stack:
|
||||
# ansible-playbook -i inventory/hosts.ini playbooks/monitoring/deploy_swarm_monitoring.yml --tags watchtower
|
||||
|
||||
# Play: deploy node-exporter and cAdvisor on every swarm node, then check
# that both metrics endpoints answer on the node itself.
#
# The endpoint validation used to be attached to the Dozzle play below,
# which targets swarm_managers only, so exporters on worker nodes were
# never validated. It now runs here, on every host that received them.
- name: Deploy monitoring exporters on swarm nodes
  hosts: swarm_hosts
  become: false
  gather_facts: true
  tags: ['swarm', 'exporters']

  pre_tasks:
    - name: Verify Docker is installed
      ansible.builtin.command: docker --version
      register: docker_check
      changed_when: false
      failed_when: docker_check.rc != 0

    - name: Display deployment target
      ansible.builtin.debug:
        msg:
          - "🎯 Deploying monitoring exporters to: {{ inventory_hostname }}"
          - " Role: {{ 'Manager' if inventory_hostname in groups['swarm_managers'] else 'Worker' }}"
          - " IP: {{ ansible_host }}"

  roles:
    - role: swarm_node_exporter
      tags: ['node-exporter']

    - role: swarm_cadvisor
      tags: ['cadvisor']

  post_tasks:
    # Explicit `until`: on Ansible versions where `retries` without `until`
    # is not honoured, the request would otherwise be attempted only once.
    - name: Validate exporter endpoints
      ansible.builtin.uri:
        url: "{{ item.url }}"
        method: GET
        status_code: 200
      loop:
        - { name: "node-exporter", url: "http://localhost:9100/metrics" }
        - { name: "cAdvisor", url: "http://localhost:8080/metrics" }
      loop_control:
        label: "{{ item.name }}"
      register: endpoint_check
      until: (endpoint_check.status | default(0)) == 200
      retries: 3
      delay: 5

    - name: Display exporter status
      ansible.builtin.debug:
        msg: "✅ {{ inventory_hostname }}: All exporters are healthy"

# Play: deploy the dozzle-agent service. The play targets all managers, but
# the role is only included on the first host of the swarm_managers group.
- name: Deploy Dozzle swarm agents
  hosts: swarm_managers
  become: false
  gather_facts: false
  tags: ['swarm', 'dozzle-agent']

  tasks:
    - name: Deploy and validate dozzle-agent service from primary manager
      ansible.builtin.include_role:
        name: swarm_dozzle_agent
      when: inventory_hostname == groups['swarm_managers'][0]
|
||||
|
||||
- name: Deploy node-exporter on standalone docker hosts
|
||||
hosts: docker_hosts
|
||||
become: false
|
||||
gather_facts: true
|
||||
tags: ['docker-hosts', 'exporters', 'node-exporter']
|
||||
|
||||
pre_tasks:
|
||||
- name: Verify Docker is installed
|
||||
ansible.builtin.command: docker --version
|
||||
register: docker_check
|
||||
changed_when: false
|
||||
failed_when: docker_check.rc != 0
|
||||
|
||||
- name: Display deployment target
|
||||
ansible.builtin.debug:
|
||||
msg:
|
||||
- "🎯 Deploying node-exporter to standalone docker host: {{ inventory_hostname }}"
|
||||
- " IP: {{ ansible_host }}"
|
||||
- " Purpose: Hardware and software metrics collection"
|
||||
|
||||
tasks:
|
||||
- name: Deploy node-exporter role with elevated privileges
|
||||
ansible.builtin.include_role:
|
||||
name: swarm_node_exporter
|
||||
apply:
|
||||
become: true
|
||||
tags: ['node-exporter']
|
||||
|
||||
post_tasks:
|
||||
# Polls the local node-exporter metrics endpoint until it answers 200.
# Explicit `until`: on Ansible versions where `retries` without `until` is
# not honoured, the request would otherwise be attempted only once.
- name: Validate node-exporter endpoint
  ansible.builtin.uri:
    url: "http://localhost:9100/metrics"
    method: GET
    status_code: 200
  register: exporter_check
  until: (exporter_check.status | default(0)) == 200
  retries: 3
  delay: 5
|
||||
|
||||
- name: Display node-exporter status
|
||||
ansible.builtin.debug:
|
||||
msg: "✅ {{ inventory_hostname }}: node-exporter deployed and healthy on port 9100"
|
||||
|
||||
- name: Deploy monitoring stack on Watchtower
|
||||
hosts: watchtower
|
||||
connection: local
|
||||
become: false
|
||||
gather_facts: true
|
||||
tags: ['watchtower', 'stack']
|
||||
|
||||
vars:
|
||||
# Canonical encrypted vars location (ADR-008)
|
||||
vault_encrypted_vars_file: "{{ playbook_dir }}/../../group_vars/vault/all.yml"
|
||||
|
||||
pre_tasks:
|
||||
- name: Check vault encrypted vars file state
|
||||
ansible.builtin.stat:
|
||||
path: "{{ vault_encrypted_vars_file }}"
|
||||
register: vault_vars_file_state
|
||||
|
||||
- name: Load encrypted vars when present
|
||||
ansible.builtin.include_vars:
|
||||
file: "{{ vault_encrypted_vars_file }}"
|
||||
name: vault_vars
|
||||
when: vault_vars_file_state.stat.exists
|
||||
no_log: true
|
||||
|
||||
- name: Resolve monitoring secrets from vault or environment fallback
|
||||
ansible.builtin.set_fact:
|
||||
grafana_admin_password: >-
|
||||
{{
|
||||
(
|
||||
vault_vars.vault_grafana_admin_password
|
||||
if (vault_vars is defined and 'vault_grafana_admin_password' in vault_vars)
|
||||
else (grafana_admin_password | default(''))
|
||||
) | default('', true)
|
||||
}}
|
||||
authentik_outpost_dozzle_token: >-
|
||||
{{
|
||||
(
|
||||
vault_vars.vault_authentik_outpost_dozzle_token
|
||||
if (vault_vars is defined and 'vault_authentik_outpost_dozzle_token' in vault_vars)
|
||||
else (
|
||||
secrets.AUTHENTIK_OUTPOST_DOZZLE_TOKEN
|
||||
if (secrets is defined and 'AUTHENTIK_OUTPOST_DOZZLE_TOKEN' in secrets)
|
||||
else lookup('env', 'AUTHENTIK_OUTPOST_DOZZLE_TOKEN')
|
||||
)
|
||||
) | default('', true)
|
||||
}}
|
||||
pve_exporter_token: >-
|
||||
{{
|
||||
(
|
||||
vault_vars.vault_pve_exporter_token
|
||||
if (vault_vars is defined and 'vault_pve_exporter_token' in vault_vars)
|
||||
else lookup('env', 'PVE_EXPORTER_TOKEN')
|
||||
) | default('', true)
|
||||
}}
|
||||
no_log: true
|
||||
|
||||
- name: Verify Docker Compose V2 is available
|
||||
ansible.builtin.command: docker compose version
|
||||
register: compose_check
|
||||
changed_when: false
|
||||
failed_when: compose_check.rc != 0
|
||||
|
||||
- name: Display Watchtower deployment info
|
||||
ansible.builtin.debug:
|
||||
msg:
|
||||
- "🏗️ Deploying monitoring stack to Watchtower"
|
||||
- " Swarm targets: {{ groups['swarm_managers'] | length }} managers + {{ groups['swarm_workers'] | length }} workers"
|
||||
- " Standalone hosts: {{ groups['docker_hosts'] | length }} (node-exporter)"
|
||||
- " Total monitored nodes: {{ groups['swarm_hosts'] | length + groups['docker_hosts'] | length + 1 }} (including Watchtower)"
|
||||
|
||||
roles:
|
||||
- role: monitoring_stack
|
||||
|
||||
post_tasks:
|
||||
# Both requests are skipped in focus mode unless the focused service is
# prometheus. Explicit `until` conditions make the retry loops effective on
# Ansible versions where `retries` without `until` is not honoured.
- name: Wait for Prometheus to be ready
  ansible.builtin.uri:
    url: "http://{{ watchtower_ip }}:{{ prometheus_host_port }}/-/ready"
    method: GET
    status_code: 200
  register: prometheus_ready
  until: (prometheus_ready.status | default(0)) == 200
  retries: 10
  delay: 5
  when: not (monitoring_focus_mode | default(false) | bool) or (monitoring_focus_service | default('') == 'prometheus')

# Fetches the targets API and keeps the response body in
# prometheus_targets; the body is not inspected here.
- name: Verify Prometheus can scrape all targets
  ansible.builtin.uri:
    url: "http://{{ watchtower_ip }}:{{ prometheus_host_port }}/api/v1/targets"
    method: GET
    return_content: true
  register: prometheus_targets
  until: (prometheus_targets.status | default(0)) == 200
  retries: 3
  delay: 10
  when: not (monitoring_focus_mode | default(false) | bool) or (monitoring_focus_service | default('') == 'prometheus')
|
||||
|
||||
- name: Build watchtower edge route backend reconciliation list
|
||||
ansible.builtin.set_fact:
|
||||
watchtower_edge_route_backends: >-
|
||||
{{
|
||||
[
|
||||
{'name': 'grafana', 'url': 'http://' ~ watchtower_ip ~ ':' ~ (grafana_port | string)},
|
||||
{'name': 'uptime', 'url': 'http://' ~ watchtower_ip ~ ':' ~ (uptime_kuma_port | string)}
|
||||
]
|
||||
+
|
||||
(
|
||||
[
|
||||
{'name': 'dozzle', 'url': 'http://' ~ watchtower_ip ~ ':' ~ (dozzle_port | string)}
|
||||
]
|
||||
if (monitoring_enable_dozzle | default(false) | bool) and (dozzle_expose_via_traefik | default(false) | bool)
|
||||
else []
|
||||
)
|
||||
+
|
||||
(
|
||||
[
|
||||
{'name': 'authentik-outpost-dozzle', 'url': 'http://' ~ watchtower_ip ~ ':' ~ (authentik_outpost_port | string)}
|
||||
]
|
||||
if monitoring_enable_authentik_outpost | default(false) | bool
|
||||
else []
|
||||
)
|
||||
+
|
||||
[
|
||||
{'name': 'portainer', 'url': 'http://' ~ watchtower_ip ~ ':' ~ (portainer_http_port | string)}
|
||||
]
|
||||
}}
|
||||
|
||||
- name: Reconcile watchtower service backends in Redis edge routing
|
||||
ansible.builtin.command: >-
|
||||
ssh {{ (edge_routing | default({})).get('edge_host', {}).get('ip', '10.0.0.151') }}
|
||||
sudo docker exec redis redis-cli SET
|
||||
traefik/http/services/{{ item.name }}/loadBalancer/servers/0/url
|
||||
{{ item.url }}
|
||||
changed_when: true
|
||||
loop: "{{ watchtower_edge_route_backends }}"
|
||||
loop_control:
|
||||
label: "{{ item.name }} -> {{ item.url }}"
|
||||
|
||||
- name: Verify reconciled watchtower service backends in Redis
|
||||
ansible.builtin.command: >-
|
||||
ssh {{ (edge_routing | default({})).get('edge_host', {}).get('ip', '10.0.0.151') }}
|
||||
sudo docker exec redis redis-cli GET
|
||||
traefik/http/services/{{ item.name }}/loadBalancer/servers/0/url
|
||||
register: watchtower_route_backend_reads
|
||||
changed_when: false
|
||||
loop: "{{ watchtower_edge_route_backends }}"
|
||||
loop_control:
|
||||
label: "{{ item.name }}"
|
||||
|
||||
- name: Assert watchtower service backends are reconciled to host IP routes
|
||||
ansible.builtin.assert:
|
||||
that:
|
||||
- item.stdout == item.item.url
|
||||
fail_msg: >-
|
||||
Edge route drift persisted for {{ item.item.name }}.
|
||||
Expected {{ item.item.url }}, got {{ item.stdout | default('') }}.
|
||||
success_msg: >-
|
||||
Edge route {{ item.item.name }} correctly reconciled to {{ item.item.url }}.
|
||||
loop: "{{ watchtower_route_backend_reads.results }}"
|
||||
loop_control:
|
||||
label: "{{ item.item.name }}"
|
||||
|
||||
- name: Display monitoring stack summary
|
||||
ansible.builtin.debug:
|
||||
msg:
|
||||
- "╔════════════════════════════════════════════════════════╗"
|
||||
- "║ 🎉 SWARM MONITORING STACK DEPLOYED SUCCESSFULLY! ║"
|
||||
- "╚════════════════════════════════════════════════════════╝"
|
||||
- ""
|
||||
- "📊 METRICS & DASHBOARDS:"
|
||||
- " Prometheus: http://{{ watchtower_ip }}:{{ prometheus_host_port }}"
|
||||
- " Grafana: https://{{ grafana_domain }}"
|
||||
- ""
|
||||
- "📋 LOGS:"
|
||||
- " Dozzle: https://{{ dozzle_domain }}"
|
||||
- " Loki API: http://{{ watchtower_ip }}:{{ loki_port }}"
|
||||
- ""
|
||||
- "✅ UPTIME:"
|
||||
- " Uptime Kuma: https://{{ uptime_domain }}"
|
||||
- ""
|
||||
- "🔍 NEXT STEPS:"
|
||||
- " 1. Open Grafana: https://{{ grafana_domain }}"
|
||||
- " 2. Verify provisioned data sources: {{ grafana_prometheus_datasource_name }} + {{ grafana_loki_datasource_name }}"
|
||||
- " 3. Review the provisioned dashboard folder: {{ grafana_dashboards_folder }}"
|
||||
- " 4. Optionally import extra dashboards: 1860, 893, 13639, 10347"
|
||||
- " 5. Configure Uptime Kuma health checks for swarm services"
|
||||
- ""
|
||||
- "📚 CONCEPTS YOU LEARNED:"
|
||||
- " ✓ Multi-tier monitoring architecture"
|
||||
- " ✓ Prometheus service discovery & scraping"
|
||||
- " ✓ Loki label-based log indexing"
|
||||
- " ✓ Ansible roles for modular infrastructure"
|
||||
- " ✓ Idempotent deployment (run this playbook anytime!)"
|
||||
when: not (monitoring_focus_mode | default(false) | bool) or (monitoring_focus_service | default('') == 'prometheus')
|
||||
|
||||
- name: Display focused deployment summary
|
||||
ansible.builtin.debug:
|
||||
msg:
|
||||
- "Focused deployment completed"
|
||||
- "Service: {{ monitoring_focus_service | default('not-set') }}"
|
||||
- "Mode: additive (existing running services preserved)"
|
||||
when: monitoring_focus_mode | default(false) | bool and (monitoring_focus_service | default('') != 'prometheus')
|
||||
|
||||
# Renders the monitoring quick-reference guide on the control node.
# The guide body reads ansible_date_time, which only exists after fact
# gathering. With gather_facts disabled (and no earlier play gathering
# facts for localhost) the render fails on an undefined variable.
# '!all' limits gathering to the minimal fact set, which includes date_time.
- name: Generate monitoring documentation
  hosts: localhost
  connection: local
  gather_facts: true
  gather_subset:
    - '!all'
  tags: ['docs']
  run_once: true
|
||||
|
||||
tasks:
|
||||
- name: Create monitoring quick-reference guide
|
||||
ansible.builtin.copy:
|
||||
dest: "{{ playbook_dir }}/../../documentation/swarm-monitoring-guide.md"
|
||||
mode: '0644'
|
||||
content: |
|
||||
# Docker Swarm Monitoring Guide
|
||||
|
||||
**Deployed:** {{ ansible_date_time.iso8601 }}
|
||||
**Cluster:** {{ groups['swarm_hosts'] | length }} nodes ({{ groups['swarm_managers'] | length }} managers, {{ groups['swarm_workers'] | length }} workers)
|
||||
|
||||
## Quick Access
|
||||
|
||||
| Service | URL | Purpose |
|
||||
|---------|-----|---------|
|
||||
| Prometheus | http://{{ hostvars['localhost'].watchtower_ip }}:{{ hostvars['localhost'].prometheus_port }} | Metrics storage & query |
|
||||
| Grafana | https://{{ hostvars['localhost'].grafana_domain }} | Dashboards & visualization |
|
||||
| Loki | http://{{ hostvars['localhost'].watchtower_ip }}:{{ hostvars['localhost'].loki_port }} | Log aggregation |
|
||||
| Dozzle | https://{{ hostvars['localhost'].dozzle_domain }} | Real-time log viewer |
|
||||
| Uptime Kuma | https://{{ hostvars['localhost'].uptime_domain }} | Service uptime tracking |
|
||||
|
||||
## Monitored Nodes
|
||||
|
||||
### Managers
|
||||
{% for host in groups['swarm_managers'] %}
|
||||
- **{{ host }}** ({{ hostvars[host].ansible_host }})
|
||||
- node-exporter: http://{{ hostvars[host].ansible_host }}:9100/metrics
|
||||
- cAdvisor: http://{{ hostvars[host].ansible_host }}:8080/metrics
|
||||
{% endfor %}
|
||||
|
||||
### Workers
|
||||
{% for host in groups['swarm_workers'] %}
|
||||
- **{{ host }}** ({{ hostvars[host].ansible_host }})
|
||||
- node-exporter: http://{{ hostvars[host].ansible_host }}:9100/metrics
|
||||
- cAdvisor: http://{{ hostvars[host].ansible_host }}:8080/metrics
|
||||
{% endfor %}
|
||||
|
||||
## Useful Prometheus Queries
|
||||
|
||||
```promql
|
||||
# Total cluster CPU usage
|
||||
100 - (avg(rate(node_cpu_seconds_total{mode="idle"}[5m])) * 100)
|
||||
|
||||
# Memory usage per node
|
||||
(1 - (node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes)) * 100
|
||||
|
||||
# Container count per node
|
||||
count(container_last_seen) by (instance)
|
||||
|
||||
# Network traffic by node
|
||||
rate(node_network_receive_bytes_total[5m])
|
||||
```
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
### Exporter not reachable
|
||||
```bash
|
||||
# Check if container is running
|
||||
ansible swarm_hosts -i inventory/hosts.ini -a "docker ps | grep exporter"
|
||||
|
||||
# Check firewall
|
||||
ansible swarm_hosts -i inventory/hosts.ini -a "ss -tlnp | grep -E '9100|8080'"
|
||||
```
|
||||
|
||||
### Prometheus shows target down
|
||||
```bash
|
||||
# Test from Watchtower
|
||||
curl http://<node-ip>:9100/metrics
|
||||
curl http://<node-ip>:8080/metrics
|
||||
```
|
||||
|
||||
## Maintenance
|
||||
|
||||
### Update all monitoring components
|
||||
```bash
|
||||
cd /home/chester/homelab/ansible
|
||||
ansible-playbook -i inventory/hosts.ini playbooks/monitoring/deploy_swarm_monitoring.yml
|
||||
```
|
||||
|
||||
### View Prometheus configuration
|
||||
```bash
|
||||
cat /opt/stacks/watchtower/prometheus-config/prometheus.yml
|
||||
```
|
||||
|
||||
### Check alert rules
|
||||
```bash
|
||||
cat /opt/stacks/watchtower/prometheus-config/alerts/homelab.yml
|
||||
```
|
||||
|
||||
register: docs_created
|
||||
|
||||
- name: Display documentation location
|
||||
ansible.builtin.debug:
|
||||
msg: "📚 Monitoring guide created at: {{ docs_created.dest }}"
|
||||
when: docs_created.changed
|
||||
14
ansible/ansible-old/playbooks/network/baseline_config.yml
Normal file
14
ansible/ansible-old/playbooks/network/baseline_config.yml
Normal file
@ -0,0 +1,14 @@
|
||||
---
|
||||
# playbooks/network/baseline_config.yml
|
||||
# Baseline network config for static IPs and VLAN prep
|
||||
# Informational play: prints two reminder messages per host and changes
# nothing on the targets.
- name: Baseline network configuration
  hosts: all
  gather_facts: false

  tasks:
    # Reminder to compare the host's static IP against the contract.
    - name: Document static IP assignments
      ansible.builtin.debug:
        msg: '[Info] Ensure static IPs match contracts for {{ inventory_hostname }}.'

    # Reminder only; no VLAN configuration is touched.
    - name: Review VLAN segmentation readiness
      ansible.builtin.debug:
        msg: '[Info] VLANs not yet in use, but config should be reviewed periodically.'
|
||||
@ -0,0 +1,22 @@
|
||||
---
|
||||
# Create the shared Swarm overlay network used by edge-routed services.
|
||||
|
||||
# Targets every manager, but the swarm_overlay_network role is included
# only on the first host of the swarm_managers group, so the network is
# created once.
- name: Create proxy overlay network on swarm manager
  hosts: swarm_managers
  gather_facts: false
  become: false

  vars:
    # Mirrors the current standalone Docker bridge values from migration inputs.
    swarm_overlay_network_name: 'proxy-net'
    swarm_overlay_network_subnet: '172.20.0.0/24'
    swarm_overlay_network_gateway: '172.20.0.1'
    swarm_overlay_network_mtu: '1500'
    swarm_overlay_network_attachable: true
    swarm_overlay_network_internal: false

  tasks:
    - name: Run network creation only once from the primary manager
      when: inventory_hostname == groups['swarm_managers'][0]
      ansible.builtin.include_role:
        name: swarm_overlay_network
|
||||
Some files were not shown because too many files have changed in this diff Show More
Loading…
x
Reference in New Issue
Block a user