157 lines
4.6 KiB
Django/Jinja
157 lines
4.6 KiB
Django/Jinja
---
|
|
# roles/monitoring_stack/templates/prometheus.yml.j2
|
|
# Prometheus configuration with dynamic swarm cluster discovery
|
|
|
|
# Defaults that apply to every scrape job unless the job overrides them.
global:
  # How often targets are scraped.
  scrape_interval: {{ prometheus_scrape_interval }}
  # Rules are evaluated on the same cadence as scraping (same variable).
  evaluation_interval: {{ prometheus_scrape_interval }}
  # Static labels identifying this Prometheus server.
  external_labels:
    environment: 'production'
    cluster: 'homelab'
|
|
|
|
# === BEST PRACTICE: Alerting Rules ===
# Alert rules are kept in their own files so they can be maintained apart
# from this configuration; every *.yml file under the alerts directory
# matches the glob below.
rule_files:
- '/etc/prometheus/alerts/*.yml'
|
|
|
|
# === CONCEPT: Scrape Configs ===
# A job groups the targets that are collected in the same way.
# Unless a job sets metrics_path, Prometheus fetches /metrics from each target.
scrape_configs:
# Meta-monitoring: Prometheus scrapes its own endpoint on prometheus_port.
- job_name: 'prometheus'
  static_configs:
  - targets:
    - 'localhost:{{ prometheus_port }}'
    labels:
      host: 'watchtower'
      role: 'monitoring'
|
|
|
|
# === WATCHTOWER NODE METRICS ===
# Single fixed target: the node-exporter endpoint on port 9100.
- job_name: 'watchtower-node'
  static_configs:
  - targets:
    - 'node-exporter:9100'
    labels:
      host: 'watchtower'
      role: 'controller'
|
|
|
|
# === WATCHTOWER LOCAL CONTAINER METRICS ===
# Single fixed target: the watchtower-cadvisor endpoint on port 8080.
# metric_source tags these series as coming from cAdvisor.
- job_name: 'watchtower-containers'
  static_configs:
  - targets:
    - 'watchtower-cadvisor:8080'
    labels:
      host: 'watchtower'
      role: 'controller'
      metric_source: 'cadvisor'
|
|
|
|
# === SWARM MANAGER NODE METRICS ===
# Generated dynamically: one target (port 9100) per host in the
# [swarm_managers] inventory group.
# A host without an explicit ansible_host falls back to its inventory
# hostname, so it still renders a target instead of failing the template.
# Jinja block tags sit at column 0 so they add no stray indentation.
- job_name: 'swarm-managers-node'
  static_configs:
  - targets:
{% for host in groups['swarm_managers'] %}
    - '{{ hostvars[host].ansible_host | default(host) }}:9100'
{% endfor %}
    labels:
      role: 'manager'
      cluster: 'swarm'
|
|
|
|
# === SWARM WORKER NODE METRICS ===
# One target (port 9100) per host in the [swarm_workers] inventory group.
# A host without an explicit ansible_host falls back to its inventory
# hostname, so it still renders a target instead of failing the template.
# Jinja block tags sit at column 0 so they add no stray indentation.
- job_name: 'swarm-workers-node'
  static_configs:
  - targets:
{% for host in groups['swarm_workers'] %}
    - '{{ hostvars[host].ansible_host | default(host) }}:9100'
{% endfor %}
    labels:
      role: 'worker'
      cluster: 'swarm'
|
|
|
|
# === CONTAINER METRICS (cAdvisor) ===
# One target (port 8080) per host in the [swarm_managers] inventory group.
# A host without an explicit ansible_host falls back to its inventory
# hostname, so it still renders a target instead of failing the template.
# Jinja block tags sit at column 0 so they add no stray indentation.
- job_name: 'swarm-managers-containers'
  static_configs:
  - targets:
{% for host in groups['swarm_managers'] %}
    - '{{ hostvars[host].ansible_host | default(host) }}:8080'
{% endfor %}
    labels:
      role: 'manager'
      cluster: 'swarm'
|
|
|
|
# Container metrics for swarm workers: one target (port 8080) per host in
# the [swarm_workers] inventory group.
# A host without an explicit ansible_host falls back to its inventory
# hostname, so it still renders a target instead of failing the template.
# Jinja block tags sit at column 0 so they add no stray indentation.
- job_name: 'swarm-workers-containers'
  static_configs:
  - targets:
{% for host in groups['swarm_workers'] %}
    - '{{ hostvars[host].ansible_host | default(host) }}:8080'
{% endfor %}
    labels:
      role: 'worker'
      cluster: 'swarm'
|
|
|
|
# === PRO-TIP: Docker Hosts ===
# Monitor standalone Docker hosts (heimdall, waldorf).
# The job is emitted only when the docker_hosts inventory group exists AND
# has at least one member; an empty group would otherwise produce a job
# with no targets.
# A host without an explicit ansible_host falls back to its inventory
# hostname.
{% if groups['docker_hosts'] | default([]) | length > 0 %}
- job_name: 'docker-hosts-node'
  static_configs:
  - targets:
{% for host in groups['docker_hosts'] %}
    - '{{ hostvars[host].ansible_host | default(host) }}:9100'
{% endfor %}
    labels:
      role: 'standalone'
{% endif %}
|
|
|
|
# === BLACKBOX PROBES (NETWORK / ENDPOINT HEALTH) ===
# Each entry of monitoring_probe_targets (fields: target, name, module)
# becomes its own static config, so every probe carries its own labels.
# Prometheus never scrapes the probed endpoint directly: the relabel rules
# pass it to the blackbox exporter as the ?target= parameter.
- job_name: 'blackbox-probes'
  metrics_path: /probe
  params:
    # Job-wide default module; the per-target module label below replaces
    # it through the __param_module relabel rule.
    module: [http_2xx]
  static_configs:
{% for probe in monitoring_probe_targets %}
  - targets: ['{{ probe.target }}']
    labels:
      probe_name: '{{ probe.name }}'
      # Entries without a module use the same default as params above
      # instead of failing the template on an undefined attribute.
      module: '{{ probe.module | default("http_2xx") }}'
{% endfor %}
  relabel_configs:
  # The configured target address becomes the ?target= query parameter.
  - source_labels: [__address__]
    target_label: __param_target
  # The per-target module label becomes the ?module= query parameter.
  - source_labels: [module]
    target_label: __param_module
  # Keep the probed endpoint visible as the instance label.
  - source_labels: [__param_target]
    target_label: instance
  # Send the actual scrape request to the blackbox exporter.
  - target_label: __address__
    replacement: 'blackbox-exporter:{{ blackbox_port }}'
|
|
|
|
# === PROXMOX CLUSTER METRICS (via pve_exporter) ===
# pve_exporter authenticates to the Proxmox API using a read-only PVEAuditor token.
# Each PVE node is passed as ?target= and the request is routed through the exporter.
# One target per host in the [proxmox_cluster] inventory group. A host
# without an explicit ansible_host falls back to its inventory hostname,
# so it still renders a target instead of failing the template.
- job_name: 'proxmox'
  metrics_path: /pve
  params:
    module: [default]
  static_configs:
  - targets:
{% for host in groups['proxmox_cluster'] %}
    - '{{ hostvars[host].ansible_host | default(host) }}'
{% endfor %}
    labels:
      cluster: 'pve'
  relabel_configs:
  # The PVE node address becomes the ?target= query parameter.
  - source_labels: [__address__]
    target_label: __param_target
  # Keep the PVE node visible as the instance label.
  - source_labels: [__param_target]
    target_label: instance
  # Send the actual scrape request to the exporter.
  - target_label: __address__
    replacement: 'pve-exporter:9221'
|
|
|
|
# === FUTURE: Swarm Service Discovery ===
# Uncomment to enable automatic discovery of swarm services.
# Requires the Docker API of a swarm manager to be reachable from Prometheus
# (the example below uses the local Docker socket).
# - job_name: 'swarm-services'
#   dockerswarm_sd_configs:
#   - host: unix:///var/run/docker.sock
#     role: tasks
#   relabel_configs:
#   - source_labels: [__meta_dockerswarm_service_name]
#     target_label: service
|