homelab/ansible/archive/scripts/audit_prune_gitea_runners.py

188 lines
6.7 KiB
Python

#!/usr/bin/env python3
"""
Audit and optionally prune Gitea Actions runners.
This script is intentionally conservative:
- It can parse a simple text/CSV listing of runners (id,status,name,last_online) or
accept manual runner entries.
It will by default run in dry-run mode and print the curl commands needed to
delete selected runners. To actually perform deletions, supply --prune together
with --confirm-phrase (the exact phrase is printed when it is missing or wrong)
and provide the environment variables `GITEA_URL` and `GITEA_TOKEN`.
USAGE (dry-run):
python3 audit_prune_gitea_runners.py --input runners.txt --threshold-hours 24
USAGE (execute):
GITEA_URL="https://gitea.example.com" GITEA_TOKEN="<token>" \
python3 audit_prune_gitea_runners.py --input runners.txt --threshold-hours 24 --prune \
--confirm-phrase "CONFIRM PRUNE RUNNERS: <today as YYYY-MM-DD>"
Notes:
- Verify the generated DELETE endpoint before running. Gitea Action Runners API paths
vary by version; the script assumes a generic endpoint: /api/v1/actions/runners/{id}
If your Gitea uses a different path, run in dry-run and adjust commands accordingly.
- Token must have admin privileges to remove runners.
"""
import argparse
import csv
import datetime
import os
import shlex
import subprocess
from typing import List, Dict
def parse_input(path: str) -> List[Dict]:
    """Parse a CSV or whitespace-delimited runner listing into row dicts.

    The file is first read as CSV. The first CSV line is always taken as the
    header, and the result is only accepted when that header has at least two
    comma-separated columns; keys of the returned dicts are then the stripped
    header names. Otherwise every non-blank line is parsed heuristically and
    yields a dict with the keys ``id``, ``name``, ``status`` and
    ``last_online``.

    Args:
        path: Path of the listing file.

    Returns:
        One dict of stripped strings per runner entry; empty if nothing could
        be parsed.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    # newline='' hands the csv module the original line endings, which it
    # needs to handle quoted fields correctly.
    with open(path, 'r', newline='') as f:
        text = f.read()
        f.seek(0)
        rows = _parse_csv_rows(f)
    if rows:
        return rows
    return _parse_whitespace_rows(text)


def _parse_csv_rows(lines) -> List[Dict]:
    """Return CSV records from *lines*, or [] if it is not a multi-column CSV."""
    try:
        reader = csv.DictReader(lines)
        header = reader.fieldnames or []
        # DictReader happily "parses" any text as a one-column CSV, which
        # would hide whitespace-delimited listings from the fallback parser.
        if len(header) < 2:
            return []
        rows = []
        for record in reader:
            rows.append({
                key.strip(): (value or '').strip()
                for key, value in record.items()
                # Surplus cells are collected under the key None: drop them.
                # Missing cells arrive as None and become ''.
                if key is not None
            })
        return rows
    except csv.Error:
        # Malformed CSV: discard partial results so the fallback parser
        # starts from a clean slate.
        return []


def _parse_whitespace_rows(text: str) -> List[Dict]:
    """Best-effort parse of lines such as "Offline 12 runner-a 2024-01-01"."""
    rows = []
    for raw_line in text.splitlines():
        line = raw_line.strip()
        if not line:
            continue
        parts = line.split()
        # The first all-digit token is taken to be the runner id.
        id_token = next((p for p in parts if p.isdigit()), '')
        lowered = line.lower()
        status = 'unknown'
        if 'idle' in lowered:
            status = 'idle'
        # 'offline' wins when both words occur on the line.
        if 'offline' in lowered:
            status = 'offline'
        name = ''
        last_online = ''
        # Best effort: with four or more tokens the last two are read as
        # name and last-online stamp.
        if len(parts) >= 4:
            name = parts[-2]
            last_online = parts[-1]
        rows.append({
            'id': id_token,
            'name': name,
            'status': status,
            'last_online': last_online,
        })
    return rows
def candidates(rows: List[Dict], threshold_hours: int) -> List[Dict]:
    """Select the runners that are offline and have been so long enough.

    A row qualifies when its status (key ``status`` or ``Status``,
    case-insensitive) is ``offline`` and its last-online stamp (key
    ``last_online`` or ``Last Online Time``) is at least *threshold_hours*
    old, compared against the current UTC time. Offline rows whose stamp is
    empty or in an unrecognised format are included as well, so that they
    show up for manual review.

    Args:
        rows: Runner dicts as produced by the input parser.
        threshold_hours: Minimum offline age, in hours.

    Returns:
        The qualifying row dicts, in input order (same objects, not copies).
    """
    full_formats = (
        '%Y-%m-%dT%H:%M:%S',
        '%Y-%m-%d %H:%M:%S',
        '%Y-%m-%d',
        '%Y-%m-%dT%H:%M:%SZ',
    )
    # Naive UTC "now": the parsed stamps are naive too, and utcnow() is
    # deprecated.
    now = datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None)
    out = []
    for r in rows:
        status = str(r.get('status') or r.get('Status') or '').lower()
        if status != 'offline':
            continue
        lo = str(r.get('last_online') or r.get('Last Online Time') or '').strip()
        parsed = None
        for fmt in full_formats:
            try:
                parsed = datetime.datetime.strptime(lo, fmt)
                break
            except ValueError:
                continue
        if parsed is None:
            parsed = _parse_month_day(lo, now)
        if parsed is None:
            # Unknown age: keep the offline runner in the list for manual review.
            out.append(r)
            continue
        if (now - parsed).total_seconds() >= threshold_hours * 3600:
            out.append(r)
    return out


def _parse_month_day(text, now):
    """Parse a year-less stamp such as 'Jan 05' as its most recent occurrence.

    Returns a naive datetime, or None if *text* is not in '%b %d' form.
    A bare '%b %d' parse would default to the year 1900 and make every such
    runner look decades stale, so the year is supplied explicitly.
    """
    for year in (now.year, now.year - 1):
        try:
            parsed = datetime.datetime.strptime(f'{text} {year}', '%b %d %Y')
        except ValueError:
            continue
        # Allow one day of clock/time-zone skew; a date further in the future
        # must belong to the previous year.
        if parsed - now <= datetime.timedelta(days=1):
            return parsed
    return None
def gen_delete_command(gitea_url: str, token: str, runner_id: str) -> str:
    """Build the curl command line that deletes one runner.

    The assumed endpoint is ``<gitea_url>/api/v1/actions/runners/<runner_id>``;
    verify it for your Gitea version.

    Args:
        gitea_url: Base URL of the Gitea instance; trailing slashes are ignored.
        token: API token sent in the ``Authorization: token ...`` header.
        runner_id: Id of the runner to delete.

    Returns:
        A POSIX-shell command string. Every argument is quoted with
        ``shlex.quote``, so quotes or shell metacharacters in the inputs
        cannot break out of their argument.
    """
    endpoint = f"{gitea_url.rstrip('/')}/api/v1/actions/runners/{runner_id}"
    argv = [
        'curl', '-sS', '-X', 'DELETE',
        '-H', f'Authorization: token {token}',
        endpoint,
    ]
    return ' '.join(shlex.quote(arg) for arg in argv)
def main():
    """Command-line entry point: audit runners and optionally delete stale ones.

    Without ``--prune`` this is a dry run that lists the candidates and prints
    example delete commands. With ``--prune`` the deletions are executed, but
    only if ``--confirm-phrase`` equals ``CONFIRM PRUNE RUNNERS: <today>`` and
    the environment variables GITEA_URL and GITEA_TOKEN are set.

    Returns:
        Process exit status: 0 on success (including dry run and "nothing to
        prune"), 1 if no rows were parsed, 2 if the confirmation phrase is
        missing or wrong, 3 if GITEA_URL/GITEA_TOKEN are missing, 4 if at
        least one delete command could not be run or exited non-zero.
    """
    p = argparse.ArgumentParser()
    p.add_argument('--input', required=True, help='Path to runner list (CSV or text)')
    p.add_argument('--threshold-hours', type=int, default=24, help='Consider offline > N hours')
    p.add_argument('--prune', action='store_true', help='If set, execute deletions (requires GITEA_URL and GITEA_TOKEN)')
    p.add_argument('--confirm-phrase', help='Required exact confirmation phrase to actually prune')
    args = p.parse_args()

    rows = parse_input(args.input)
    if not rows:
        print('No runner rows parsed from', args.input)
        return 1
    print(f'Parsed {len(rows)} runner entries. Threshold: {args.threshold_hours}h')

    cand = candidates(rows, args.threshold_hours)
    if not cand:
        print('No candidates found for pruning.')
        return 0
    print('\nCandidates for removal:')
    for r in cand:
        print('-', r)

    if not args.prune:
        print('\nDRY RUN: To execute deletions, re-run with --prune and set GITEA_URL and GITEA_TOKEN environment variables.')
        print('Example delete commands (verify endpoint before running):')
        gitea = os.environ.get('GITEA_URL', 'https://gitea.example.com')
        # NOTE: if GITEA_TOKEN is set, the real token appears in the printed
        # commands so that they can be copy-pasted as they are.
        token = os.environ.get('GITEA_TOKEN', '<token>')
        for r in cand:
            rid = r.get('id') or r.get('ID')
            if not rid:
                print('# missing id for entry:', r)
                continue
            print(gen_delete_command(gitea, token, rid))
        return 0

    # Prune requested: demand the date-stamped phrase so that a stale shell
    # history entry cannot trigger deletions on a later day.
    phrase = args.confirm_phrase or ''
    expected = f'CONFIRM PRUNE RUNNERS: {datetime.date.today().isoformat()}'
    if phrase != expected:
        print('Refusing to prune. To prune, pass --confirm-phrase with exact phrase:')
        print(expected)
        return 2

    gitea = os.environ.get('GITEA_URL')
    token = os.environ.get('GITEA_TOKEN')
    if not gitea or not token:
        print('GITEA_URL and GITEA_TOKEN environment variables required to perform deletions.')
        return 3

    # Execute deletions.
    failures = 0
    for r in cand:
        rid = r.get('id') or r.get('ID')
        if not rid:
            print('Skipping entry missing id:', r)
            continue
        cmd = gen_delete_command(gitea, token, rid)
        try:
            # Split into an argument vector and run without a shell: runner
            # ids come from the input file and must never be interpreted as
            # shell syntax.
            argv = shlex.split(cmd)
            # Keep the admin token out of terminal scrollback and CI logs.
            print('Executing:', ' '.join(shlex.quote(a.replace(token, '***')) for a in argv))
            res = subprocess.run(argv, check=False, capture_output=True, text=True)
        except (OSError, ValueError, subprocess.SubprocessError) as e:
            print('Error executing command for', rid, e)
            failures += 1
            continue
        print('Return code:', res.returncode)
        print('stdout:', res.stdout)
        print('stderr:', res.stderr)
        if res.returncode != 0:
            failures += 1
    # Report failed deletions through the exit status instead of always
    # claiming success.
    return 4 if failures else 0


if __name__ == '__main__':
    raise SystemExit(main())