---
# ============================================================================
# AI WORKSTATION BOOTSTRAP PLAYBOOK
# ============================================================================
# Purpose: Prepare fresh Ubuntu installations for AI/ML workloads
# Targets: ai_grid inventory group (NVIDIA GPU-equipped machines)
# ============================================================================

- name: Bootstrap AI workstation (GPU + Ollama + Storage)
  hosts: ai_grid
  become: true

  vars:
    # Ollama network configuration.
    # ollama_host is derived from ollama_port so the bind address and the
    # readiness check below can never drift apart.
    ollama_port: 11434
    ollama_host: "0.0.0.0:{{ ollama_port }}"  # Listen on all interfaces

    # Address used for the readiness check and the summary banner.
    # Falls back to the inventory name when ansible_host is not set.
    ollama_api_host: "{{ ansible_host | default(inventory_hostname) }}"

    # Essential packages for AI workstations
    essential_packages:
      - build-essential  # Compiler and build tools
      - git              # Version control
      - curl             # HTTP client
      - wget             # Download utility
      - htop             # System monitoring
      - nvtop            # GPU monitoring (NVIDIA)
      - python3-pip      # Python package manager
      - python3-venv     # Python virtual environments
      - net-tools        # Network utilities
      - nfs-common       # NFS client support

  tasks:
    # ========================================================================
    # PHASE 1: SYSTEM BASELINE
    # ========================================================================
    - name: Update apt cache
      ansible.builtin.apt:
        update_cache: true
        cache_valid_time: 3600
      tags: [baseline, update]

    - name: Upgrade all installed packages
      ansible.builtin.apt:
        upgrade: dist
        autoremove: true
        autoclean: true
      register: upgrade_result
      tags: [baseline, update]

    # ========================================================================
    # PHASE 2: ESSENTIAL UTILITIES
    # ========================================================================
    - name: Install essential utilities and development tools
      ansible.builtin.apt:
        name: "{{ essential_packages }}"
        state: present
      tags: [baseline, utilities]

    # ========================================================================
    # PHASE 2.5: IDENTITY MANAGEMENT
    # ========================================================================
    # Purpose: Ensure the 'chester' admin user exists with proper access
    # Why: Allows the playbook to bootstrap from a fresh Ubuntu install
    #      without manual user creation
    # ========================================================================
    - name: Create chester identity and access
      tags: [identity, baseline]
      block:
        - name: Install sudo package
          ansible.builtin.apt:
            name: sudo
            state: present
            update_cache: false

        - name: Ensure chester group exists
          ansible.builtin.group:
            name: chester
            state: present

        # The password is locked; the SSH key deployed below is the only
        # credential this play configures for the account.
        - name: Create chester user with sudo access
          ansible.builtin.user:
            name: chester
            group: chester
            groups: sudo
            shell: /bin/bash
            password: '!'
            password_lock: true
            comment: "Homelab Administrator"

        # The lookup reads the public key from the control node.
        - name: Deploy SSH key to chester user
          ansible.posix.authorized_key:
            user: chester
            state: present
            key: "{{ lookup('file', '~/.ssh/id_ed25519.pub') }}"

        # visudo validates the drop-in before it is put in place.
        - name: Allow chester to use sudo without password
          ansible.builtin.copy:
            dest: /etc/sudoers.d/chester
            content: "chester ALL=(ALL) NOPASSWD: ALL\n"
            mode: '0440'
            owner: root
            group: root
            validate: '/usr/sbin/visudo -cf %s'

    # ========================================================================
    # PHASE 3: NVIDIA DRIVERS
    # ========================================================================
    - name: Install ubuntu-drivers-common package
      ansible.builtin.apt:
        name: ubuntu-drivers-common
        state: present
      tags: [gpu, nvidia]

    # 'creates' skips the command once nvidia-smi exists, so the task only
    # runs (and only reports "changed") when drivers are actually installed.
    - name: Detect and install recommended NVIDIA drivers
      ansible.builtin.command:
        cmd: ubuntu-drivers autoinstall
        creates: /usr/bin/nvidia-smi
      register: nvidia_install
      tags: [gpu, nvidia]

    # Read-only probe: never fails the play and never reports a change.
    - name: Verify NVIDIA driver installation
      ansible.builtin.command: nvidia-smi
      register: nvidia_check
      failed_when: false
      changed_when: false
      tags: [gpu, nvidia, verify]

    - name: Display NVIDIA driver status
      ansible.builtin.debug:
        msg: "{{ nvidia_check.stdout_lines }}"
      when: nvidia_check.rc == 0
      tags: [gpu, nvidia, verify]

    # ========================================================================
    # PHASE 3.5: LAPTOP TUNING & SAFETY
    # ========================================================================
    # NOTE: this replaces the whole GRUB_CMDLINE_LINUX_DEFAULT line, so any
    # kernel parameters previously set there are overwritten.
    - name: Configure GRUB for ASPM & Intel hybrid cores
      ansible.builtin.lineinfile:
        path: /etc/default/grub
        regexp: '^GRUB_CMDLINE_LINUX_DEFAULT='
        line: 'GRUB_CMDLINE_LINUX_DEFAULT="quiet pcie_aspm=force intel_pstate=passive"'
      notify: Update Grub
      tags: [laptop, tuning]

    # The regexp matches both the commented-out default and an active entry.
    - name: Configure logind to ignore lid-close events
      ansible.builtin.lineinfile:
        path: /etc/systemd/logind.conf
        regexp: "^#?{{ item.key }}="
        line: "{{ item.key }}={{ item.value }}"
      loop:
        - { key: "HandleLidSwitch", value: "ignore" }
        - { key: "HandleLidSwitchExternalPower", value: "ignore" }
      notify: Restart Logind
      tags: [laptop, safety]

    - name: Mask sleep targets to keep workloads running
      ansible.builtin.systemd:
        name: "{{ item }}"
        masked: true
      loop:
        - sleep.target
        - suspend.target
        - hibernate.target
        - hybrid-sleep.target
      tags: [laptop, safety]

    # Runs only while swap is active, so every execution is a real change.
    # The sed expression comments out active swap entries in /etc/fstab,
    # matching space- or tab-separated fields and skipping lines that are
    # already commented.
    - name: Disable swap to protect NVMe under sustained load
      ansible.builtin.shell: |
        swapoff -a
        sed -i -E '/^[[:space:]]*[^#[:space:]].*[[:space:]]swap[[:space:]]/ s/^/#/' /etc/fstab
      when: ansible_swaptotal_mb > 0
      changed_when: true
      tags: [storage, tuning]

    # Informational probe only; the result is registered in hfi_check.
    - name: Check Intel Thread Director support messages
      ansible.builtin.shell: "dmesg | grep -i 'Hardware Feedback Interface'"
      register: hfi_check
      failed_when: false
      changed_when: false
      tags: [verify, laptop]

    # ========================================================================
    # PHASE 4: OLLAMA INSTALLATION
    # ========================================================================
    - name: Check if Ollama is already installed
      ansible.builtin.stat:
        path: /usr/local/bin/ollama
      register: ollama_binary
      tags: [ollama]

    - name: Download Ollama installation script
      ansible.builtin.get_url:
        url: https://ollama.ai/install.sh
        dest: /tmp/ollama-install.sh
        mode: '0755'
      when: not ollama_binary.stat.exists
      tags: [ollama]

    # Guarded by the stat check above, so a run always means a fresh install.
    - name: Install Ollama
      ansible.builtin.command: /tmp/ollama-install.sh
      when: not ollama_binary.stat.exists
      changed_when: true
      tags: [ollama]

    - name: Create systemd override directory for Ollama
      ansible.builtin.file:
        path: /etc/systemd/system/ollama.service.d
        state: directory
        mode: '0755'
      tags: [ollama, network]

    - name: Configure Ollama to listen on all network interfaces
      ansible.builtin.copy:
        dest: /etc/systemd/system/ollama.service.d/override.conf
        content: |
          [Service]
          Environment="OLLAMA_HOST={{ ollama_host }}"
        mode: '0644'
      notify: Restart ollama
      tags: [ollama, network]

    - name: Ensure Ollama service is enabled and started
      ansible.builtin.systemd:
        name: ollama
        state: started
        enabled: true
        daemon_reload: true
      tags: [ollama]

    # Runs the "Restart ollama" handler now if the override changed, so the
    # service is restarted only when its configuration actually changed and
    # before the readiness check below.
    - name: Apply pending Ollama handler changes before readiness check
      ansible.builtin.meta: flush_handlers
      tags: [ollama]

    - name: Wait for Ollama service to be ready
      ansible.builtin.wait_for:
        host: "{{ ollama_api_host }}"
        port: "{{ ollama_port }}"
        delay: 5
        timeout: 30
      tags: [ollama, verify]

    # ========================================================================
    # PHASE 5: NFS STORAGE MOUNTS (TODO)
    # ========================================================================
    # Instructions:
    # 1. Define NFS server variables in group_vars/ai_grid.yml:
    #      nfs_server: "10.0.0.249"
    #      nfs_export: "/volume1/ai-datasets"
    #      nfs_mount_point: "/mnt/ai-datasets"
    #
    # 2. Uncomment the tasks below and customize paths
    # ========================================================================

    # - name: Create NFS mount point directory
    #   ansible.builtin.file:
    #     path: "{{ nfs_mount_point }}"
    #     state: directory
    #     owner: "{{ ansible_user }}"
    #     group: "{{ ansible_user }}"
    #     mode: '0755'
    #   tags: [storage, nfs]
    #
    # - name: Mount NFS share for AI datasets
    #   ansible.posix.mount:
    #     path: "{{ nfs_mount_point }}"
    #     src: "{{ nfs_server }}:{{ nfs_export }}"
    #     fstype: nfs
    #     opts: defaults,nfsvers=4
    #     state: mounted
    #   tags: [storage, nfs]
    #
    # - name: Verify NFS mount is accessible
    #   ansible.builtin.command: "ls -la {{ nfs_mount_point }}"
    #   register: nfs_verify
    #   changed_when: false
    #   tags: [storage, nfs, verify]

    # ========================================================================
    # PHASE 6: POST-INSTALL VERIFICATION
    # ========================================================================
    - name: Check if system reboot is required
      ansible.builtin.stat:
        path: /var/run/reboot-required
      register: reboot_required
      tags: [verify, reboot]

    - name: Display reboot notification if needed
      ansible.builtin.debug:
        msg: |
          ╔════════════════════════════════════════════════════════════════╗
          ║ WARNING: System reboot is required to complete installation ║
          ║ Reason: Kernel or driver updates ║
          ║ Action: Please reboot this host manually ║
          ╚════════════════════════════════════════════════════════════════╝
      when: reboot_required.stat.exists
      tags: [verify, reboot]

    - name: Display bootstrap completion summary
      ansible.builtin.debug:
        msg:
          - "╔════════════════════════════════════════════════════════════════╗"
          - "║ AI Workstation Bootstrap Complete! ║"
          - "╠════════════════════════════════════════════════════════════════╣"
          - "║ ✓ System updated and essential utilities installed ║"
          - "║ ✓ NVIDIA drivers installed (verify with nvidia-smi) ║"
          - "║ ✓ Ollama installed and network-accessible ║"
          - "║ → Ollama API: http://{{ ollama_api_host }}:{{ ollama_port }} ║"
          - "╠════════════════════════════════════════════════════════════════╣"
          - "║ Next Steps: ║"
          - "║ 1. Reboot if required (check above) ║"
          - "║ 2. Pull models: ollama pull llama3.1:8b ║"
          - "║ 3. Configure NFS mounts (see Phase 5 in playbook) ║"
          - "╚════════════════════════════════════════════════════════════════╝"
      tags: [verify]

  # ==========================================================================
  # HANDLERS
  # ==========================================================================
  handlers:
    # daemon_reload picks up the changed override.conf before restarting.
    - name: Restart ollama
      ansible.builtin.systemd:
        name: ollama
        state: restarted
        daemon_reload: true

    # Only runs when notified by a GRUB config change, so it always
    # represents a real change.
    - name: Update Grub
      ansible.builtin.command: update-grub
      changed_when: true

    - name: Restart Logind
      ansible.builtin.systemd:
        name: systemd-logind
        state: restarted