homelab_cluster/ansible/k3s_setup.yml

293 lines
11 KiB
YAML

---
# Play: runs on the control machine and drops the known_hosts entries of the
# cluster nodes listed in `nodes`.
# NOTE(review): despite the play name, no task here adds new host keys.
- name: Remove old and add new SSH host keys for all nodes
  hosts: localhost
  vars:
    # Addresses whose entries are removed from ~/.ssh/known_hosts.
    nodes:
      - "192.168.124.100"
      - "192.168.124.101"
      - "192.168.124.102"
  tasks:
    - name: Remove old host key for all nodes
      loop: "{{ nodes }}"
      ansible.builtin.known_hosts:
        state: absent
        name: "{{ item }}"
        path: "~/.ssh/known_hosts"
# Play: prepares every inventory host. It installs, enables and starts the
# iSCSI initiator, then stops and disables the nm-cloud-setup service and timer
# when those units exist, and reboots the node if either was changed.
- name: Prepare K3s Nodes (Disable conflicting services)
  hosts: all
  gather_facts: false
  # Privilege escalation is set once for the whole play.
  become: true
  tasks:
    # Task name in English: "Install iSCSI initiator (Longhorn prerequisite)".
    - name: Installiere iSCSI-Initiator (Longhorn-Voraussetzung)
      ansible.builtin.package:
        state: present
        name: iscsi-initiator-utils

    - name: Enable iSCSI-Initiator
      ansible.builtin.systemd_service:
        name: iscsid
        masked: false
        enabled: true

    - name: Make sure a service unit is running
      ansible.builtin.systemd_service:
        name: iscsid
        state: started

    # Query-only call (no state/enabled): the registered result carries the
    # unit's `status` dictionary that the next task inspects.
    - name: Check if nm-cloud-setup service exists
      register: nm_cloud_setup_service_check
      ignore_errors: true
      ansible.builtin.systemd:
        name: nm-cloud-setup.service

    - name: Disable nm-cloud-setup service if it exists
      register: nm_service_disabled
      when:
        - nm_cloud_setup_service_check.status is defined
        - nm_cloud_setup_service_check.status.LoadState != 'not-found'
      ansible.builtin.systemd:
        name: nm-cloud-setup.service
        state: stopped
        enabled: false

    # Same query/disable pair for the timer unit.
    - name: Check if nm-cloud-setup timer exists
      register: nm_cloud_setup_timer_check
      ignore_errors: true
      ansible.builtin.systemd:
        name: nm-cloud-setup.timer

    - name: Disable nm-cloud-setup timer if it exists
      register: nm_timer_disabled
      when:
        - nm_cloud_setup_timer_check.status is defined
        - nm_cloud_setup_timer_check.status.LoadState != 'not-found'
      ansible.builtin.systemd:
        name: nm-cloud-setup.timer
        state: stopped
        enabled: false

    # Reboot only when one of the two disable tasks actually changed something.
    - name: Reboot node to apply changes and wait for it to come back
      when: nm_service_disabled is changed or nm_timer_disabled is changed
      ansible.builtin.reboot:
        msg: "Rebooting node after disabling nm-cloud-setup"
        test_command: whoami
        pre_reboot_delay: 2
        post_reboot_delay: 30
        connect_timeout: 5
        reboot_timeout: 600
# Play: installs the K3s server, makes sure the k3s unit is active, and
# publishes three host facts that later plays read through hostvars:
#   k3s_service_active - true once the k3s unit is confirmed active
#   k3s_token          - contents of the server's node-token file
#   k3s_server_ip      - the server's default IPv4 address
- name: Install K3s Server
  hosts: k3s_server
  become: true
  gather_facts: true
  tasks:
    - name: Ensure default interface fact is available
      ansible.builtin.fail:
        msg: "Could not determine the default network interface name (ansible_default_ipv4.interface)."
      when: ansible_default_ipv4.interface is not defined

    - name: Download K3s installation script
      ansible.builtin.get_url:
        url: https://get.k3s.io
        dest: /tmp/install-k3s.sh
        mode: "0755"
      register: download_result

    # `creates` skips the installer once the server kubeconfig exists.
    - name: Install K3s Server using the downloaded script
      ansible.builtin.command: >
        sh /tmp/install-k3s.sh server
        --write-kubeconfig-mode "644"
        --node-ip "{{ ansible_default_ipv4.address }}"
        --flannel-iface "{{ ansible_default_ipv4.interface }}"
      args:
        creates: /etc/rancher/k3s/k3s.yaml
      register: k3s_install_result
      # Report "changed" only when the installer output shows it did work.
      changed_when: >-
        k3s_install_result.rc == 0 and
        ("Applying" in k3s_install_result.stdout or "Starting" in k3s_install_result.stdout)
      failed_when: k3s_install_result.rc != 0

    - name: Show K3s install script execution details
      ansible.builtin.debug:
        var: k3s_install_result
        verbosity: 1

    - name: Reload systemd configuration if install script ran successfully
      ansible.builtin.systemd:
        daemon_reload: true
      when: k3s_install_result.rc == 0 and k3s_install_result.changed

    - name: Pause briefly after installation attempt if script ran successfully
      ansible.builtin.pause:
        seconds: 10
      when: k3s_install_result.rc == 0 and k3s_install_result.changed

    # Probe only: a non-zero exit code is data for the tasks below, not a
    # failure, so `failed_when: false` is enough (no ignore_errors needed).
    - name: Check K3s service status (run always after attempt)
      ansible.builtin.command: systemctl is-active k3s
      register: k3s_service_status
      changed_when: false
      failed_when: false

    - name: Show K3s service status output
      ansible.builtin.debug:
        msg: "K3s service status check result: {{ k3s_service_status.stdout | default('N/A') }} (RC={{ k3s_service_status.rc | default('N/A') }})"

    # RC 0 (active) and RC 3 (not active) are handled below; anything else is
    # treated as an error.
    - name: Fail if K3s service check failed after successful install script run
      ansible.builtin.fail:
        msg: "K3s install script reported success (RC=0), but service check failed (RC={{ k3s_service_status.rc }}). Check logs on {{ inventory_hostname }} with 'journalctl -u k3s'."
      when:
        - k3s_install_result.rc == 0
        - k3s_service_status.rc is defined
        - k3s_service_status.rc != 0
        - k3s_service_status.rc != 3

    - name: Start K3s service if it is inactive
      ansible.builtin.systemd:
        name: k3s
        state: started
      when: k3s_service_status.rc is defined and k3s_service_status.rc == 3
      register: k3s_start_result

    # Re-check whenever the start task ran, not only when it reported a
    # change: a unit that was still starting up during the first probe makes
    # the start task report "ok", and the old `.changed` guard then skipped
    # this check and failed a healthy node. Retry for up to a minute so a
    # slow start can finish. Exhausted retries mark the task failed, so
    # `ignore_errors` lets the explicit fail task below give the
    # journalctl hint.
    - name: Re-check K3s service status after start attempt
      ansible.builtin.command: systemctl is-active k3s
      register: k3s_service_status_after_start
      changed_when: false
      until: k3s_service_status_after_start.rc == 0
      retries: 12
      delay: 5
      ignore_errors: true
      when: k3s_start_result is not skipped

    - name: Show K3s service status after start attempt
      ansible.builtin.debug:
        msg: "K3s service status after start attempt: {{ k3s_service_status_after_start.stdout | default('N/A') }} (RC={{ k3s_service_status_after_start.rc | default('N/A') }})"
      when: k3s_start_result is not skipped

    # Fail when the first probe was not "active" and either no start was
    # attempted or the re-check never saw the unit become active.
    - name: Fail if K3s service is not active after potential start
      ansible.builtin.fail:
        msg: "K3s service could not be started. Check logs on {{ inventory_hostname }} with 'journalctl -u k3s'."
      when:
        - k3s_service_status.rc is defined
        - k3s_service_status.rc != 0
        - >-
          k3s_start_result is skipped or
          (k3s_service_status_after_start.rc | default(1)) != 0

    - name: Set flag indicating K3s service is active
      ansible.builtin.set_fact:
        k3s_service_active: true
      when: >-
        (k3s_service_status.rc is defined and k3s_service_status.rc == 0) or
        (k3s_start_result is not skipped and
        (k3s_service_status_after_start.rc | default(1)) == 0)

    - name: Wait for K3s server node-token to be created
      ansible.builtin.wait_for:
        path: /var/lib/rancher/k3s/server/node-token
        timeout: 120
        delay: 5
      when: k3s_service_active | default(false)

    # The token is a cluster join secret: keep it out of task output.
    - name: Read the K3s node-token from server
      ansible.builtin.slurp:
        src: /var/lib/rancher/k3s/server/node-token
      register: k3s_token_content
      no_log: true
      when: k3s_service_active | default(false)

    # NOTE(review): `cacheable` also writes the token to the fact cache when
    # one is configured; kept for backward compatibility - confirm it is wanted.
    - name: Store K3s token and server IP for agents
      ansible.builtin.set_fact:
        k3s_token: "{{ k3s_token_content.content | b64decode | trim }}"
        k3s_server_ip: "{{ ansible_default_ipv4.address }}"
        cacheable: true
      no_log: true
      when: k3s_service_active | default(false) and k3s_token_content.content is defined
# Play: joins every host in `k3s_agents` to the cluster, using the server IP
# and join token that the first host of `k3s_server` published as host facts
# (k3s_server_ip, k3s_token).
- name: Install K3s Agents
  hosts: k3s_agents
  become: true
  gather_facts: true
  tasks:
    - name: Ensure default interface fact is available
      ansible.builtin.fail:
        msg: "Could not determine the default network interface name (ansible_default_ipv4.interface)."
      when: ansible_default_ipv4.interface is not defined

    - name: Check if K3s server facts are available
      ansible.builtin.fail:
        msg: "K3s server IP or token not found. Ensure server play ran successfully and the server is running."
      when: hostvars[groups['k3s_server'][0]].k3s_server_ip is not defined or hostvars[groups['k3s_server'][0]].k3s_token is not defined

    # The join token is a secret: show only that it is set (and its length),
    # never its value, so it does not end up in console output or logs.
    - name: Debug Agent Environment Variables
      ansible.builtin.debug:
        msg:
          - "Attempting to use K3S_URL=https://{{ hostvars[groups['k3s_server'][0]].k3s_server_ip }}:6443"
          - "Attempting to use K3S_TOKEN=<redacted, {{ hostvars[groups['k3s_server'][0]].k3s_token | length }} characters>"

    # Runs the uninstall script whenever it is present (`removes` skips the
    # task otherwise); its exit status is deliberately ignored.
    - name: Remove previous K3s agent installation (optional, idempotent)
      ansible.builtin.shell: |
        /usr/local/bin/k3s-agent-uninstall.sh || true
      args:
        removes: /usr/local/bin/k3s-agent-uninstall.sh
      ignore_errors: true

    - name: Download K3s installation script
      ansible.builtin.get_url:
        url: https://get.k3s.io
        dest: /tmp/install-k3s.sh
        mode: "0755"

    # Server URL and token are passed through the environment rather than on
    # the command line.
    # NOTE(review): because the previous task uninstalls first, this presumably
    # reinstalls the agent on every run regardless of `creates` - confirm that
    # is intended.
    - name: Install K3s Agent using the downloaded script
      ansible.builtin.command: >
        sh /tmp/install-k3s.sh agent
        --node-ip "{{ ansible_default_ipv4.address }}"
        --flannel-iface "{{ ansible_default_ipv4.interface }}"
        --with-node-id
      environment:
        K3S_URL: "https://{{ hostvars[groups['k3s_server'][0]].k3s_server_ip }}:6443"
        K3S_TOKEN: "{{ hostvars[groups['k3s_server'][0]].k3s_token }}"
      args:
        creates: /etc/rancher/k3s/config.yaml
      register: k3s_agent_install_result
      changed_when: k3s_agent_install_result.rc == 0
      failed_when: k3s_agent_install_result.rc != 0

    - name: Show K3s agent install script execution details
      ansible.builtin.debug:
        var: k3s_agent_install_result
        verbosity: 1
# Play: copies the server's kubeconfig to the control machine as
# ../kubeconfig, but only for hosts where `k3s_service_active` is set.
- name: Get Kubeconfig from Server (optional)
  hosts: k3s_server
  gather_facts: false
  tasks:
    - name: Fetch Kubeconfig file from K3s server
      when: k3s_service_active | default(false)
      # Escalation is requested for this task only.
      become: true
      ansible.builtin.fetch:
        # flat: write straight to `dest` instead of a per-host directory tree.
        flat: true
        dest: ../kubeconfig
        src: /etc/rancher/k3s/k3s.yaml
# Play: rewrites the `server:` URL in the fetched kubeconfig on the control
# machine so it points at the K3s server's IP instead of whatever address the
# file was generated with.
- name: Replace server IP in kubeconfig with current server IP
  hosts: localhost
  gather_facts: false
  vars:
    # Anchored to the playbook directory: the fetch action resolves a relative
    # `dest` against the playbook directory, while a module running on
    # localhost presumably resolves a relative path against the current working
    # directory. An absolute path makes both refer to the same file no matter
    # where ansible-playbook is started from.
    kubeconfig_path: "{{ playbook_dir }}/../kubeconfig"
    server_ip: "{{ hostvars[groups['k3s_server'][0]].k3s_server_ip }}"
  tasks:
    - name: Replace server IP in kubeconfig file
      ansible.builtin.replace:
        path: "{{ kubeconfig_path }}"
        regexp: "server: https://.*:6443"
        replace: "server: https://{{ server_ip }}:6443"
      # Same guard as the fetch: without an active server no kubeconfig was
      # fetched and `k3s_server_ip` may be undefined.
      when: hostvars[groups['k3s_server'][0]].k3s_service_active | default(false)
# Play: keeps a single /etc/hosts line on the control machine that maps every
# name in `service_hosts` to the K3s server's IP.
- name: Update /etc/hosts with k3s-server.local
  hosts: localhost
  become: true
  vars:
    server_ip: "{{ hostvars[groups['k3s_server'][0]].k3s_server_ip }}"
    service_hosts:
      - sonarqube.local
      - gitea.local
      - prometheus.local
      - grafana.local
      - longhorn.local
  tasks:
    - name: Ensure all service hostnames point to the server IP in /etc/hosts
      ansible.builtin.lineinfile:
        path: /etc/hosts
        # Match the managed line by its host list, whatever address it
        # currently carries. The previous pattern embedded the current
        # server IP, so after an IP change the old line no longer matched: a
        # second line was appended and the stale one stayed in place. The
        # pattern is built from `service_hosts` so it cannot drift from `line`.
        regexp: '^\S+\s+{{ service_hosts | map("regex_escape") | join(" ") }}\s*$'
        line: "{{ server_ip }} {{ service_hosts | join(' ') }}"
        state: present
        create: true