# Source: stack-orchestrator/playbooks/biscayne-start.yml (138 lines, 4.7 KiB, YAML)

---
# Start agave validator on biscayne
#
# Ensures the kind container is running, verifies XFS mounts are visible
# inside the kind node, then scales the deployment to 1.
#
# Prerequisites:
# - biscayne-prepare-agave.yml has been run (fstab entries, systemd units)
# - A snapshot exists in /srv/kind/solana/snapshots (or use biscayne-recover.yml)
#
# Usage:
# ansible-playbook playbooks/biscayne-start.yml
#
- name: Start agave validator
  hosts: all
  gather_facts: false
  environment:
    # kubectl tasks below talk to the cluster through this kubeconfig.
    KUBECONFIG: /home/rix/.kube/config
  vars:
    deployment_dir: /srv/deployments/agave

  tasks:
    # ---- discover cluster id -------------------------------------------------
    # The cluster id is the second whitespace-separated field of the
    # `cluster-id:` line in deployment.yml. Every later name (kind node,
    # namespace, deployment) is derived from it, so an empty value is treated
    # as a hard failure instead of producing names like "-control-plane".
    - name: Read cluster-id from deployment
      ansible.builtin.shell:
        cmd: >-
          set -o pipefail &&
          grep '^cluster-id:' {{ deployment_dir }}/deployment.yml |
          awk '{print $2}'
        executable: /bin/bash
      register: cluster_id_result
      changed_when: false
      failed_when: >-
        cluster_id_result.rc != 0
        or (cluster_id_result.stdout | trim | length) == 0

    - name: Set cluster facts
      ansible.builtin.set_fact:
        kind_cluster: "{{ cluster_id_result.stdout }}"
        kind_node: "{{ cluster_id_result.stdout }}-control-plane"
        k8s_namespace: "laconic-{{ cluster_id_result.stdout }}"
        deployment_name: "{{ cluster_id_result.stdout }}-deployment"

    # ---- ensure kind container is running ------------------------------------
    # The inspect is allowed to fail (failed_when: false); any output other
    # than the literal "true" triggers `docker start` below.
    - name: Check kind container state
      ansible.builtin.command: docker inspect -f '{% raw %}{{ .State.Running }}{% endraw %}' {{ kind_node }}
      register: kind_running
      failed_when: false
      changed_when: false

    - name: Start kind container
      ansible.builtin.command: docker start {{ kind_node }}
      when: kind_running.stdout | default('false') != 'true'
      changed_when: true

    # Polls the node's Ready condition: up to 30 attempts, 10 seconds apart.
    - name: Wait for kind node ready
      ansible.builtin.command: >-
        kubectl get node {{ kind_node }}
        -o jsonpath='{.status.conditions[?(@.type=="Ready")].status}'
      register: node_ready
      changed_when: false
      retries: 30
      delay: 10
      until: node_ready.stdout == "True"

    # ---- verify mounts inside kind node --------------------------------------
    # laconic-so creates individual extraMounts per volume:
    #   /srv/kind/solana/ledger           → /mnt/validator-ledger (inside kind node)
    #   /srv/kind/solana/ramdisk/accounts → /mnt/validator-accounts
    #
    # Full `df -T` output for both paths; used only for the failure message.
    - name: Verify kind node sees correct filesystems at PV paths
      ansible.builtin.command: >-
        docker exec {{ kind_node }}
        df -T /mnt/validator-ledger /mnt/validator-accounts
      register: kind_fs_check
      changed_when: false

    # The ledger type is read on its own so an XFS accounts mount cannot
    # satisfy the ledger check. Output is a "Type" header line followed by
    # the filesystem type, so the last line is the value of interest.
    - name: Read ledger filesystem type
      ansible.builtin.command: >-
        docker exec {{ kind_node }}
        df --output=fstype /mnt/validator-ledger
      register: ledger_fstype
      changed_when: false

    - name: Fail if ledger is not XFS (zvol)
      ansible.builtin.fail:
        msg: >-
          validator-ledger must be XFS (on zvol). Got:
          {{ kind_fs_check.stdout }}
      when: (ledger_fstype.stdout_lines | last | trim) != 'xfs'

    # Fails when the accounts path reports filesystem type "zfs", and also
    # when the probe itself fails (previously a failed probe passed silently).
    # NOTE(review): the task name says "must be tmpfs" while the final report
    # says "ram0"; this check only rejects zfs — confirm the intended type.
    - name: Fail if accounts is on ZFS (must be tmpfs)
      ansible.builtin.command: >-
        docker exec {{ kind_node }}
        df --output=fstype /mnt/validator-accounts
      register: accounts_zfs_check
      changed_when: false
      failed_when: >-
        accounts_zfs_check.rc != 0
        or (accounts_zfs_check.stdout_lines | last | trim) == 'zfs'

    - name: Show kind node PV filesystems
      ansible.builtin.command: >-
        docker exec {{ kind_node }}
        df -T /mnt/validator-ledger /mnt/validator-accounts
        /mnt/validator-snapshots /mnt/validator-log
      register: kind_df
      changed_when: false

    - name: Show kind mount info
      ansible.builtin.debug:
        var: kind_df.stdout_lines

    # ---- scale up ------------------------------------------------------------
    # The lookup may fail (failed_when: false); empty output converts to 0,
    # in which case the scale task below still runs.
    - name: Get current replica count
      ansible.builtin.command: >-
        kubectl get deployment {{ deployment_name }}
        -n {{ k8s_namespace }}
        -o jsonpath='{.spec.replicas}'
      register: current_replicas
      failed_when: false
      changed_when: false

    # Only scales when the current count is 0; a count above 0 is left alone.
    - name: Scale deployment to 1
      ansible.builtin.command: >-
        kubectl scale deployment {{ deployment_name }}
        -n {{ k8s_namespace }} --replicas=1
      when: current_replicas.stdout | default('0') | int == 0
      changed_when: true

    # Polls the phase of the first pod matching the app label: up to 30
    # attempts, 10 seconds apart.
    - name: Wait for pod running
      ansible.builtin.command: >-
        kubectl get pods -n {{ k8s_namespace }}
        -l app={{ deployment_name }}
        -o jsonpath='{.items[0].status.phase}'
      register: pod_phase
      changed_when: false
      retries: 30
      delay: 10
      until: pod_phase.stdout == "Running"

    - name: Report started
      ansible.builtin.debug:
        msg: >-
          Validator started. Kind node: {{ kind_node }}.
          Pod phase: {{ pod_phase.stdout }}.
          PV mounts: XFS (zvol for ledger/snapshots/log, ram0 for accounts).