---
# Prepare biscayne host for agave validator
#
# Deployment layers:
#   1. Base system   — Docker, ZFS (out of scope)
#   2. Prepare kind  — /srv/kind directory exists (ZFS dataset, out of scope)
#   3. laconic-so    — Installs kind, mounts /srv/kind → /mnt in kind node
#   4. Prepare agave — THIS PLAYBOOK
#   5. Deploy agave  — laconic-so deploys agave-stack into kind
#
# Agave requires two things from the host that kind doesn't provide:
#
# Invariant 1: /srv/kind/solana is XFS on a zvol (not ZFS)
#   Why: agave uses io_uring for async I/O. io_uring workers deadlock on
#   ZFS datasets (D-state in dsl_dir_tempreserve_space). XFS on a zvol
#   (block device) works fine. /srv/solana is NOT the zvol — it's a
#   directory on the ZFS dataset biscayne/DATA/srv. All data paths must
#   use /srv/kind/solana, which is the actual zvol mount.
#   Persisted as: fstab entry mounting /dev/zvol/.../solana at /srv/kind/solana
#
# Invariant 2: /srv/kind/solana/ramdisk is tmpfs (1TB)
#   Why: agave accounts must be in RAM for performance. tmpfs survives
#   process restarts but not host reboots (same as /dev/ram0 but simpler).
#   Persisted as: fstab entry (no format service needed)
#
# This playbook checks each invariant and only acts if it's not met.
# Idempotent — safe to run multiple times.
#
# Usage:
#   ansible-playbook playbooks/biscayne-prepare-agave.yml
#
- name: Configure OS-level services for agave
  hosts: all
  # Facts are not needed; all paths/devices come from vars below.
  gather_facts: false
  become: true
  vars:
    # Block device backing the agave data filesystem (zvol on the pool).
    zvol_device: /dev/zvol/biscayne/DATA/volumes/solana
    # XFS mountpoint of the zvol — all agave data paths live under here.
    kind_solana_dir: /srv/kind/solana
    # tmpfs mountpoint for the RAM-backed accounts area (nested under the
    # zvol mount, so it must mount after srv-kind-solana.mount).
    ramdisk_mount: /srv/kind/solana/ramdisk
    ramdisk_size: 1024G
    accounts_dir: /srv/kind/solana/ramdisk/accounts
    # Where laconic-so keeps the agave deployment (read later for cluster-id).
    deployment_dir: /srv/deployments/agave

  tasks:
    # ---- cleanup legacy ramdisk services -----------------------------------
    # Older revisions persisted the ramdisk via /dev/ram0 plus two one-shot
    # units; the tmpfs fstab entry replaces both, so retire the units.
    - name: Stop and disable legacy ramdisk services
      ansible.builtin.systemd:
        name: "{{ item }}"
        state: stopped
        enabled: false
      loop:
        - format-ramdisk.service
        - ramdisk-accounts.service
      # Best-effort: the units may already be absent on a clean host.
      failed_when: false

    - name: Remove legacy ramdisk service files
      ansible.builtin.file:
        path: "/etc/systemd/system/{{ item }}"
        state: absent
      loop:
        - format-ramdisk.service
        - ramdisk-accounts.service
      # Used below to decide whether a systemd daemon-reload is required.
      register: legacy_units_removed

    # ---- fstab entries -----------------------------------------------------
    # /srv/solana is NOT the zvol — it's a directory on the ZFS dataset.
    # All data paths use /srv/kind/solana (the actual zvol mount).
- name: Remove stale /srv/solana zvol fstab entry ansible.builtin.lineinfile: path: /etc/fstab regexp: '^\S+\s+/srv/solana\s+xfs' state: absent - name: Remove stale /srv/solana/ramdisk fstab entry ansible.builtin.lineinfile: path: /etc/fstab regexp: '^/dev/ram0\s+' state: absent - name: Remove stale kind rbind fstab entry ansible.builtin.lineinfile: path: /etc/fstab regexp: '^\S+\s+{{ kind_solana_dir }}\s+none\s+rbind' state: absent - name: Ensure zvol fstab entry ansible.builtin.lineinfile: path: /etc/fstab regexp: '^\S+\s+{{ kind_solana_dir }}\s' line: '{{ zvol_device }} {{ kind_solana_dir }} xfs defaults,nofail,x-systemd.requires=zfs-mount.service 0 0' register: fstab_zvol - name: Ensure tmpfs ramdisk fstab entry ansible.builtin.lineinfile: path: /etc/fstab regexp: '^\S+\s+{{ ramdisk_mount }}\s' line: "tmpfs {{ ramdisk_mount }} tmpfs nodev,nosuid,noexec,nodiratime,size={{ ramdisk_size }},nofail,x-systemd.requires=srv-kind-solana.mount 0 0" register: fstab_ramdisk # ---- reload systemd if anything changed -------------------------------------- - name: Reload systemd ansible.builtin.systemd: daemon_reload: true when: legacy_units_removed.changed or fstab_zvol.changed or fstab_ramdisk.changed # ---- apply device mounts now if not correct ---------------------------------- - name: Check kind zvol mount is XFS ansible.builtin.shell: cmd: > set -o pipefail && findmnt -n -o FSTYPE {{ kind_solana_dir }} | grep -q xfs executable: /bin/bash register: kind_zvol_check failed_when: false changed_when: false - name: Unmount stale mounts ansible.builtin.shell: cmd: | umount {{ ramdisk_mount }} 2>/dev/null || true umount {{ kind_solana_dir }} 2>/dev/null || true executable: /bin/bash changed_when: kind_zvol_check.rc != 0 when: kind_zvol_check.rc != 0 - name: Mount zvol ansible.posix.mount: path: "{{ kind_solana_dir }}" src: "{{ zvol_device }}" fstype: xfs state: mounted when: kind_zvol_check.rc != 0 - name: Check ramdisk mount is tmpfs ansible.builtin.shell: cmd: > set -o 
pipefail && findmnt -n -o FSTYPE {{ ramdisk_mount }} | grep -q tmpfs executable: /bin/bash register: ramdisk_check failed_when: false changed_when: false - name: Mount tmpfs ramdisk ansible.posix.mount: path: "{{ ramdisk_mount }}" src: tmpfs fstype: tmpfs opts: "nodev,nosuid,noexec,nodiratime,size={{ ramdisk_size }}" state: mounted when: ramdisk_check.rc != 0 - name: Create accounts directory ansible.builtin.file: path: "{{ accounts_dir }}" state: directory owner: solana group: solana mode: "0755" # Docker requires shared propagation on mounts it bind-mounts into # containers. Without this, `docker start` fails with "not a shared # or slave mount". # No ansible module supports mount propagation flags; command required. - name: Ensure shared propagation on kind mounts # noqa: command-instead-of-module ansible.builtin.command: cmd: mount --make-shared {{ item }} loop: - "{{ kind_solana_dir }}" - "{{ kind_solana_dir }}/ramdisk" changed_when: false # ---- verify ----------------------------------------------------------------- - name: Verify zvol is XFS ansible.builtin.shell: cmd: set -o pipefail && df -T {{ kind_solana_dir }} | grep -q xfs executable: /bin/bash changed_when: false - name: Verify ramdisk is tmpfs ansible.builtin.shell: cmd: set -o pipefail && df -T {{ ramdisk_mount }} | grep -q tmpfs executable: /bin/bash changed_when: false - name: Verify mount contents ansible.builtin.shell: cmd: > set -o pipefail && ls {{ kind_solana_dir }}/ledger {{ kind_solana_dir }}/snapshots {{ ramdisk_mount }}/accounts 2>&1 | head -5 executable: /bin/bash register: kind_mount_verify changed_when: false # Assert the kind node sees XFS at the PV mount paths. # laconic-so creates individual extraMounts per volume: # /srv/kind/solana/ledger → /mnt/validator-ledger (inside kind node) # /srv/kind/solana/ramdisk/accounts → /mnt/validator-accounts # The PV hostPaths use /mnt/, not /mnt/solana/. 
    # Derive the kind cluster name: assumes deployment.yml has a top-level
    # "cluster-id:" line and that the control-plane container is named
    # "<cluster-id>-control-plane". Fails the play if the line is absent
    # (pipefail + grep) — NOTE(review): confirm that hard failure is intended
    # when no deployment exists yet.
    - name: Read cluster-id from deployment
      ansible.builtin.shell:
        cmd: set -o pipefail && grep '^cluster-id:' {{ deployment_dir }}/deployment.yml | awk '{print $2}'
        executable: /bin/bash
      register: cluster_id_result
      changed_when: false

    # Best-effort check (failed_when: false) — presumably so the playbook
    # still completes when the kind cluster/container is not running.
    - name: Check kind node filesystem visibility
      ansible.builtin.shell:
        cmd: >
          set -o pipefail &&
          docker exec {{ cluster_id_result.stdout }}-control-plane
          df -T /mnt/validator-ledger /mnt/validator-accounts
        executable: /bin/bash
      register: kind_fstype
      changed_when: false
      failed_when: false

    # Summarize the verification results gathered above. kind_fstype may be
    # empty when the docker exec above failed, hence the default([]).
    - name: Show status
      ansible.builtin.debug:
        msg:
          kind_mount: "{{ kind_mount_verify.stdout_lines }}"
          kind_fstype: "{{ kind_fstype.stdout_lines | default([]) }}"

# Chain the relay configuration playbook after host preparation.
- name: Configure Ashburn validator relay
  ansible.builtin.import_playbook: ashburn-relay-biscayne.yml