# stack-orchestrator/playbooks/biscayne-migrate-storage.yml

---
# One-time migration: zvol/XFS → ZFS dataset for /srv/kind/solana
#
# Background:
# Biscayne used a ZFS zvol formatted as XFS to work around io_uring/ZFS
# deadlocks. With ZFS upgraded to 2.2.9 (io_uring fix) and graceful
# shutdown via admin RPC, the zvol/XFS layer is unnecessary overhead.
#
# What this does:
# 1. Stops docker to release all bind mounts referencing /srv/kind
# 2. Unmounts the zvol and any leftover temp mounts
# 3. Creates a ZFS dataset at biscayne/DATA/srv/kind/solana (if needed)
# 4. Destroys the zvol (no data copy — stale data, fresh snapshot on restart)
# 5. Updates fstab, mounts ramdisk, creates directories
# 6. Restarts docker (kind cluster comes back)
#
# Usage:
# ansible-playbook -i inventory/biscayne.yml playbooks/biscayne-migrate-storage.yml
#
# After migration, rebuild the container image with biscayne-sync-tools.yml
# --tags build-container, then start the validator with biscayne-recover.yml.
#
- name: Migrate storage from zvol/XFS to ZFS dataset
  hosts: all
  become: true
  gather_facts: false
  vars:
    # kind cluster name; the node container that is waited for after docker
    # restarts is "<kind_cluster>-control-plane".
    kind_cluster: "laconic-70ce4c4b47e23b85"

    # Legacy zvol: its block-device node and its ZFS name.
    zvol_device: "/dev/zvol/biscayne/DATA/volumes/solana"
    zvol_dataset: "biscayne/DATA/volumes/solana"

    # Replacement ZFS dataset and the directory it is mounted on.
    new_dataset: "biscayne/DATA/srv/kind/solana"
    kind_solana_dir: "/srv/kind/solana"

    # tmpfs ramdisk nested inside the solana directory.
    ramdisk_mount: "/srv/kind/solana/ramdisk"
    ramdisk_size: "1024G"

    # Temp mount point that an interrupted earlier migration may have left.
    zvol_tmp_mount: "/mnt/zvol-migration-tmp"
  tasks:
# ---- assess current state ---------------------------------------------------
    # Read-only probes. Their registered results (zvol_exists, dataset_exists,
    # current_fstype, tmp_mount_exists) gate the tasks that follow, so none of
    # them is allowed to fail the play or report a change.
    - name: Check if zvol device exists
      ansible.builtin.stat:
        path: "{{ zvol_device }}"
      register: zvol_exists

    - name: Check if ZFS dataset already exists
      ansible.builtin.command:
        cmd: zfs list -H -o name {{ new_dataset }}
      register: dataset_exists
      failed_when: false
      changed_when: false

    # Single command, no pipeline: plain `command` is sufficient (no shell and
    # no pipefail needed). The rc/stdout are inspected by later conditions.
    - name: Check current mount type at {{ kind_solana_dir }}
      ansible.builtin.command:
        cmd: findmnt -n -o FSTYPE {{ kind_solana_dir }}
      register: current_fstype
      failed_when: false
      changed_when: false

    - name: Check if temp zvol mount exists
      ansible.builtin.command:
        cmd: findmnt -n {{ zvol_tmp_mount }}
      register: tmp_mount_exists
      failed_when: false
      changed_when: false

    - name: Report current state
      ansible.builtin.debug:
        msg:
          zvol_exists: "{{ zvol_exists.stat.exists | default(false) }}"
          dataset_exists: "{{ dataset_exists.rc == 0 }}"
          # stdout is an empty string (not undefined) when nothing is mounted,
          # so default() needs its second argument to substitute 'none'.
          current_fstype: "{{ current_fstype.stdout | default('none', true) }}"
          temp_mount: "{{ tmp_mount_exists.rc == 0 }}"

    # end_host rather than end_play: the play targets `hosts: all`, and an
    # already-migrated host must not end the play for hosts that still need
    # the migration. With a single host the effect is the same.
    - name: End play for this host if already migrated
      ansible.builtin.meta: end_host
      when:
        - dataset_exists.rc == 0
        - current_fstype.stdout | default('') == 'zfs'
        - not (zvol_exists.stat.exists | default(false))
# ---- stop docker to release all /srv/kind references -----------------------
    # The socket unit is stopped first: while docker.socket is still
    # listening, any client connection can socket-activate the daemon again
    # between the two tasks and re-establish the bind mounts.
    - name: Stop docker socket
      ansible.builtin.systemd:
        name: docker.socket
        state: stopped

    - name: Stop docker (releases kind bind mounts to /srv/kind)
      ansible.builtin.systemd:
        name: docker
        state: stopped
# ---- unmount everything referencing the zvol --------------------------------
    # Only attempted when the earlier findmnt probe found the temp mount.
    - name: Unmount temp zvol mount (leftover from interrupted migration)
      when: tmp_mount_exists.rc == 0
      ansible.posix.mount:
        state: unmounted
        path: "{{ zvol_tmp_mount }}"

    - name: Remove temp mount directory
      ansible.builtin.file:
        state: absent
        path: "{{ zvol_tmp_mount }}"

    # The ramdisk lives below kind_solana_dir, so it is unmounted before its
    # parent. Best effort: errors from this task are deliberately ignored.
    - name: Unmount ramdisk if mounted
      failed_when: false
      ansible.posix.mount:
        state: unmounted
        path: "{{ ramdisk_mount }}"

    # Only when the probe reported XFS (the zvol) at the mount point.
    - name: Unmount zvol from {{ kind_solana_dir }}
      when: current_fstype.stdout | default('') == 'xfs'
      ansible.posix.mount:
        state: unmounted
        path: "{{ kind_solana_dir }}"
# ---- create ZFS dataset if needed ------------------------------------------
    # Fresh dataset: created with its mountpoint set to kind_solana_dir.
    - name: Create ZFS dataset {{ new_dataset }}
      ansible.builtin.command:
        cmd: zfs create -o mountpoint={{ kind_solana_dir }} {{ new_dataset }}
      changed_when: true
      when: dataset_exists.rc != 0

    # Pre-existing dataset: read the `mounted` property first so the mount
    # below only runs (and only reports "changed") when it is really needed,
    # and so a genuine mount failure is reported instead of being swallowed.
    - name: Check whether existing ZFS dataset is mounted
      ansible.builtin.command:
        cmd: zfs get -H -o value mounted {{ new_dataset }}
      register: dataset_mounted
      changed_when: false
      when: dataset_exists.rc == 0

    - name: Mount ZFS dataset if it already existed but isn't mounted
      ansible.builtin.command:
        cmd: zfs mount {{ new_dataset }}
      changed_when: true
      when:
        - dataset_exists.rc == 0
        - (dataset_mounted.stdout | default('') | trim) != 'yes'

    # Gate for the destructive step that follows: fail here unless the mount
    # point really is ZFS now.
    - name: Verify ZFS dataset is mounted
      ansible.builtin.shell:
        cmd: set -o pipefail && findmnt -n -o FSTYPE {{ kind_solana_dir }} | grep -q zfs
        executable: /bin/bash
      changed_when: false
# ---- destroy zvol -----------------------------------------------------------
    # Decide on the dataset itself rather than on the /dev/zvol device node:
    # the final verification uses `zfs list`, and a zvol whose device node is
    # missing would otherwise be skipped here and then fail that check.
    - name: Check if zvol dataset still exists
      ansible.builtin.command:
        cmd: zfs list -H -o name {{ zvol_dataset }}
      register: zvol_dataset_check
      failed_when: false
      changed_when: false

    # Irreversible: -r also removes the zvol's snapshots and descendants.
    # Data is intentionally not copied beforehand.
    - name: Destroy zvol {{ zvol_dataset }}
      ansible.builtin.command:
        cmd: zfs destroy -r {{ zvol_dataset }}
      changed_when: true
      when: zvol_dataset_check.rc == 0
# ---- create directory structure on new dataset ------------------------------
    # One directory per entry, each directly below kind_solana_dir.
    - name: Create solana data directories
      ansible.builtin.file:
        state: directory
        path: "{{ kind_solana_dir }}/{{ subdir }}"
        mode: "0755"
      loop: [ledger, snapshots, log, ramdisk]
      loop_control:
        loop_var: subdir
# ---- update fstab -----------------------------------------------------------
    # The path variables are spliced into regular expressions, so they are
    # passed through regex_escape to be matched literally even if a path ever
    # contains a regex metacharacter such as '.'.
    - name: Remove zvol fstab entry
      ansible.builtin.lineinfile:
        path: /etc/fstab
        regexp: '^\S+zvol\S+\s+{{ kind_solana_dir | regex_escape }}\s'
        state: absent

    - name: Remove any XFS fstab entry for {{ kind_solana_dir }}
      ansible.builtin.lineinfile:
        path: /etc/fstab
        regexp: '^\S+\s+{{ kind_solana_dir | regex_escape }}\s+xfs'
        state: absent

    # Replaces any existing entry for the ramdisk mount point (or appends one).
    # The folded scalar renders as a single space-separated fstab line.
    - name: Update tmpfs ramdisk fstab entry
      ansible.builtin.lineinfile:
        path: /etc/fstab
        regexp: '^\S+\s+{{ ramdisk_mount | regex_escape }}\s'
        line: >-
          tmpfs {{ ramdisk_mount }} tmpfs
          nodev,nosuid,noexec,nodiratime,size={{ ramdisk_size }},nofail,x-systemd.requires=zfs-mount.service
          0 0

    # Let systemd pick up the edited fstab.
    - name: Reload systemd
      ansible.builtin.systemd:
        daemon_reload: true
# ---- mount ramdisk ----------------------------------------------------------
    # `state: mounted` does not only mount: it also (re)writes the fstab entry
    # for this path from the arguments given here. The opts therefore have to
    # be the complete fstab option list, including nofail and the
    # x-systemd.requires ordering on zfs-mount.service; a shorter list would
    # silently strip those options from the entry written by the fstab task.
    - name: Mount tmpfs ramdisk
      ansible.posix.mount:
        path: "{{ ramdisk_mount }}"
        src: tmpfs
        fstype: tmpfs
        opts: "nodev,nosuid,noexec,nodiratime,size={{ ramdisk_size }},nofail,x-systemd.requires=zfs-mount.service"
        state: mounted

    # Created after the mount so the directory lives on the tmpfs itself.
    - name: Ensure accounts directory on ramdisk
      ansible.builtin.file:
        path: "{{ ramdisk_mount }}/accounts"
        state: directory
        mode: "0755"
# ---- restart docker (brings kind back) -------------------------------------
    - name: Start docker
      ansible.builtin.systemd:
        state: started
        name: docker

    # Polls up to 12 times, 5 seconds apart, until the node container reports
    # State.Running == true. The raw block keeps Ansible from templating the
    # Go template braces that docker itself has to receive.
    - name: Wait for kind node container
      ansible.builtin.command:
        cmd: >-
          docker inspect -f '{% raw %}{{.State.Running}}{% endraw %}' {{ kind_cluster }}-control-plane
      register: kind_running
      until: kind_running.stdout == 'true'
      retries: 12
      delay: 5
      changed_when: false
# ---- verification -----------------------------------------------------------
    # Each check fails the play when its expectation does not hold: grep -q
    # exits non-zero on no match and pipefail propagates a findmnt failure.
    - name: Verify solana dir is ZFS
      changed_when: false
      ansible.builtin.shell:
        executable: /bin/bash
        cmd: set -o pipefail && findmnt -n -o FSTYPE {{ kind_solana_dir }} | grep -q zfs

    - name: Verify ramdisk is tmpfs
      changed_when: false
      ansible.builtin.shell:
        executable: /bin/bash
        cmd: set -o pipefail && findmnt -n -o FSTYPE {{ ramdisk_mount }} | grep -q tmpfs

    # Inverted check: `zfs list` succeeding means the zvol is still there.
    - name: Verify zvol is gone
      ansible.builtin.command:
        cmd: zfs list -H -o name {{ zvol_dataset }}
      register: zvol_gone
      changed_when: false
      failed_when: zvol_gone.rc == 0

    - name: Migration complete
      ansible.builtin.debug:
        msg: >-
          Storage migration complete.
          {{ kind_solana_dir }} is now ZFS dataset {{ new_dataset }}.
          Ramdisk at {{ ramdisk_mount }} (tmpfs, {{ ramdisk_size }}).
          zvol {{ zvol_dataset }} destroyed. Data intentionally not copied
          (stale) — download fresh snapshot on next start.
          Next: biscayne-sync-tools.yml --tags build-container, then
          biscayne-recover.yml.