fix: redeploy playbook paths, tags, and idempotency

- Fix snapshot_dir: /srv/solana/snapshots → /srv/kind/solana/snapshots
  (kind node reads from the bind mount, not the zvol mount directly)
- Fix kind-internal paths: /mnt/solana/... → /mnt/validator-... to match
  actual PV hostPath layout (individual mounts, not unified)
- Add 'scale-up' tag to "Scale validator to 1" task for partial recovery
  (--tags snapshot,scale-up,verify resumes without re-running deploy)
- Make 'Start deployment' idempotent: failed_when: false + follow-up
  check so existing deployment doesn't fail the play

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
fix/kind-mount-propagation
A. F. Dudley 2026-03-08 04:14:05 +00:00
parent 05f9acf8a0
commit ad68d505ae
1 changed file with 41 additions and 9 deletions

View File

@ -11,7 +11,7 @@
# 3. [deploy] laconic-so deployment start, then immediately scale to 0
# 4. [snapshot] Download snapshot via aria2c to host bind mount
# 5. [snapshot] Verify snapshot visible inside kind node
# 6. [deploy] Scale validator back to 1
# 6. [deploy,scale-up] Scale validator back to 1
# 7. [verify] Wait for pod Running, check logs + RPC health
#
# The validator cannot run during snapshot download — it would lock/use the
@ -38,6 +38,10 @@
# ansible-playbook -i biscayne.vaasl.io, playbooks/biscayne-redeploy.yml \
# --tags snapshot
#
# # Resume after partial failure (download snapshot, scale up, verify)
# ansible-playbook -i biscayne.vaasl.io, playbooks/biscayne-redeploy.yml \
# --tags snapshot,scale-up,verify
#
- name: Redeploy agave validator on biscayne
hosts: all
gather_facts: false
@ -51,7 +55,7 @@
kind_cluster: laconic-70ce4c4b47e23b85
k8s_namespace: "laconic-{{ kind_cluster }}"
deployment_name: "{{ kind_cluster }}-deployment"
snapshot_dir: /srv/solana/snapshots
snapshot_dir: /srv/kind/solana/snapshots
ledger_dir: /srv/solana/ledger
accounts_dir: /srv/solana/ramdisk/accounts
ramdisk_mount: /srv/solana/ramdisk
@ -185,7 +189,7 @@
cmd: >
set -o pipefail &&
docker exec {{ kind_cluster }}-control-plane
df -T /mnt/solana/ramdisk 2>/dev/null | grep -q xfs
df -T /mnt/validator-accounts 2>/dev/null | grep -q xfs
executable: /bin/bash
register: kind_ramdisk_check
failed_when: kind_ramdisk_check.rc != 0
@ -221,10 +225,31 @@
- name: Start deployment (creates kind cluster + deploys pod)
ansible.builtin.command: "{{ laconic_so }} deployment --dir {{ deployment_dir }} start"
changed_when: true
register: deploy_start
changed_when: deploy_start.rc == 0
failed_when: false
timeout: 1200
tags: [deploy]
- name: Verify deployment started or already exists
ansible.builtin.command: >
kubectl get deployment {{ deployment_name }}
-n {{ k8s_namespace }}
-o jsonpath='{.metadata.name}'
register: deploy_verify
changed_when: false
failed_when: deploy_verify.rc != 0
when: deploy_start.rc != 0
tags: [deploy]
- name: Show deployment start warning
ansible.builtin.debug:
msg: >-
laconic-so deployment start returned rc={{ deploy_start.rc }}
but deployment exists — continuing (idempotent).
when: deploy_start.rc != 0 and (deploy_verify.rc | default(1)) == 0
tags: [deploy]
- name: Wait for deployment to exist
ansible.builtin.command: >
kubectl get deployment {{ deployment_name }}
@ -275,7 +300,7 @@
- name: Verify kind node mounts
ansible.builtin.command: >
docker exec {{ kind_cluster }}-control-plane
ls /mnt/solana/snapshots/
ls /mnt/validator-snapshots/
register: kind_mount_check
changed_when: false
tags: [snapshot]
@ -301,7 +326,7 @@
ansible.builtin.shell: >
set -o pipefail &&
docker exec {{ kind_cluster }}-control-plane
find /mnt/solana/snapshots/ -name '*.tar.*' -maxdepth 1 | head -5
find /mnt/validator-snapshots/ -name '*.tar.*' -maxdepth 1 | head -5
register: kind_snapshot_check
failed_when: kind_snapshot_check.stdout == ""
changed_when: false
@ -320,7 +345,7 @@
kubectl scale deployment {{ deployment_name }}
-n {{ k8s_namespace }} --replicas=1
changed_when: true
tags: [deploy]
tags: [deploy, scale-up]
# ---- verify: confirm validator is running --------------------------------
- name: Wait for pod to be running
@ -334,10 +359,17 @@
changed_when: false
tags: [verify]
- name: Verify unified mount inside kind node
ansible.builtin.command: "docker exec {{ kind_cluster }}-control-plane ls /mnt/solana/"
- name: Verify PV mounts inside kind node
ansible.builtin.shell:
cmd: >
set -o pipefail &&
docker exec {{ kind_cluster }}-control-plane
df -T /mnt/validator-ledger /mnt/validator-accounts
/mnt/validator-snapshots /mnt/validator-log 2>&1
executable: /bin/bash
register: mount_check
changed_when: false
failed_when: false
tags: [verify]
- name: Show mount contents