From ad68d505aea3a7b38064188cdbbc88773cd62413 Mon Sep 17 00:00:00 2001
From: "A. F. Dudley"
Date: Sun, 8 Mar 2026 04:14:05 +0000
Subject: [PATCH] fix: redeploy playbook paths, tags, and idempotency
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Fix snapshot_dir: /srv/solana/snapshots → /srv/kind/solana/snapshots
  (kind node reads from the bind mount, not the zvol mount directly)
- Fix kind-internal paths: /mnt/solana/... → /mnt/validator-... to match
  actual PV hostPath layout (individual mounts, not unified)
- Add 'scale-up' tag to "Scale validator to 1" task for partial recovery
  (--tags snapshot,scale-up,verify resumes without re-running deploy)
- Make 'Start deployment' idempotent: failed_when: false + follow-up
  check so existing deployment doesn't fail the play

Co-Authored-By: Claude Opus 4.6
---
 playbooks/biscayne-redeploy.yml | 50 +++++++++++++++++++++++++++------
 1 file changed, 41 insertions(+), 9 deletions(-)

diff --git a/playbooks/biscayne-redeploy.yml b/playbooks/biscayne-redeploy.yml
index 86de9c75..cef45372 100644
--- a/playbooks/biscayne-redeploy.yml
+++ b/playbooks/biscayne-redeploy.yml
@@ -11,7 +11,7 @@
 # 3. [deploy] laconic-so deployment start, then immediately scale to 0
 # 4. [snapshot] Download snapshot via aria2c to host bind mount
 # 5. [snapshot] Verify snapshot visible inside kind node
-# 6. [deploy] Scale validator back to 1
+# 6. [deploy,scale-up] Scale validator back to 1
 # 7. [verify] Wait for pod Running, check logs + RPC health
 #
 # The validator cannot run during snapshot download — it would lock/use the
@@ -38,6 +38,10 @@
 # ansible-playbook -i biscayne.vaasl.io, playbooks/biscayne-redeploy.yml \
 # --tags snapshot
 #
+# # Resume after partial failure (download snapshot, scale up, verify)
+# ansible-playbook -i biscayne.vaasl.io, playbooks/biscayne-redeploy.yml \
+# --tags snapshot,scale-up,verify
+#
 - name: Redeploy agave validator on biscayne
   hosts: all
   gather_facts: false
@@ -51,7 +55,7 @@
     kind_cluster: laconic-70ce4c4b47e23b85
     k8s_namespace: "laconic-{{ kind_cluster }}"
     deployment_name: "{{ kind_cluster }}-deployment"
-    snapshot_dir: /srv/solana/snapshots
+    snapshot_dir: /srv/kind/solana/snapshots
     ledger_dir: /srv/solana/ledger
     accounts_dir: /srv/solana/ramdisk/accounts
     ramdisk_mount: /srv/solana/ramdisk
@@ -185,7 +189,7 @@
         cmd: >
           set -o pipefail &&
           docker exec {{ kind_cluster }}-control-plane
-          df -T /mnt/solana/ramdisk 2>/dev/null | grep -q xfs
+          df -T /mnt/validator-accounts 2>/dev/null | grep -q xfs
         executable: /bin/bash
       register: kind_ramdisk_check
       failed_when: kind_ramdisk_check.rc != 0
@@ -221,10 +225,31 @@
 
     - name: Start deployment (creates kind cluster + deploys pod)
       ansible.builtin.command: "{{ laconic_so }} deployment --dir {{ deployment_dir }} start"
-      changed_when: true
+      register: deploy_start
+      changed_when: deploy_start.rc == 0
+      failed_when: false
       timeout: 1200
       tags: [deploy]
 
+    - name: Verify deployment started or already exists
+      ansible.builtin.command: >
+        kubectl get deployment {{ deployment_name }}
+        -n {{ k8s_namespace }}
+        -o jsonpath='{.metadata.name}'
+      register: deploy_verify
+      changed_when: false
+      failed_when: deploy_verify.rc != 0
+      when: deploy_start.rc != 0
+      tags: [deploy]
+
+    - name: Show deployment start warning
+      ansible.builtin.debug:
+        msg: >-
+          laconic-so deployment start returned rc={{ deploy_start.rc }}
+          but deployment exists — continuing (idempotent).
+      when: deploy_start.rc != 0 and (deploy_verify.rc | default(1)) == 0
+      tags: [deploy]
+
     - name: Wait for deployment to exist
       ansible.builtin.command: >
         kubectl get deployment {{ deployment_name }}
@@ -275,7 +300,7 @@
     - name: Verify kind node mounts
       ansible.builtin.command: >
         docker exec {{ kind_cluster }}-control-plane
-        ls /mnt/solana/snapshots/
+        ls /mnt/validator-snapshots/
       register: kind_mount_check
       changed_when: false
       tags: [snapshot]
@@ -301,7 +326,7 @@
       ansible.builtin.shell: >
         set -o pipefail &&
         docker exec {{ kind_cluster }}-control-plane
-        find /mnt/solana/snapshots/ -name '*.tar.*' -maxdepth 1 | head -5
+        find /mnt/validator-snapshots/ -name '*.tar.*' -maxdepth 1 | head -5
       register: kind_snapshot_check
       failed_when: kind_snapshot_check.stdout == ""
       changed_when: false
@@ -320,7 +345,7 @@
         kubectl scale deployment {{ deployment_name }}
         -n {{ k8s_namespace }} --replicas=1
       changed_when: true
-      tags: [deploy]
+      tags: [deploy, scale-up]
 
     # ---- verify: confirm validator is running --------------------------------
     - name: Wait for pod to be running
@@ -334,10 +359,17 @@
       changed_when: false
       tags: [verify]
 
-    - name: Verify unified mount inside kind node
-      ansible.builtin.command: "docker exec {{ kind_cluster }}-control-plane ls /mnt/solana/"
+    - name: Verify PV mounts inside kind node
+      ansible.builtin.shell:
+        cmd: >
+          set -o pipefail &&
+          docker exec {{ kind_cluster }}-control-plane
+          df -T /mnt/validator-ledger /mnt/validator-accounts
+          /mnt/validator-snapshots /mnt/validator-log 2>&1
+        executable: /bin/bash
       register: mount_check
       changed_when: false
+      failed_when: false
       tags: [verify]
 
     - name: Show mount contents