fix: redeploy playbook paths, tags, and idempotency
- Fix snapshot_dir: /srv/solana/snapshots → /srv/kind/solana/snapshots (kind node reads from the bind mount, not the zvol mount directly)
- Fix kind-internal paths: /mnt/solana/... → /mnt/validator-... to match actual PV hostPath layout (individual mounts, not unified)
- Add 'scale-up' tag to "Scale validator to 1" task for partial recovery (--tags snapshot,scale-up,verify resumes without re-running deploy)
- Make 'Start deployment' idempotent: failed_when: false + follow-up check so an existing deployment doesn't fail the play

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

Branch: fix/kind-mount-propagation
parent
05f9acf8a0
commit
ad68d505ae
|
|
@ -11,7 +11,7 @@
|
|||
# 3. [deploy] laconic-so deployment start, then immediately scale to 0
|
||||
# 4. [snapshot] Download snapshot via aria2c to host bind mount
|
||||
# 5. [snapshot] Verify snapshot visible inside kind node
|
||||
# 6. [deploy] Scale validator back to 1
|
||||
# 6. [deploy,scale-up] Scale validator back to 1
|
||||
# 7. [verify] Wait for pod Running, check logs + RPC health
|
||||
#
|
||||
# The validator cannot run during snapshot download — it would lock/use the
|
||||
|
|
@ -38,6 +38,10 @@
|
|||
# ansible-playbook -i biscayne.vaasl.io, playbooks/biscayne-redeploy.yml \
|
||||
# --tags snapshot
|
||||
#
|
||||
# # Resume after partial failure (download snapshot, scale up, verify)
|
||||
# ansible-playbook -i biscayne.vaasl.io, playbooks/biscayne-redeploy.yml \
|
||||
# --tags snapshot,scale-up,verify
|
||||
#
|
||||
- name: Redeploy agave validator on biscayne
|
||||
hosts: all
|
||||
gather_facts: false
|
||||
|
|
@ -51,7 +55,7 @@
|
|||
kind_cluster: laconic-70ce4c4b47e23b85
|
||||
k8s_namespace: "laconic-{{ kind_cluster }}"
|
||||
deployment_name: "{{ kind_cluster }}-deployment"
|
||||
snapshot_dir: /srv/solana/snapshots
|
||||
snapshot_dir: /srv/kind/solana/snapshots
|
||||
ledger_dir: /srv/solana/ledger
|
||||
accounts_dir: /srv/solana/ramdisk/accounts
|
||||
ramdisk_mount: /srv/solana/ramdisk
|
||||
|
|
@ -185,7 +189,7 @@
|
|||
cmd: >
|
||||
set -o pipefail &&
|
||||
docker exec {{ kind_cluster }}-control-plane
|
||||
df -T /mnt/solana/ramdisk 2>/dev/null | grep -q xfs
|
||||
df -T /mnt/validator-accounts 2>/dev/null | grep -q xfs
|
||||
executable: /bin/bash
|
||||
register: kind_ramdisk_check
|
||||
failed_when: kind_ramdisk_check.rc != 0
|
||||
|
|
@ -221,10 +225,31 @@
|
|||
|
||||
- name: Start deployment (creates kind cluster + deploys pod)
|
||||
ansible.builtin.command: "{{ laconic_so }} deployment --dir {{ deployment_dir }} start"
|
||||
changed_when: true
|
||||
register: deploy_start
|
||||
changed_when: deploy_start.rc == 0
|
||||
failed_when: false
|
||||
timeout: 1200
|
||||
tags: [deploy]
|
||||
|
||||
- name: Verify deployment started or already exists
|
||||
ansible.builtin.command: >
|
||||
kubectl get deployment {{ deployment_name }}
|
||||
-n {{ k8s_namespace }}
|
||||
-o jsonpath='{.metadata.name}'
|
||||
register: deploy_verify
|
||||
changed_when: false
|
||||
failed_when: deploy_verify.rc != 0
|
||||
when: deploy_start.rc != 0
|
||||
tags: [deploy]
|
||||
|
||||
- name: Show deployment start warning
|
||||
ansible.builtin.debug:
|
||||
msg: >-
|
||||
laconic-so deployment start returned rc={{ deploy_start.rc }}
|
||||
but deployment exists — continuing (idempotent).
|
||||
when: deploy_start.rc != 0 and (deploy_verify.rc | default(1)) == 0
|
||||
tags: [deploy]
|
||||
|
||||
- name: Wait for deployment to exist
|
||||
ansible.builtin.command: >
|
||||
kubectl get deployment {{ deployment_name }}
|
||||
|
|
@ -275,7 +300,7 @@
|
|||
- name: Verify kind node mounts
|
||||
ansible.builtin.command: >
|
||||
docker exec {{ kind_cluster }}-control-plane
|
||||
ls /mnt/solana/snapshots/
|
||||
ls /mnt/validator-snapshots/
|
||||
register: kind_mount_check
|
||||
changed_when: false
|
||||
tags: [snapshot]
|
||||
|
|
@ -301,7 +326,7 @@
|
|||
ansible.builtin.shell: >
|
||||
set -o pipefail &&
|
||||
docker exec {{ kind_cluster }}-control-plane
|
||||
find /mnt/solana/snapshots/ -name '*.tar.*' -maxdepth 1 | head -5
|
||||
find /mnt/validator-snapshots/ -name '*.tar.*' -maxdepth 1 | head -5
|
||||
register: kind_snapshot_check
|
||||
failed_when: kind_snapshot_check.stdout == ""
|
||||
changed_when: false
|
||||
|
|
@ -320,7 +345,7 @@
|
|||
kubectl scale deployment {{ deployment_name }}
|
||||
-n {{ k8s_namespace }} --replicas=1
|
||||
changed_when: true
|
||||
tags: [deploy]
|
||||
tags: [deploy, scale-up]
|
||||
|
||||
# ---- verify: confirm validator is running --------------------------------
|
||||
- name: Wait for pod to be running
|
||||
|
|
@ -334,10 +359,17 @@
|
|||
changed_when: false
|
||||
tags: [verify]
|
||||
|
||||
- name: Verify unified mount inside kind node
|
||||
ansible.builtin.command: "docker exec {{ kind_cluster }}-control-plane ls /mnt/solana/"
|
||||
- name: Verify PV mounts inside kind node
|
||||
ansible.builtin.shell:
|
||||
cmd: >
|
||||
set -o pipefail &&
|
||||
docker exec {{ kind_cluster }}-control-plane
|
||||
df -T /mnt/validator-ledger /mnt/validator-accounts
|
||||
/mnt/validator-snapshots /mnt/validator-log 2>&1
|
||||
executable: /bin/bash
|
||||
register: mount_check
|
||||
changed_when: false
|
||||
failed_when: false
|
||||
tags: [verify]
|
||||
|
||||
- name: Show mount contents
|
||||
|
|
|
|||
Loading…
Reference in New Issue