fix: remove Ansible snapshot download, add sync-tools playbook
The container entrypoint (entrypoint.py) handles snapshot download internally via aria2c. Ansible no longer needs to scale to 0, download, then scale back to 1 — it just deploys and lets the container manage startup.

- biscayne-redeploy.yml: remove the snapshot download section; simplify to teardown → wipe → deploy → verify
- biscayne-sync-tools.yml: new playbook to sync the laconic-so and agave-stack repos on biscayne, with separate branch controls
- snapshot_download.py: re-probe for a fresh incremental after the full snapshot download completes (the old incremental is stale by then)
- Switch laconic_so_branch to fix/kind-mount-propagation (has hostNetwork translation code)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

fix/kind-mount-propagation
parent
3574e387cc
commit
bd38c1b791
|
|
@ -1,46 +1,33 @@
|
||||||
---
|
---
|
||||||
# Redeploy agave-stack on biscayne with aria2c snapshot pre-download
|
# Redeploy agave-stack on biscayne
|
||||||
#
|
#
|
||||||
# The validator's built-in downloader fetches snapshots at ~18 MB/s (single
|
# The container entrypoint (entrypoint.py) handles snapshot download and
|
||||||
# connection). snapshot-download.py uses aria2c with 16 parallel connections to
|
# agave-validator startup internally. This playbook just manages the k8s
|
||||||
# saturate available bandwidth, cutting 90+ min downloads to ~10 min.
|
# lifecycle: teardown, optional data wipe, deploy, and verify.
|
||||||
#
|
#
|
||||||
# Flow:
|
# Flow:
|
||||||
# 1. [teardown] Delete k8s namespace (preserve kind cluster)
|
# 1. [teardown] Scale to 0, wait for clean exit, delete namespace
|
||||||
# 2. [wipe] Conditionally clear ledger / accounts / old snapshots
|
# 2. [wipe] Conditionally clear ledger / accounts / old snapshots
|
||||||
# 3. [deploy] laconic-so deployment start, then immediately scale to 0
|
# 3. [deploy] Preflight checks, laconic-so deployment start
|
||||||
# 4. [snapshot] Download snapshot via aria2c to host bind mount
|
# 4. [verify] Wait for pod Running, check logs + RPC health
|
||||||
# 5. [snapshot] Verify snapshot visible inside kind node
|
|
||||||
# 6. [deploy,scale-up] Scale validator back to 1
|
|
||||||
# 7. [verify] Wait for pod Running, check logs + RPC health
|
|
||||||
#
|
#
|
||||||
# The validator cannot run during snapshot download — it would lock/use the
|
# The entrypoint.py inside the container:
|
||||||
# snapshot files. laconic-so creates the cluster AND deploys the pod in one
|
# - Checks snapshot freshness against mainnet
|
||||||
# shot, so we scale to 0 immediately after deploy, download, then scale to 1.
|
# - Downloads fresh snapshot via aria2c if needed
|
||||||
|
# - Builds agave-validator args from env vars
|
||||||
|
# - Execs agave-validator
|
||||||
#
|
#
|
||||||
# Usage:
|
# Usage:
|
||||||
# # Standard redeploy (download snapshot, preserve accounts + ledger)
|
# # Standard redeploy
|
||||||
# ansible-playbook -i biscayne.vaasl.io, playbooks/biscayne-redeploy.yml
|
# ansible-playbook -i inventory/biscayne.yml playbooks/biscayne-redeploy.yml
|
||||||
#
|
#
|
||||||
# # Full wipe (accounts + ledger) — slow rebuild
|
# # Full wipe (accounts + ledger) — slow rebuild
|
||||||
# ansible-playbook -i biscayne.vaasl.io, playbooks/biscayne-redeploy.yml \
|
# ansible-playbook -i inventory/biscayne.yml playbooks/biscayne-redeploy.yml \
|
||||||
# -e wipe_accounts=true -e wipe_ledger=true
|
# -e wipe_accounts=true -e wipe_ledger=true
|
||||||
#
|
#
|
||||||
# # Skip snapshot download (use existing)
|
# # Skip snapshot cleanup (keep all existing snapshot archives)
|
||||||
# ansible-playbook -i biscayne.vaasl.io, playbooks/biscayne-redeploy.yml \
|
# ansible-playbook -i inventory/biscayne.yml playbooks/biscayne-redeploy.yml \
|
||||||
# -e skip_snapshot=true
|
# -e skip_snapshot_cleanup=true
|
||||||
#
|
|
||||||
# # Pass extra args to snapshot-download.py
|
|
||||||
# ansible-playbook -i biscayne.vaasl.io, playbooks/biscayne-redeploy.yml \
|
|
||||||
# -e 'snapshot_args=--version 2.2 --min-download-speed 50'
|
|
||||||
#
|
|
||||||
# # Snapshot only (no teardown/deploy)
|
|
||||||
# ansible-playbook -i biscayne.vaasl.io, playbooks/biscayne-redeploy.yml \
|
|
||||||
# --tags snapshot
|
|
||||||
#
|
|
||||||
# # Resume after partial failure (download snapshot, scale up, verify)
|
|
||||||
# ansible-playbook -i biscayne.vaasl.io, playbooks/biscayne-redeploy.yml \
|
|
||||||
# --tags snapshot,scale-up,verify
|
|
||||||
#
|
#
|
||||||
- name: Redeploy agave validator on biscayne
|
- name: Redeploy agave validator on biscayne
|
||||||
hosts: all
|
hosts: all
|
||||||
|
|
@ -53,7 +40,7 @@
|
||||||
stack_path: /srv/deployments/agave-stack/stack-orchestrator/stacks/agave
|
stack_path: /srv/deployments/agave-stack/stack-orchestrator/stacks/agave
|
||||||
laconic_so: /home/rix/.local/bin/laconic-so
|
laconic_so: /home/rix/.local/bin/laconic-so
|
||||||
laconic_so_repo: /home/rix/stack-orchestrator
|
laconic_so_repo: /home/rix/stack-orchestrator
|
||||||
laconic_so_branch: main
|
laconic_so_branch: fix/kind-mount-propagation
|
||||||
kind_cluster: laconic-70ce4c4b47e23b85
|
kind_cluster: laconic-70ce4c4b47e23b85
|
||||||
k8s_namespace: "laconic-{{ kind_cluster }}"
|
k8s_namespace: "laconic-{{ kind_cluster }}"
|
||||||
deployment_name: "{{ kind_cluster }}-deployment"
|
deployment_name: "{{ kind_cluster }}-deployment"
|
||||||
|
|
@ -62,13 +49,10 @@
|
||||||
accounts_dir: /srv/kind/solana/ramdisk/accounts
|
accounts_dir: /srv/kind/solana/ramdisk/accounts
|
||||||
ramdisk_mount: /srv/kind/solana/ramdisk
|
ramdisk_mount: /srv/kind/solana/ramdisk
|
||||||
ramdisk_size: 1024G
|
ramdisk_size: 1024G
|
||||||
snapshot_script_local: "{{ playbook_dir }}/../scripts/agave-container/snapshot_download.py"
|
|
||||||
snapshot_script: /tmp/snapshot-download.py
|
|
||||||
# Flags — non-destructive by default
|
# Flags — non-destructive by default
|
||||||
wipe_accounts: false
|
wipe_accounts: false
|
||||||
wipe_ledger: false
|
wipe_ledger: false
|
||||||
skip_snapshot: false
|
skip_snapshot_cleanup: false
|
||||||
snapshot_args: ""
|
|
||||||
|
|
||||||
tasks:
|
tasks:
|
||||||
# ---- teardown: graceful stop, then delete namespace ----------------------
|
# ---- teardown: graceful stop, then delete namespace ----------------------
|
||||||
|
|
@ -121,12 +105,14 @@
|
||||||
tags: [teardown]
|
tags: [teardown]
|
||||||
|
|
||||||
- name: Clear stale claimRefs on Released PVs
|
- name: Clear stale claimRefs on Released PVs
|
||||||
ansible.builtin.shell: |
|
ansible.builtin.shell:
|
||||||
set -o pipefail
|
cmd: |
|
||||||
for pv in $(kubectl get pv -o jsonpath='{range .items[?(@.status.phase=="Released")]}{.metadata.name}{"\n"}{end}'); do
|
set -o pipefail
|
||||||
kubectl patch pv "$pv" --type json \
|
for pv in $(kubectl get pv -o jsonpath='{range .items[?(@.status.phase=="Released")]}{.metadata.name}{"\n"}{end}'); do
|
||||||
-p '[{"op":"remove","path":"/spec/claimRef"}]'
|
kubectl patch pv "$pv" --type json \
|
||||||
done
|
-p '[{"op":"remove","path":"/spec/claimRef"}]'
|
||||||
|
done
|
||||||
|
executable: /bin/bash
|
||||||
register: pv_patch
|
register: pv_patch
|
||||||
changed_when: pv_patch.stdout != ""
|
changed_when: pv_patch.stdout != ""
|
||||||
tags: [teardown]
|
tags: [teardown]
|
||||||
|
|
@ -151,20 +137,22 @@
|
||||||
tags: [wipe]
|
tags: [wipe]
|
||||||
|
|
||||||
- name: Clean old snapshots (keep newest full + incremental)
|
- name: Clean old snapshots (keep newest full + incremental)
|
||||||
ansible.builtin.shell: |
|
ansible.builtin.shell:
|
||||||
set -o pipefail
|
cmd: |
|
||||||
cd {{ snapshot_dir }} || exit 0
|
set -o pipefail
|
||||||
newest=$(ls -t snapshot-*.tar.* 2>/dev/null | head -1)
|
cd {{ snapshot_dir }} || exit 0
|
||||||
if [ -n "$newest" ]; then
|
newest=$(ls -t snapshot-*.tar.* 2>/dev/null | head -1)
|
||||||
newest_inc=$(ls -t incremental-snapshot-*.tar.* 2>/dev/null | head -1)
|
if [ -n "$newest" ]; then
|
||||||
find . -maxdepth 1 -name '*.tar.*' \
|
newest_inc=$(ls -t incremental-snapshot-*.tar.* 2>/dev/null | head -1)
|
||||||
! -name "$newest" \
|
find . -maxdepth 1 -name '*.tar.*' \
|
||||||
! -name "${newest_inc:-__none__}" \
|
! -name "$newest" \
|
||||||
-delete
|
! -name "${newest_inc:-__none__}" \
|
||||||
fi
|
-delete
|
||||||
|
fi
|
||||||
|
executable: /bin/bash
|
||||||
become: true
|
become: true
|
||||||
changed_when: true
|
changed_when: true
|
||||||
when: not skip_snapshot | bool
|
when: not skip_snapshot_cleanup | bool
|
||||||
tags: [wipe]
|
tags: [wipe]
|
||||||
|
|
||||||
# ---- preflight: verify ramdisk and mounts before deploy ------------------
|
# ---- preflight: verify ramdisk and mounts before deploy ------------------
|
||||||
|
|
@ -175,35 +163,16 @@
|
||||||
changed_when: false
|
changed_when: false
|
||||||
tags: [deploy, preflight]
|
tags: [deploy, preflight]
|
||||||
|
|
||||||
- name: Verify ramdisk is xfs (not the underlying ZFS)
|
- name: Verify ramdisk is tmpfs (not the underlying ZFS)
|
||||||
ansible.builtin.shell:
|
ansible.builtin.shell:
|
||||||
cmd: set -o pipefail && df -T {{ ramdisk_mount }} | grep -q xfs
|
cmd: set -o pipefail && df -T {{ ramdisk_mount }} | grep -q tmpfs
|
||||||
executable: /bin/bash
|
executable: /bin/bash
|
||||||
register: ramdisk_type
|
register: ramdisk_type
|
||||||
failed_when: ramdisk_type.rc != 0
|
failed_when: ramdisk_type.rc != 0
|
||||||
changed_when: false
|
changed_when: false
|
||||||
tags: [deploy, preflight]
|
tags: [deploy, preflight]
|
||||||
|
|
||||||
# ---- deploy: sync config, bring up cluster, scale to 0 ------------------
|
# ---- deploy: bring up cluster, let entrypoint handle snapshot ------------
|
||||||
- name: Pull agave-stack repo
|
|
||||||
ansible.builtin.shell: |
|
|
||||||
cd {{ stack_repo }}
|
|
||||||
git fetch origin
|
|
||||||
git reset --hard origin/{{ laconic_so_branch }}
|
|
||||||
changed_when: true
|
|
||||||
tags: [deploy]
|
|
||||||
|
|
||||||
- name: Regenerate deployment config from updated stack
|
|
||||||
ansible.builtin.command: >
|
|
||||||
{{ laconic_so }}
|
|
||||||
--stack {{ stack_path }}
|
|
||||||
deploy create
|
|
||||||
--spec-file {{ deployment_dir }}/spec.yml
|
|
||||||
--deployment-dir {{ deployment_dir }}
|
|
||||||
--update
|
|
||||||
changed_when: true
|
|
||||||
tags: [deploy]
|
|
||||||
|
|
||||||
- name: Check kind-config.yml mount style
|
- name: Check kind-config.yml mount style
|
||||||
ansible.builtin.command: "grep -c 'containerPath: /mnt$' {{ deployment_dir }}/kind-config.yml"
|
ansible.builtin.command: "grep -c 'containerPath: /mnt$' {{ deployment_dir }}/kind-config.yml"
|
||||||
register: mount_root_check
|
register: mount_root_check
|
||||||
|
|
@ -220,14 +189,6 @@
|
||||||
when: mount_root_check.stdout | default('0') | int < 1
|
when: mount_root_check.stdout | default('0') | int < 1
|
||||||
tags: [deploy]
|
tags: [deploy]
|
||||||
|
|
||||||
- name: Update laconic-so (editable install)
|
|
||||||
ansible.builtin.shell: |
|
|
||||||
cd {{ laconic_so_repo }}
|
|
||||||
git fetch origin
|
|
||||||
git reset --hard origin/{{ laconic_so_branch }}
|
|
||||||
changed_when: true
|
|
||||||
tags: [deploy]
|
|
||||||
|
|
||||||
- name: Start deployment (creates kind cluster + deploys pod)
|
- name: Start deployment (creates kind cluster + deploys pod)
|
||||||
ansible.builtin.command: "{{ laconic_so }} deployment --dir {{ deployment_dir }} start"
|
ansible.builtin.command: "{{ laconic_so }} deployment --dir {{ deployment_dir }} start"
|
||||||
register: deploy_start
|
register: deploy_start
|
||||||
|
|
@ -272,99 +233,17 @@
|
||||||
cmd: >
|
cmd: >
|
||||||
set -o pipefail &&
|
set -o pipefail &&
|
||||||
docker exec {{ kind_cluster }}-control-plane
|
docker exec {{ kind_cluster }}-control-plane
|
||||||
df -T /mnt/validator-accounts 2>/dev/null | grep -q xfs
|
df -T /mnt/validator-accounts 2>/dev/null | grep -q tmpfs
|
||||||
executable: /bin/bash
|
executable: /bin/bash
|
||||||
register: kind_ramdisk_check
|
register: kind_ramdisk_check
|
||||||
failed_when: kind_ramdisk_check.rc != 0
|
failed_when: kind_ramdisk_check.rc != 0
|
||||||
changed_when: false
|
changed_when: false
|
||||||
tags: [deploy]
|
tags: [deploy]
|
||||||
|
|
||||||
- name: Scale validator to 0 (stop before snapshot download)
|
|
||||||
ansible.builtin.command: >
|
|
||||||
kubectl scale deployment {{ deployment_name }}
|
|
||||||
-n {{ k8s_namespace }} --replicas=0
|
|
||||||
changed_when: true
|
|
||||||
tags: [deploy]
|
|
||||||
|
|
||||||
- name: Wait for pods to terminate
|
|
||||||
ansible.builtin.command: >
|
|
||||||
kubectl get pods -n {{ k8s_namespace }}
|
|
||||||
-l app={{ deployment_name }}
|
|
||||||
-o jsonpath='{.items}'
|
|
||||||
register: pods_gone
|
|
||||||
retries: 30
|
|
||||||
delay: 5
|
|
||||||
until: pods_gone.stdout == "[]" or pods_gone.stdout == ""
|
|
||||||
changed_when: false
|
|
||||||
failed_when: false
|
|
||||||
tags: [deploy]
|
|
||||||
|
|
||||||
# ---- snapshot: download via aria2c, verify in kind node ------------------
|
|
||||||
- name: Verify aria2c installed
|
|
||||||
ansible.builtin.command: which aria2c
|
|
||||||
changed_when: false
|
|
||||||
when: not skip_snapshot | bool
|
|
||||||
tags: [snapshot]
|
|
||||||
|
|
||||||
- name: Copy snapshot script to remote
|
|
||||||
ansible.builtin.copy:
|
|
||||||
src: "{{ snapshot_script_local }}"
|
|
||||||
dest: "{{ snapshot_script }}"
|
|
||||||
mode: "0755"
|
|
||||||
when: not skip_snapshot | bool
|
|
||||||
tags: [snapshot]
|
|
||||||
|
|
||||||
- name: Verify kind node mounts
|
|
||||||
ansible.builtin.command: >
|
|
||||||
docker exec {{ kind_cluster }}-control-plane
|
|
||||||
ls /mnt/validator-snapshots/
|
|
||||||
register: kind_mount_check
|
|
||||||
changed_when: false
|
|
||||||
tags: [snapshot]
|
|
||||||
|
|
||||||
- name: Download snapshot via aria2c
|
|
||||||
ansible.builtin.shell: >
|
|
||||||
python3 {{ snapshot_script }}
|
|
||||||
-o {{ snapshot_dir }}
|
|
||||||
{{ snapshot_args }}
|
|
||||||
become: true
|
|
||||||
register: snapshot_result
|
|
||||||
changed_when: true
|
|
||||||
when: not skip_snapshot | bool
|
|
||||||
timeout: 3600
|
|
||||||
tags: [snapshot]
|
|
||||||
|
|
||||||
- name: Show snapshot download result
|
|
||||||
ansible.builtin.debug:
|
|
||||||
msg: "{{ snapshot_result.stdout_lines | default(['skipped']) }}"
|
|
||||||
tags: [snapshot]
|
|
||||||
|
|
||||||
- name: Verify snapshot visible inside kind node
|
|
||||||
ansible.builtin.shell: >
|
|
||||||
set -o pipefail &&
|
|
||||||
docker exec {{ kind_cluster }}-control-plane
|
|
||||||
find /mnt/validator-snapshots/ -name '*.tar.*' -maxdepth 1 | head -5
|
|
||||||
register: kind_snapshot_check
|
|
||||||
failed_when: kind_snapshot_check.stdout == ""
|
|
||||||
changed_when: false
|
|
||||||
when: not skip_snapshot | bool
|
|
||||||
tags: [snapshot]
|
|
||||||
|
|
||||||
- name: Show snapshot files in kind node
|
|
||||||
ansible.builtin.debug:
|
|
||||||
msg: "{{ kind_snapshot_check.stdout_lines | default(['skipped']) }}"
|
|
||||||
when: not skip_snapshot | bool
|
|
||||||
tags: [snapshot]
|
|
||||||
|
|
||||||
# ---- deploy (cont): scale validator back up with snapshot ----------------
|
|
||||||
- name: Scale validator to 1 (start with downloaded snapshot)
|
|
||||||
ansible.builtin.command: >
|
|
||||||
kubectl scale deployment {{ deployment_name }}
|
|
||||||
-n {{ k8s_namespace }} --replicas=1
|
|
||||||
changed_when: true
|
|
||||||
tags: [deploy, scale-up]
|
|
||||||
|
|
||||||
# ---- verify: confirm validator is running --------------------------------
|
# ---- verify: confirm validator is running --------------------------------
|
||||||
|
# The entrypoint.py handles snapshot download + agave-validator startup.
|
||||||
|
# Pod will be Running once the container starts, but agave-validator won't
|
||||||
|
# exec until after snapshot download completes (if needed).
|
||||||
- name: Wait for pod to be running
|
- name: Wait for pod to be running
|
||||||
ansible.builtin.command: >
|
ansible.builtin.command: >
|
||||||
kubectl get pods -n {{ k8s_namespace }}
|
kubectl get pods -n {{ k8s_namespace }}
|
||||||
|
|
|
||||||
|
|
@ -61,24 +61,33 @@
|
||||||
# laconic-so creates individual extraMounts per volume:
|
# laconic-so creates individual extraMounts per volume:
|
||||||
# /srv/kind/solana/ledger → /mnt/validator-ledger (inside kind node)
|
# /srv/kind/solana/ledger → /mnt/validator-ledger (inside kind node)
|
||||||
# /srv/kind/solana/ramdisk/accounts → /mnt/validator-accounts
|
# /srv/kind/solana/ramdisk/accounts → /mnt/validator-accounts
|
||||||
- name: Verify kind node sees XFS at PV paths
|
- name: Verify kind node sees correct filesystems at PV paths
|
||||||
ansible.builtin.shell:
|
ansible.builtin.shell:
|
||||||
cmd: >
|
cmd: >
|
||||||
set -o pipefail &&
|
set -o pipefail &&
|
||||||
docker exec {{ kind_node }}
|
docker exec {{ kind_node }}
|
||||||
df -T /mnt/validator-ledger /mnt/validator-accounts
|
df -T /mnt/validator-ledger /mnt/validator-accounts
|
||||||
| grep -c xfs
|
|
||||||
executable: /bin/bash
|
executable: /bin/bash
|
||||||
register: kind_xfs_check
|
register: kind_fs_check
|
||||||
changed_when: false
|
changed_when: false
|
||||||
|
|
||||||
- name: Fail if PV paths are not XFS
|
- name: Fail if ledger is not XFS (zvol)
|
||||||
ansible.builtin.fail:
|
ansible.builtin.fail:
|
||||||
msg: >-
|
msg: >-
|
||||||
Expected 2 XFS mounts (validator-ledger, validator-accounts) but
|
validator-ledger must be XFS (on zvol). Got:
|
||||||
found {{ kind_xfs_check.stdout }}. Run biscayne-prepare-agave.yml
|
{{ kind_fs_check.stdout }}
|
||||||
and restart the kind container.
|
when: "'xfs' not in kind_fs_check.stdout"
|
||||||
when: kind_xfs_check.stdout | int < 2
|
|
||||||
|
- name: Fail if accounts is on ZFS (must be tmpfs)
|
||||||
|
ansible.builtin.shell:
|
||||||
|
cmd: >
|
||||||
|
set -o pipefail &&
|
||||||
|
docker exec {{ kind_node }}
|
||||||
|
df -T /mnt/validator-accounts | grep -q zfs
|
||||||
|
executable: /bin/bash
|
||||||
|
register: accounts_zfs_check
|
||||||
|
changed_when: false
|
||||||
|
failed_when: accounts_zfs_check.rc == 0
|
||||||
|
|
||||||
- name: Show kind node PV filesystems
|
- name: Show kind node PV filesystems
|
||||||
ansible.builtin.shell:
|
ansible.builtin.shell:
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,96 @@
|
||||||
|
---
|
||||||
|
# Sync laconic-so and agave-stack to latest on biscayne
|
||||||
|
#
|
||||||
|
# Updates both repos that laconic-so deployment commands depend on:
|
||||||
|
# - stack-orchestrator (laconic-so itself, editable install)
|
||||||
|
# - agave-stack (stack definitions, compose files, container scripts)
|
||||||
|
#
|
||||||
|
# Then regenerates the deployment config from the updated stack.
|
||||||
|
# Does NOT restart anything — just syncs code and config.
|
||||||
|
#
|
||||||
|
# Usage:
|
||||||
|
# ansible-playbook -i inventory/biscayne.yml playbooks/biscayne-sync-tools.yml
|
||||||
|
#
|
||||||
|
# # Use a feature branch for laconic-so (agave-stack is controlled separately via -e stack_branch=...)
|
||||||
|
# ansible-playbook -i inventory/biscayne.yml playbooks/biscayne-sync-tools.yml \
|
||||||
|
# -e laconic_so_branch=fix/kind-mount-propagation
|
||||||
|
#
|
||||||
|
- name: Sync laconic-so and agave-stack
|
||||||
|
hosts: all
|
||||||
|
gather_facts: false
|
||||||
|
environment:
|
||||||
|
KUBECONFIG: /home/rix/.kube/config
|
||||||
|
vars:
|
||||||
|
deployment_dir: /srv/deployments/agave
|
||||||
|
stack_repo: /srv/deployments/agave-stack
|
||||||
|
stack_path: /srv/deployments/agave-stack/stack-orchestrator/stacks/agave
|
||||||
|
laconic_so: /home/rix/.local/bin/laconic-so
|
||||||
|
laconic_so_repo: /home/rix/stack-orchestrator
|
||||||
|
laconic_so_branch: fix/kind-mount-propagation
|
||||||
|
stack_branch: main
|
||||||
|
|
||||||
|
tasks:
|
||||||
|
- name: Update laconic-so (editable install)
|
||||||
|
ansible.builtin.shell: |
|
||||||
|
cd {{ laconic_so_repo }}
|
||||||
|
git fetch origin
|
||||||
|
git reset --hard origin/{{ laconic_so_branch }}
|
||||||
|
register: laconic_so_update
|
||||||
|
changed_when: true
|
||||||
|
|
||||||
|
- name: Show laconic-so version
|
||||||
|
ansible.builtin.shell:
|
||||||
|
cmd: set -o pipefail && cd {{ laconic_so_repo }} && git log --oneline -1
|
||||||
|
executable: /bin/bash
|
||||||
|
register: laconic_so_version
|
||||||
|
changed_when: false
|
||||||
|
|
||||||
|
- name: Report laconic-so
|
||||||
|
ansible.builtin.debug:
|
||||||
|
msg: "laconic-so: {{ laconic_so_version.stdout }}"
|
||||||
|
|
||||||
|
- name: Find SSH agent socket
|
||||||
|
ansible.builtin.shell:
|
||||||
|
cmd: set -o pipefail && ls -t /tmp/ssh-*/agent.* 2>/dev/null | head -1
|
||||||
|
executable: /bin/bash
|
||||||
|
register: ssh_agent_socket
|
||||||
|
changed_when: false
|
||||||
|
failed_when: ssh_agent_socket.stdout == ""
|
||||||
|
|
||||||
|
- name: Pull agave-stack repo
|
||||||
|
ansible.builtin.shell: |
|
||||||
|
export SSH_AUTH_SOCK={{ ssh_agent_socket.stdout }}
|
||||||
|
cd {{ stack_repo }}
|
||||||
|
git fetch origin
|
||||||
|
git reset --hard origin/{{ stack_branch }}
|
||||||
|
register: stack_update
|
||||||
|
changed_when: true
|
||||||
|
|
||||||
|
- name: Show agave-stack version
|
||||||
|
ansible.builtin.shell:
|
||||||
|
cmd: set -o pipefail && cd {{ stack_repo }} && git log --oneline -1
|
||||||
|
executable: /bin/bash
|
||||||
|
register: stack_version
|
||||||
|
changed_when: false
|
||||||
|
|
||||||
|
- name: Report agave-stack
|
||||||
|
ansible.builtin.debug:
|
||||||
|
msg: "agave-stack: {{ stack_version.stdout }}"
|
||||||
|
|
||||||
|
- name: Regenerate deployment config from updated stack
|
||||||
|
ansible.builtin.command: >
|
||||||
|
{{ laconic_so }}
|
||||||
|
--stack {{ stack_path }}
|
||||||
|
deploy create
|
||||||
|
--spec-file {{ deployment_dir }}/spec.yml
|
||||||
|
--deployment-dir {{ deployment_dir }}
|
||||||
|
--update
|
||||||
|
register: regen_result
|
||||||
|
changed_when: true
|
||||||
|
|
||||||
|
- name: Report sync complete
|
||||||
|
ansible.builtin.debug:
|
||||||
|
msg: >-
|
||||||
|
Sync complete. laconic-so and agave-stack updated to
|
||||||
|
origin/{{ laconic_so_branch }}. Deployment config regenerated.
|
||||||
|
Restart or redeploy required to apply changes.
|
||||||
|
|
@ -513,11 +513,18 @@ def download_best_snapshot(
|
||||||
for filename, mirror_urls in download_plan:
|
for filename, mirror_urls in download_plan:
|
||||||
log.info(" %s (%d mirrors)", filename, len(mirror_urls))
|
log.info(" %s (%d mirrors)", filename, len(mirror_urls))
|
||||||
|
|
||||||
# Download
|
# Download — full snapshot first, then re-probe for fresh incremental
|
||||||
os.makedirs(output_dir, exist_ok=True)
|
os.makedirs(output_dir, exist_ok=True)
|
||||||
total_start: float = time.monotonic()
|
total_start: float = time.monotonic()
|
||||||
|
|
||||||
|
# Separate full and incremental from the initial plan
|
||||||
|
full_downloads: list[tuple[str, list[str]]] = []
|
||||||
for filename, mirror_urls in download_plan:
|
for filename, mirror_urls in download_plan:
|
||||||
|
if filename.startswith("snapshot-"):
|
||||||
|
full_downloads.append((filename, mirror_urls))
|
||||||
|
|
||||||
|
# Download full snapshot(s)
|
||||||
|
for filename, mirror_urls in full_downloads:
|
||||||
filepath: Path = Path(output_dir) / filename
|
filepath: Path = Path(output_dir) / filename
|
||||||
if filepath.exists() and filepath.stat().st_size > 0:
|
if filepath.exists() and filepath.stat().st_size > 0:
|
||||||
log.info("Skipping %s (already exists: %.1f GB)",
|
log.info("Skipping %s (already exists: %.1f GB)",
|
||||||
|
|
@ -527,6 +534,47 @@ def download_best_snapshot(
|
||||||
log.error("Failed to download %s", filename)
|
log.error("Failed to download %s", filename)
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
# After full snapshot download, re-probe for a fresh incremental.
|
||||||
|
# The initial incremental is stale by now (full download takes 10+ min).
|
||||||
|
if not full_only:
|
||||||
|
# Get the full snapshot slot from the filename we just downloaded
|
||||||
|
full_filename: str = full_downloads[0][0]
|
||||||
|
fm_post: re.Match[str] | None = FULL_SNAP_RE.match(full_filename)
|
||||||
|
if fm_post:
|
||||||
|
full_snap_slot: int = int(fm_post.group(1))
|
||||||
|
log.info("Re-probing for fresh incremental based on slot %d...", full_snap_slot)
|
||||||
|
inc_downloaded: bool = False
|
||||||
|
for source in fast_sources:
|
||||||
|
inc_url_re: str = f"http://{source.rpc_address}/incremental-snapshot.tar.bz2"
|
||||||
|
inc_location, _ = head_no_follow(inc_url_re, timeout=2)
|
||||||
|
if not inc_location:
|
||||||
|
continue
|
||||||
|
inc_fn, inc_fp = _parse_snapshot_filename(inc_location)
|
||||||
|
m_inc: re.Match[str] | None = INCR_SNAP_RE.match(inc_fn)
|
||||||
|
if not m_inc:
|
||||||
|
continue
|
||||||
|
if int(m_inc.group(1)) != full_snap_slot:
|
||||||
|
log.debug(" %s: incremental base slot %s != full %d, skipping",
|
||||||
|
source.rpc_address, m_inc.group(1), full_snap_slot)
|
||||||
|
continue
|
||||||
|
# Found a matching incremental — build mirror list and download
|
||||||
|
inc_mirrors: list[str] = [f"http://{source.rpc_address}{inc_fp}"]
|
||||||
|
for other in fast_sources:
|
||||||
|
if other.rpc_address == source.rpc_address:
|
||||||
|
continue
|
||||||
|
other_loc, _ = head_no_follow(
|
||||||
|
f"http://{other.rpc_address}/incremental-snapshot.tar.bz2", timeout=2)
|
||||||
|
if other_loc:
|
||||||
|
other_fn, other_fp = _parse_snapshot_filename(other_loc)
|
||||||
|
if other_fn == inc_fn:
|
||||||
|
inc_mirrors.append(f"http://{other.rpc_address}{other_fp}")
|
||||||
|
log.info(" Found incremental %s (%d mirrors)", inc_fn, len(inc_mirrors))
|
||||||
|
if download_aria2c(inc_mirrors, output_dir, inc_fn, connections):
|
||||||
|
inc_downloaded = True
|
||||||
|
break
|
||||||
|
if not inc_downloaded:
|
||||||
|
log.info("No matching incremental found — validator will replay from full snapshot")
|
||||||
|
|
||||||
total_elapsed: float = time.monotonic() - total_start
|
total_elapsed: float = time.monotonic() - total_start
|
||||||
log.info("All downloads complete in %.0fs", total_elapsed)
|
log.info("All downloads complete in %.0fs", total_elapsed)
|
||||||
for filename, _ in download_plan:
|
for filename, _ in download_plan:
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue