fix: inventory layering — playbooks use hosts:all, cross-inventory uses explicit hosts

Normal playbooks should never hardcode hostnames — that's an inventory
concern. Changed all playbooks to hosts:all. The one exception is
ashburn-relay-check.yml which legitimately spans both inventories
(switches + biscayne) and uses explicit hostnames.

Also adds:
- ashburn-relay-check.yml: full-path relay diagnostics (switches + host)
- biscayne-start.yml: start kind container and scale validator to 1
- ashburn-relay-setup.sh.j2: boot persistence script for relay state
- Direct device mounts replacing rbind (ZFS shared propagation fix)
- systemd service replacing broken if-up.d/netfilter-persistent
- PV mount path corrections (/mnt/validator-* not /mnt/solana/*)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
fix/kind-mount-propagation
A. F. Dudley 2026-03-07 22:28:21 +00:00
parent 14c0f63775
commit 9cbc115295
10 changed files with 631 additions and 85 deletions

View File

@ -72,11 +72,10 @@ These units run before docker, so the kind node's bind mounts always see the
ramdisk. **No manual intervention is needed after reboot.**
**Mount propagation**: The kind node bind-mounts `/srv/kind` → `/mnt` at container
start. New mounts under `/srv/kind` on the host (like the rbind at
`/srv/kind/solana`) do NOT propagate into the kind node because kind's default
mount propagation is `None`. A kind node restart is required to pick up new host
mounts. **TODO**: Fix laconic-so to set `propagation: HostToContainer` on the
kind-mount-root extraMount, which would make host mounts propagate automatically.
start. laconic-so sets `propagation: HostToContainer` on all kind extraMounts
(commit `a11d40f2` in stack-orchestrator), so host submounts (like the rbind at
`/srv/kind/solana`) propagate into the kind node automatically. A kind restart
is required to pick up the new config after updating laconic-so.
### KUBECONFIG

View File

@ -26,7 +26,7 @@
# ansible-playbook playbooks/ashburn-relay-biscayne.yml -e rollback=true
- name: Configure biscayne Ashburn validator relay
hosts: biscayne
hosts: all
gather_facts: false
vars:
@ -72,9 +72,18 @@
ansible.builtin.shell:
cmd: |
set -o pipefail
iptables -t nat -D PREROUTING -p udp -d {{ ashburn_ip }} --dport {{ gossip_port }} -j DNAT --to-destination {{ kind_node_ip }}:{{ gossip_port }} 2>/dev/null || true
iptables -t nat -D PREROUTING -p tcp -d {{ ashburn_ip }} --dport {{ gossip_port }} -j DNAT --to-destination {{ kind_node_ip }}:{{ gossip_port }} 2>/dev/null || true
iptables -t nat -D PREROUTING -p udp -d {{ ashburn_ip }} --dport {{ dynamic_port_range_start }}:{{ dynamic_port_range_end }} -j DNAT --to-destination {{ kind_node_ip }} 2>/dev/null || true
iptables -t nat -D PREROUTING -p udp -d {{ ashburn_ip }} \
--dport {{ gossip_port }} \
-j DNAT --to-destination {{ kind_node_ip }}:{{ gossip_port }} \
2>/dev/null || true
iptables -t nat -D PREROUTING -p tcp -d {{ ashburn_ip }} \
--dport {{ gossip_port }} \
-j DNAT --to-destination {{ kind_node_ip }}:{{ gossip_port }} \
2>/dev/null || true
iptables -t nat -D PREROUTING -p udp -d {{ ashburn_ip }} \
--dport {{ dynamic_port_range_start }}:{{ dynamic_port_range_end }} \
-j DNAT --to-destination {{ kind_node_ip }} \
2>/dev/null || true
executable: /bin/bash
changed_when: false
@ -82,9 +91,15 @@
ansible.builtin.shell:
cmd: |
set -o pipefail
iptables -t mangle -D PREROUTING -s {{ kind_network }} -p udp --sport {{ gossip_port }} -j MARK --set-mark {{ fwmark }} 2>/dev/null || true
iptables -t mangle -D PREROUTING -s {{ kind_network }} -p udp --sport {{ dynamic_port_range_start }}:{{ dynamic_port_range_end }} -j MARK --set-mark {{ fwmark }} 2>/dev/null || true
iptables -t mangle -D PREROUTING -s {{ kind_network }} -p tcp --sport {{ gossip_port }} -j MARK --set-mark {{ fwmark }} 2>/dev/null || true
iptables -t mangle -D PREROUTING -s {{ kind_network }} \
-p udp --sport {{ gossip_port }} \
-j MARK --set-mark {{ fwmark }} 2>/dev/null || true
iptables -t mangle -D PREROUTING -s {{ kind_network }} \
-p udp --sport {{ dynamic_port_range_start }}:{{ dynamic_port_range_end }} \
-j MARK --set-mark {{ fwmark }} 2>/dev/null || true
iptables -t mangle -D PREROUTING -s {{ kind_network }} \
-p tcp --sport {{ gossip_port }} \
-j MARK --set-mark {{ fwmark }} 2>/dev/null || true
executable: /bin/bash
changed_when: false
@ -102,15 +117,21 @@
executable: /bin/bash
changed_when: false
- name: Persist cleaned iptables
ansible.builtin.command:
cmd: netfilter-persistent save
changed_when: true
- name: Disable and remove ashburn-relay service
ansible.builtin.systemd:
name: ashburn-relay.service
enabled: false
state: stopped
failed_when: false
- name: Remove if-up.d script
- name: Remove ashburn-relay files
ansible.builtin.file:
path: /etc/network/if-up.d/ashburn-routing
path: "{{ item }}"
state: absent
loop:
- /etc/systemd/system/ashburn-relay.service
- /usr/local/sbin/ashburn-relay-setup.sh
- /etc/network/if-up.d/ashburn-routing
- name: Rollback complete
ansible.builtin.debug:
@ -140,7 +161,7 @@
- name: Show existing iptables nat rules
ansible.builtin.shell:
cmd: iptables -t nat -L -v -n --line-numbers | head -60
cmd: set -o pipefail && iptables -t nat -L -v -n --line-numbers | head -60
executable: /bin/bash
register: existing_nat
changed_when: false
@ -288,6 +309,7 @@
- name: Add policy routing rule for fwmark
ansible.builtin.shell:
cmd: |
set -o pipefail
if ip rule show | grep -q 'fwmark 0x64 lookup ashburn'; then
echo "rule already exists"
else
@ -309,20 +331,51 @@
# ------------------------------------------------------------------
# Persistence
# ------------------------------------------------------------------
- name: Save iptables rules
ansible.builtin.command:
cmd: netfilter-persistent save
changed_when: true
# A systemd oneshot service replaces both if-up.d (which depends on
# networking.service, inactive on this host) and netfilter-persistent
# (which runs before Docker, so Docker's chain setup blows away rules).
# This service runs After=docker.service and idempotently applies all
# tunnel, iptables, and policy routing state.
- name: Install ashburn-relay systemd service
ansible.builtin.copy:
dest: /etc/systemd/system/ashburn-relay.service
mode: "0644"
content: |
[Unit]
Description=Ashburn validator relay (GRE tunnel, iptables, policy routing)
After=docker.service network-online.target
Wants=network-online.target
[Service]
Type=oneshot
RemainAfterExit=yes
ExecStart=/usr/local/sbin/ashburn-relay-setup.sh
[Install]
WantedBy=multi-user.target
register: relay_unit
tags: [inbound, outbound]
- name: Install if-up.d persistence script
- name: Install ashburn-relay setup script
ansible.builtin.template:
src: files/ashburn-routing-ifup.sh.j2
dest: /etc/network/if-up.d/ashburn-routing
mode: '0755'
owner: root
group: root
tags: [outbound]
src: files/ashburn-relay-setup.sh.j2
dest: /usr/local/sbin/ashburn-relay-setup.sh
mode: "0755"
register: relay_script
tags: [inbound, outbound]
- name: Reload systemd and enable ashburn-relay
ansible.builtin.systemd:
name: ashburn-relay.service
daemon_reload: "{{ relay_unit.changed or relay_script.changed }}"
enabled: true
tags: [inbound, outbound]
- name: Remove stale if-up.d script
ansible.builtin.file:
path: /etc/network/if-up.d/ashburn-routing
state: absent
tags: [inbound, outbound]
# ------------------------------------------------------------------
# Verification
@ -345,7 +398,7 @@
- name: Show NAT rules
ansible.builtin.shell:
cmd: iptables -t nat -L -v -n --line-numbers 2>&1 | head -40
cmd: set -o pipefail && iptables -t nat -L -v -n --line-numbers 2>&1 | head -40
executable: /bin/bash
register: nat_rules
changed_when: false
@ -374,7 +427,7 @@
- name: Show loopback addresses
ansible.builtin.shell:
cmd: ip addr show lo | grep inet
cmd: set -o pipefail && ip addr show lo | grep inet
executable: /bin/bash
register: lo_addrs
changed_when: false

View File

@ -0,0 +1,251 @@
---
# Ashburn relay health check — full path verification
#
# Cross-inventory playbook: checks was-sw01, mia-sw01, and biscayne.
# All tasks are read-only — safe to run at any time.
#
# Usage:
#   ansible-playbook -i inventory-switches/switches.yml \
#     -i inventory/biscayne.yml playbooks/ashburn-relay-check.yml

# ---- was-sw01: inbound leg (loopback advertisement, DNAT path) -------------
- name: Check was-sw01 relay config
  hosts: was-sw01
  gather_facts: false
  vars:
    ashburn_ip: 137.239.194.65
  tasks:
    - name: Check loopback interfaces
      arista.eos.eos_command:
        commands:
          - show ip interface brief | include Loopback
      register: was_loopbacks
      changed_when: false

    - name: Check route for ashburn IP
      arista.eos.eos_command:
        commands:
          - "show ip route {{ ashburn_ip }}"
      register: was_route
      changed_when: false

    - name: Check Et1/1 config
      arista.eos.eos_command:
        commands:
          - show running-config interfaces Ethernet1/1
      register: was_et1
      changed_when: false

    - name: Check traffic-policies
      arista.eos.eos_command:
        commands:
          - "show running-config | section traffic-policy"
      register: was_traffic_policy
      changed_when: false

    - name: Check system-rule
      arista.eos.eos_command:
        commands:
          - "show running-config | include system-rule"
      register: was_system_rule
      changed_when: false

    - name: Check monitor sessions
      arista.eos.eos_command:
        commands:
          - show monitor session
      register: was_monitor
      changed_when: false

    - name: Check backbone interface
      arista.eos.eos_command:
        commands:
          - show interfaces Ethernet4/1 status
      register: was_backbone
      changed_when: false

    - name: Show was-sw01 relay status
      ansible.builtin.debug:
        msg:
          loopbacks: "{{ was_loopbacks.stdout_lines[0] }}"
          route_to_ashburn_ip: "{{ was_route.stdout_lines[0] }}"
          et1_config: "{{ was_et1.stdout_lines[0] }}"
          traffic_policy: "{{ was_traffic_policy.stdout[0] | default('none') }}"
          system_rule: "{{ was_system_rule.stdout[0] | default('none') }}"
          monitor_sessions: "{{ was_monitor.stdout_lines[0] }}"
          backbone: "{{ was_backbone.stdout_lines[0] }}"

# ---- mia-sw01: tunnel leg (GRE terminus, ACL, return routing) --------------
- name: Check mia-sw01 relay config
  hosts: mia-sw01
  gather_facts: false
  vars:
    ashburn_ip: 137.239.194.65
  tasks:
    - name: Check tunnel interfaces
      arista.eos.eos_command:
        commands:
          - show ip interface brief | include Tunnel
      register: mia_tunnels
      changed_when: false

    - name: Check Tunnel100 config
      arista.eos.eos_command:
        commands:
          - show running-config interfaces Tunnel100
      register: mia_tunnel100
      changed_when: false

    - name: Check Tunnel100 ACL
      arista.eos.eos_command:
        commands:
          - show ip access-lists SEC-VALIDATOR-100-IN
      register: mia_acl
      changed_when: false

    - name: Check route for ashburn IP
      arista.eos.eos_command:
        commands:
          - "show ip route {{ ashburn_ip }}"
      register: mia_route
      changed_when: false

    - name: Check traffic-policies
      arista.eos.eos_command:
        commands:
          - "show running-config | section traffic-policy"
      register: mia_traffic_policy
      changed_when: false

    - name: Check system-rule
      arista.eos.eos_command:
        commands:
          - "show running-config | include system-rule"
      register: mia_system_rule
      changed_when: false

    - name: Check backbone interface
      arista.eos.eos_command:
        commands:
          - show interfaces Ethernet4/1 status
      register: mia_backbone
      changed_when: false

    - name: Show mia-sw01 relay status
      ansible.builtin.debug:
        msg:
          tunnels: "{{ mia_tunnels.stdout_lines[0] }}"
          tunnel100_config: "{{ mia_tunnel100.stdout_lines[0] }}"
          tunnel100_acl: "{{ mia_acl.stdout_lines[0] }}"
          route_to_ashburn_ip: "{{ mia_route.stdout_lines[0] }}"
          traffic_policy: "{{ mia_traffic_policy.stdout[0] | default('none') }}"
          system_rule: "{{ mia_system_rule.stdout[0] | default('none') }}"
          backbone: "{{ mia_backbone.stdout_lines[0] }}"

# ---- biscayne: host leg (tunnel, iptables, policy routing, service) --------
- name: Check biscayne relay state
  hosts: biscayne
  gather_facts: false
  vars:
    ashburn_ip: 137.239.194.65
    tunnel_device: gre-ashburn
    tunnel_remote_ip: 169.254.100.0
  tasks:
    - name: Check GRE tunnel
      ansible.builtin.shell:
        cmd: >
          set -o pipefail &&
          ip tunnel show {{ tunnel_device }} 2>&1 || echo "tunnel not found"
        executable: /bin/bash
      register: biscayne_tunnel
      changed_when: false

    - name: Check loopback IP
      ansible.builtin.shell:
        cmd: >
          set -o pipefail &&
          ip addr show lo | grep '{{ ashburn_ip }}' || echo "not configured"
        executable: /bin/bash
      register: biscayne_lo
      changed_when: false

    - name: Check iptables DNAT rules
      ansible.builtin.shell:
        cmd: >
          set -o pipefail &&
          iptables -t nat -L PREROUTING -v -n | grep '{{ ashburn_ip }}'
          || echo "no DNAT rules"
        executable: /bin/bash
      register: biscayne_dnat
      changed_when: false
      become: true

    - name: Check iptables mangle rules
      ansible.builtin.shell:
        cmd: >
          set -o pipefail &&
          iptables -t mangle -L PREROUTING -v -n | grep 'MARK'
          || echo "no mangle rules"
        executable: /bin/bash
      register: biscayne_mangle
      changed_when: false
      become: true

    - name: Check iptables SNAT rule
      ansible.builtin.shell:
        cmd: >
          set -o pipefail &&
          iptables -t nat -L POSTROUTING -v -n | grep '{{ ashburn_ip }}'
          || echo "no SNAT rule"
        executable: /bin/bash
      register: biscayne_snat
      changed_when: false
      become: true

    - name: Check policy routing
      ansible.builtin.shell:
        cmd: >
          set -o pipefail &&
          ip rule show | grep ashburn || echo "no policy rule"
        executable: /bin/bash
      register: biscayne_policy
      changed_when: false

    - name: Check ashburn routing table
      ansible.builtin.shell:
        cmd: >
          set -o pipefail &&
          ip route show table ashburn 2>&1 || echo "table not found"
        executable: /bin/bash
      register: biscayne_table
      changed_when: false

    - name: Check tunnel ping
      ansible.builtin.command:
        cmd: "ping -c 2 -W 2 {{ tunnel_remote_ip }}"
      register: biscayne_ping
      changed_when: false
      failed_when: false

    # systemd module with no state/enabled only queries unit status;
    # check_mode guards against any accidental mutation.
    - name: Check ashburn-relay service
      ansible.builtin.systemd:
        name: ashburn-relay.service
      register: biscayne_service
      check_mode: true
      failed_when: false

    - name: Show biscayne relay status
      ansible.builtin.debug:
        msg:
          gre_tunnel: "{{ biscayne_tunnel.stdout }}"
          loopback_ip: "{{ biscayne_lo.stdout }}"
          dnat_rules: "{{ biscayne_dnat.stdout_lines }}"
          mangle_rules: "{{ biscayne_mangle.stdout_lines }}"
          snat_rule: "{{ biscayne_snat.stdout_lines }}"
          policy_routing: "{{ biscayne_policy.stdout }}"
          routing_table: "{{ biscayne_table.stdout }}"
          tunnel_ping: "{{ 'OK (' + biscayne_ping.stdout_lines[-1] + ')' if biscayne_ping.rc == 0 else 'FAILED' }}"
          systemd_service: "{{ biscayne_service.status.ActiveState | default('not installed') }}"

View File

@ -29,7 +29,7 @@
# ansible-playbook -i inventory/switches.yml playbooks/ashburn-relay-mia-sw01.yml -e rollback=true
- name: Configure mia-sw01 validator relay tunnel
hosts: mia-sw01
hosts: all
gather_facts: false
vars:

View File

@ -19,7 +19,7 @@
# ansible-playbook -i inventory/switches.yml playbooks/ashburn-relay-was-sw01.yml -e rollback=true
- name: Configure was-sw01 inbound validator relay
hosts: was-sw01
hosts: all
gather_facts: false
vars:

View File

@ -23,12 +23,13 @@
# mounting each boot.
# Persisted as: format-ramdisk.service (mkfs before mount) + fstab entry
#
# Invariant 3: /srv/kind/solana is an rbind of /srv/solana
# Invariant 3: /srv/kind/solana is XFS (zvol) and /srv/kind/solana/ramdisk is XFS (ram0)
# Why: kind mounts /srv/kind → /mnt inside the kind node. PVs reference
# /mnt/solana/*. Without the rbind, /srv/kind/solana resolves to the ZFS
# dataset (biscayne/DATA/srv/kind), not the zvol — violating invariant 1.
# Persisted as: fstab entry with x-systemd.requires=zfs-mount.service
# (must mount AFTER ZFS, or ZFS overlay at /srv/kind hides it)
# /mnt/solana/*. An rbind of /srv/solana does NOT work because ZFS's
# shared propagation (shared:75 on /srv) overlays ZFS on top of the bind.
# Direct device mounts bypass propagation entirely.
# Persisted as: two fstab entries — zvol at /srv/kind/solana, ram0 at
# /srv/kind/solana/ramdisk, both with x-systemd.requires ordering
#
# This playbook checks each invariant and only acts if it's not met.
# Idempotent — safe to run multiple times.
@ -48,6 +49,7 @@
kind_solana_dir: /srv/kind/solana
accounts_dir: /srv/solana/ramdisk/accounts
deployment_dir: /srv/deployments/agave
kind_ramdisk_opts: "noatime,nodiratime,nofail,x-systemd.requires=format-ramdisk.service,x-systemd.requires=srv-kind-solana.mount"
tasks:
# ---- systemd units ----------------------------------------------------------
@ -106,30 +108,30 @@
line: '{{ ramdisk_device }} {{ ramdisk_mount }} xfs noatime,nodiratime,nofail,x-systemd.requires=format-ramdisk.service 0 0'
register: fstab_ramdisk
# rbind /srv/solana to /srv/kind/solana AFTER zfs-mount.service and ramdisk.
# Without this ordering, ZFS overlay at /srv/kind hides the bind mount.
- name: Ensure kind bind mount fstab entry
# Direct device mounts at /srv/kind/solana — bypasses ZFS shared propagation.
# An rbind of /srv/solana fails because ZFS's shared:75 on /srv overlays
# ZFS on top of any bind mount under /srv. Direct device mounts avoid this.
- name: Ensure kind zvol fstab entry
ansible.builtin.lineinfile:
path: /etc/fstab
regexp: '^\S+\s+{{ kind_solana_dir }}\s'
line: '{{ solana_dir }} {{ kind_solana_dir }} none rbind,nofail,x-systemd.requires=zfs-mount.service,x-systemd.requires=srv-solana-ramdisk.mount 0 0'
line: '{{ zvol_device }} {{ kind_solana_dir }} xfs defaults,nofail,x-systemd.requires=zfs-mount.service 0 0'
register: fstab_kind
# Remove stale fstab entries from previous attempts (direct zvol mount,
# separate ramdisk mount at /srv/kind/solana/ramdisk)
- name: Remove stale kind zvol fstab entry
ansible.builtin.lineinfile:
path: /etc/fstab
regexp: '^{{ zvol_device }}\s+{{ kind_solana_dir }}\s'
state: absent
register: fstab_stale_zvol
- name: Remove stale kind ramdisk fstab entry
- name: Ensure kind ramdisk fstab entry
ansible.builtin.lineinfile:
path: /etc/fstab
regexp: '^\S+\s+{{ kind_solana_dir }}/ramdisk\s'
line: "{{ ramdisk_device }} {{ kind_solana_dir }}/ramdisk xfs {{ kind_ramdisk_opts }} 0 0"
register: fstab_kind_ramdisk
# Remove stale rbind fstab entry from previous approach
- name: Remove stale kind rbind fstab entry
ansible.builtin.lineinfile:
path: /etc/fstab
regexp: '^\S+\s+{{ kind_solana_dir }}\s+none\s+rbind'
state: absent
register: fstab_stale_ramdisk
register: fstab_stale_rbind
# ---- reload and enable ------------------------------------------------------
- name: Reload systemd
@ -137,8 +139,9 @@
daemon_reload: true
when: >-
unit_file.changed or accounts_unit.changed or
fstab_zvol.changed or fstab_ramdisk.changed or fstab_kind.changed or
fstab_stale_zvol.changed or fstab_stale_ramdisk.changed
fstab_zvol.changed or fstab_ramdisk.changed or
fstab_kind.changed or fstab_kind_ramdisk.changed or
fstab_stale_rbind.changed
- name: Enable ramdisk services
ansible.builtin.systemd:
@ -164,14 +167,14 @@
changed_when: ramdisk_mounted.rc != 0
when: ramdisk_mounted.rc != 0
# ---- apply kind bind mount now if not correct ------------------------------
- name: Check kind bind mount
# ---- apply kind device mounts now if not correct ----------------------------
- name: Check kind zvol mount is XFS
ansible.builtin.shell:
cmd: >
set -o pipefail &&
findmnt -n -o SOURCE {{ kind_solana_dir }} | grep -q '{{ solana_dir }}'
findmnt -n -o FSTYPE {{ kind_solana_dir }} | grep -q xfs
executable: /bin/bash
register: kind_mount_check
register: kind_zvol_check
failed_when: false
changed_when: false
@ -181,17 +184,47 @@
umount {{ kind_solana_dir }}/ramdisk 2>/dev/null || true
umount {{ kind_solana_dir }} 2>/dev/null || true
executable: /bin/bash
changed_when: kind_mount_check.rc != 0
when: kind_mount_check.rc != 0
changed_when: kind_zvol_check.rc != 0
when: kind_zvol_check.rc != 0
- name: Apply kind bind mount now
- name: Mount zvol at kind solana dir
ansible.posix.mount:
path: "{{ kind_solana_dir }}"
src: "{{ solana_dir }}"
fstype: none
opts: rbind
src: "{{ zvol_device }}"
fstype: xfs
state: mounted
when: kind_mount_check.rc != 0
when: kind_zvol_check.rc != 0
- name: Check kind ramdisk mount is XFS
ansible.builtin.shell:
cmd: >
set -o pipefail &&
findmnt -n -o FSTYPE {{ kind_solana_dir }}/ramdisk | grep -q xfs
executable: /bin/bash
register: kind_ramdisk_check
failed_when: false
changed_when: false
- name: Mount ramdisk at kind solana ramdisk dir
ansible.posix.mount:
path: "{{ kind_solana_dir }}/ramdisk"
src: "{{ ramdisk_device }}"
fstype: xfs
opts: noatime,nodiratime
state: mounted
when: kind_ramdisk_check.rc != 0
# Docker requires shared propagation on mounts it bind-mounts into
# containers. Without this, `docker start` fails with "not a shared
# or slave mount".
# No ansible module supports mount propagation flags; command required.
- name: Ensure shared propagation on kind mounts # noqa: command-instead-of-module
ansible.builtin.command:
cmd: mount --make-shared {{ item }}
loop:
- "{{ kind_solana_dir }}"
- "{{ kind_solana_dir }}/ramdisk"
changed_when: false
# ---- verify -----------------------------------------------------------------
- name: Verify ramdisk is XFS
@ -206,7 +239,19 @@
executable: /bin/bash
changed_when: false
- name: Verify kind bind mount contents
- name: Verify kind zvol is XFS
ansible.builtin.shell:
cmd: set -o pipefail && df -T {{ kind_solana_dir }} | grep -q xfs
executable: /bin/bash
changed_when: false
- name: Verify kind ramdisk is XFS
ansible.builtin.shell:
cmd: set -o pipefail && df -T {{ kind_solana_dir }}/ramdisk | grep -q xfs
executable: /bin/bash
changed_when: false
- name: Verify kind mount contents
ansible.builtin.shell:
cmd: >
set -o pipefail &&
@ -216,8 +261,11 @@
register: kind_mount_verify
changed_when: false
# Assert the kind node sees XFS (zvol), not ZFS. If this fails, kind
# needs a restart or laconic-so needs the HostToContainer propagation fix.
# Assert the kind node sees XFS at the PV mount paths.
# laconic-so creates individual extraMounts per volume:
# /srv/kind/solana/ledger → /mnt/validator-ledger (inside kind node)
# /srv/kind/solana/ramdisk/accounts → /mnt/validator-accounts
# The PV hostPaths use /mnt/<volume-name>, not /mnt/solana/<subpath>.
- name: Read cluster-id from deployment
ansible.builtin.shell:
cmd: set -o pipefail && grep '^cluster-id:' {{ deployment_dir }}/deployment.yml | awk '{print $2}'
@ -225,12 +273,13 @@
register: cluster_id_result
changed_when: false
- name: Verify kind node sees XFS at /mnt/solana
- name: Check kind node XFS visibility
ansible.builtin.shell:
cmd: >
set -o pipefail &&
docker exec {{ cluster_id_result.stdout }}-control-plane
stat -f -c '%T' /mnt/solana | grep -q xfs
df -T /mnt/validator-ledger /mnt/validator-accounts
| grep -c xfs
executable: /bin/bash
register: kind_fstype
changed_when: false
@ -240,4 +289,7 @@
ansible.builtin.debug:
msg:
kind_mount: "{{ kind_mount_verify.stdout_lines }}"
kind_fstype: "{{ 'xfs (correct)' if kind_fstype.rc == 0 else 'NOT XFS — kind restart required' }}"
kind_fstype: "{{ 'xfs (correct)' if kind_fstype.stdout | default('0') | int >= 2 else 'NOT XFS — kind restart required' }}"
- name: Configure Ashburn validator relay
ansible.builtin.import_playbook: ashburn-relay-biscayne.yml

View File

@ -0,0 +1,128 @@
---
# Start agave validator on biscayne
#
# Ensures the kind container is running, verifies XFS mounts are visible
# inside the kind node, then scales the deployment to 1.
#
# Prerequisites:
#   - biscayne-prepare-agave.yml has been run (fstab entries, systemd units)
#   - A snapshot exists in /srv/solana/snapshots (or use biscayne-recover.yml)
#
# Usage:
#   ansible-playbook playbooks/biscayne-start.yml

- name: Start agave validator
  hosts: all
  gather_facts: false
  environment:
    KUBECONFIG: /home/rix/.kube/config
  vars:
    deployment_dir: /srv/deployments/agave
  tasks:
    # ---- discover cluster id ---------------------------------------------
    - name: Read cluster-id from deployment
      ansible.builtin.shell:
        cmd: set -o pipefail && grep '^cluster-id:' {{ deployment_dir }}/deployment.yml | awk '{print $2}'
        executable: /bin/bash
      register: cluster_id_result
      changed_when: false

    - name: Set cluster facts
      ansible.builtin.set_fact:
        kind_cluster: "{{ cluster_id_result.stdout }}"
        kind_node: "{{ cluster_id_result.stdout }}-control-plane"
        k8s_namespace: "laconic-{{ cluster_id_result.stdout }}"
        deployment_name: "{{ cluster_id_result.stdout }}-deployment"

    # ---- ensure kind container is running --------------------------------
    - name: Check kind container state
      ansible.builtin.command: docker inspect -f '{% raw %}{{ .State.Running }}{% endraw %}' {{ kind_node }}
      register: kind_running
      failed_when: false
      changed_when: false

    - name: Start kind container
      ansible.builtin.command: docker start {{ kind_node }}
      when: kind_running.stdout | default('false') != 'true'
      changed_when: true

    - name: Wait for kind node ready
      ansible.builtin.command: >
        kubectl get node {{ kind_node }}
        -o jsonpath='{.status.conditions[?(@.type=="Ready")].status}'
      register: node_ready
      changed_when: false
      retries: 30
      delay: 10
      until: node_ready.stdout == "True"

    # ---- verify mounts inside kind node -----------------------------------
    # laconic-so creates individual extraMounts per volume:
    #   /srv/kind/solana/ledger           → /mnt/validator-ledger
    #   /srv/kind/solana/ramdisk/accounts → /mnt/validator-accounts
    # failed_when: false is required here: with pipefail, `grep -c` exits 1
    # when it counts zero matches and `df -T` errors on a missing mount, so
    # without it this task aborts the play before the diagnostic fail task
    # below can report what is actually wrong.
    - name: Verify kind node sees XFS at PV paths
      ansible.builtin.shell:
        cmd: >
          set -o pipefail &&
          docker exec {{ kind_node }}
          df -T /mnt/validator-ledger /mnt/validator-accounts
          | grep -c xfs
        executable: /bin/bash
      register: kind_xfs_check
      failed_when: false
      changed_when: false

    - name: Fail if PV paths are not XFS
      ansible.builtin.fail:
        msg: >-
          Expected 2 XFS mounts (validator-ledger, validator-accounts) but
          found {{ kind_xfs_check.stdout | default('0') }}. Run
          biscayne-prepare-agave.yml and restart the kind container.
      when: kind_xfs_check.stdout | default('0') | int < 2

    - name: Show kind node PV filesystems
      ansible.builtin.shell:
        cmd: |
          docker exec {{ kind_node }} df -T /mnt/validator-ledger /mnt/validator-accounts /mnt/validator-snapshots /mnt/validator-log
        executable: /bin/bash
      register: kind_df
      changed_when: false

    - name: Show kind mount info
      ansible.builtin.debug:
        var: kind_df.stdout_lines

    # ---- scale up ----------------------------------------------------------
    - name: Get current replica count
      ansible.builtin.command: >
        kubectl get deployment {{ deployment_name }}
        -n {{ k8s_namespace }}
        -o jsonpath='{.spec.replicas}'
      register: current_replicas
      failed_when: false
      changed_when: false

    - name: Scale deployment to 1
      ansible.builtin.command: >
        kubectl scale deployment {{ deployment_name }}
        -n {{ k8s_namespace }} --replicas=1
      when: current_replicas.stdout | default('0') | int == 0
      changed_when: true

    - name: Wait for pod running
      ansible.builtin.command: >
        kubectl get pods -n {{ k8s_namespace }}
        -l app={{ deployment_name }}
        -o jsonpath='{.items[0].status.phase}'
      register: pod_phase
      changed_when: false
      retries: 30
      delay: 10
      until: pod_phase.stdout == "Running"

    - name: Report started
      ansible.builtin.debug:
        msg: >-
          Validator started. Kind node: {{ kind_node }}.
          Pod phase: {{ pod_phase.stdout }}.
          PV mounts: XFS (zvol for ledger/snapshots/log, ram0 for accounts).

View File

@ -9,7 +9,7 @@
# ansible-playbook playbooks/connect-doublezero-multicast.yml --check # dry-run
- name: Connect biscayne to DoubleZero multicast
hosts: biscayne
hosts: all
gather_facts: false
vars:

View File

@ -0,0 +1,65 @@
#!/bin/bash
# Ashburn validator relay — runtime setup (Jinja2 template rendered by Ansible)
#
# Called by ashburn-relay.service (After=docker.service) on boot.
# Idempotent — safe to run multiple times.
#
# Creates GRE tunnel, loopback IP, iptables rules, and policy routing
# so that validator traffic enters/exits via {{ ashburn_ip }} (Ashburn).

set -euo pipefail

# GRE tunnel to mia-sw01.
# Existence check via `ip link show`: unlike `ip tunnel show`, it reliably
# exits non-zero for an absent device across iproute2 versions, and output
# is suppressed so the unit journal stays clean.
if ! ip link show {{ tunnel_device }} >/dev/null 2>&1; then
    ip tunnel add {{ tunnel_device }} mode gre \
        local {{ tunnel_src }} remote {{ tunnel_dst }} ttl 64
    ip addr add {{ tunnel_local_ip }}/31 dev {{ tunnel_device }}
    ip link set {{ tunnel_device }} up mtu 8972
fi

# Ashburn IP on loopback (so the kernel accepts inbound packets for it).
ip addr show lo | grep -q '{{ ashburn_ip }}' || \
    ip addr add {{ ashburn_ip }}/32 dev lo

# Inbound DNAT (inserted at position 1, before Docker's ADDRTYPE LOCAL rule).
# $rule is deliberately unquoted below: each entry must word-split into
# multiple iptables arguments.
for rule in \
    "-p udp -d {{ ashburn_ip }} --dport {{ gossip_port }} \
     -j DNAT --to-destination {{ kind_node_ip }}:{{ gossip_port }}" \
    "-p tcp -d {{ ashburn_ip }} --dport {{ gossip_port }} \
     -j DNAT --to-destination {{ kind_node_ip }}:{{ gossip_port }}" \
    "-p udp -d {{ ashburn_ip }} \
     --dport {{ dynamic_port_range_start }}:{{ dynamic_port_range_end }} \
     -j DNAT --to-destination {{ kind_node_ip }}" \
    ; do
    if ! iptables -t nat -C PREROUTING $rule 2>/dev/null; then
        iptables -t nat -I PREROUTING 1 $rule
    fi
done

# Outbound mangle (fwmark so policy routing can steer replies out the tunnel).
for rule in \
    "-p udp -s {{ kind_network }} --sport {{ gossip_port }} \
     -j MARK --set-mark {{ fwmark }}" \
    "-p udp -s {{ kind_network }} \
     --sport {{ dynamic_port_range_start }}:{{ dynamic_port_range_end }} \
     -j MARK --set-mark {{ fwmark }}" \
    "-p tcp -s {{ kind_network }} --sport {{ gossip_port }} \
     -j MARK --set-mark {{ fwmark }}" \
    ; do
    if ! iptables -t mangle -C PREROUTING $rule 2>/dev/null; then
        iptables -t mangle -A PREROUTING $rule
    fi
done

# Outbound SNAT (position 1, before Docker MASQUERADE).
snat_rule="-m mark --mark {{ fwmark }} -j SNAT --to-source {{ ashburn_ip }}"
if ! iptables -t nat -C POSTROUTING $snat_rule 2>/dev/null; then
    iptables -t nat -I POSTROUTING 1 $snat_rule
fi

# Policy routing table registration.
grep -q '^{{ rt_table_id }} {{ rt_table_name }}$' /etc/iproute2/rt_tables || \
    echo "{{ rt_table_id }} {{ rt_table_name }}" >> /etc/iproute2/rt_tables

# Idempotency check matches on the table name, NOT a hardcoded 'fwmark 0x64':
# `ip rule show` prints marks in hex, so any {{ fwmark }} value other than
# 100 would never match the old literal pattern and a duplicate rule would
# be appended on every boot.
ip rule show | grep -q 'lookup {{ rt_table_name }}' || \
    ip rule add fwmark {{ fwmark }} table {{ rt_table_name }}

ip route replace default \
    via {{ tunnel_remote_ip }} dev {{ tunnel_device }} table {{ rt_table_name }}

View File

@ -11,7 +11,7 @@
# ansible-playbook playbooks/health-check.yml -t network # just network checks
- name: Biscayne agave-stack health check
hosts: biscayne
hosts: all
gather_facts: false
environment:
KUBECONFIG: /home/rix/.kube/config
@ -249,16 +249,14 @@
ansible.builtin.shell:
cmd: |
set -o pipefail
echo "=== /mnt/solana contents ==="
docker exec {{ kind_cluster }}-control-plane ls /mnt/solana/
echo "=== /mnt/solana filesystem ==="
docker exec {{ kind_cluster }}-control-plane df -T /mnt/solana
echo "=== /mnt/solana/ramdisk filesystem ==="
docker exec {{ kind_cluster }}-control-plane df -T /mnt/solana/ramdisk 2>/dev/null || echo "ramdisk not visible"
echo "=== /mnt/solana/snapshots ==="
docker exec {{ kind_cluster }}-control-plane ls /mnt/solana/snapshots/ 2>/dev/null || echo "snapshots not visible"
echo "=== /mnt/solana/ledger ==="
docker exec {{ kind_cluster }}-control-plane ls /mnt/solana/ledger/ 2>/dev/null | head -5 || echo "ledger not visible"
echo "=== PV mount filesystems ==="
docker exec {{ kind_cluster }}-control-plane df -T /mnt/validator-ledger /mnt/validator-accounts /mnt/validator-snapshots /mnt/validator-log 2>/dev/null || echo "PV mounts not visible"
echo "=== /mnt/validator-ledger ==="
docker exec {{ kind_cluster }}-control-plane ls /mnt/validator-ledger/ 2>/dev/null | head -5 || echo "ledger not visible"
echo "=== /mnt/validator-snapshots ==="
docker exec {{ kind_cluster }}-control-plane ls /mnt/validator-snapshots/ 2>/dev/null || echo "snapshots not visible"
echo "=== /mnt/validator-accounts ==="
docker exec {{ kind_cluster }}-control-plane ls /mnt/validator-accounts/ 2>/dev/null || echo "accounts not visible"
executable: /bin/bash
register: kind_mounts
changed_when: false