fix: inventory layering — playbooks use hosts:all, cross-inventory uses explicit hosts

Normal playbooks should never hardcode hostnames — that's an inventory
concern. Changed all playbooks to hosts:all. The one exception is
ashburn-relay-check.yml which legitimately spans both inventories
(switches + biscayne) and uses explicit hostnames.
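
For context, the layering this assumes looks roughly like the following (a minimal
sketch — the real inventory files are not part of this commit, hostnames illustrative):

```yaml
# inventory/biscayne.yml — illustrative shape only
all:
  hosts:
    biscayne:
      ansible_host: biscayne   # connection details live in inventory, not playbooks
# A playbook with `hosts: all` is then scoped by whichever inventory is passed:
#   ansible-playbook -i inventory/biscayne.yml playbooks/health-check.yml
```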

Also adds:
- ashburn-relay-check.yml: full-path relay diagnostics (switches + host)
- biscayne-start.yml: start kind container and scale validator to 1
- ashburn-relay-setup.sh.j2: boot persistence script for relay state
- Direct device mounts replacing rbind (ZFS shared propagation fix)
- systemd service replacing broken if-up.d/netfilter-persistent
- PV mount path corrections (/mnt/validator-* not /mnt/solana/*)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
fix/kind-mount-propagation
A. F. Dudley 2026-03-07 22:28:21 +00:00
parent 14c0f63775
commit 9cbc115295
10 changed files with 631 additions and 85 deletions

View File

@@ -72,11 +72,10 @@ These units run before docker, so the kind node's bind mounts always see the
 ramdisk. **No manual intervention is needed after reboot.**
 
-**Mount propagation**: The kind node bind-mounts `/srv/kind` → `/mnt` at container
-start. New mounts under `/srv/kind` on the host (like the rbind at
-`/srv/kind/solana`) do NOT propagate into the kind node because kind's default
-mount propagation is `None`. A kind node restart is required to pick up new host
-mounts. **TODO**: Fix laconic-so to set `propagation: HostToContainer` on the
-kind-mount-root extraMount, which would make host mounts propagate automatically.
+**Mount propagation**: The kind node bind-mounts `/srv/kind` → `/mnt` at container
+start. laconic-so sets `propagation: HostToContainer` on all kind extraMounts
+(commit `a11d40f2` in stack-orchestrator), so host submounts (like the rbind at
+`/srv/kind/solana`) propagate into the kind node automatically. A kind restart
+is required to pick up the new config after updating laconic-so.
 
 ### KUBECONFIG
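
For reference, a kind cluster config carrying this propagation setting looks roughly
like the sketch below (kind `v1alpha4` fields; the mount list is illustrative, not the
exact config laconic-so generates):

```yaml
kind: Cluster
apiVersion: kind.x-k8s.io/v1alpha4
nodes:
  - role: control-plane
    extraMounts:
      - hostPath: /srv/kind           # host directory
        containerPath: /mnt           # path inside the kind node
        propagation: HostToContainer  # new host submounts become visible in the node
```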

View File

@@ -26,7 +26,7 @@
 #   ansible-playbook playbooks/ashburn-relay-biscayne.yml -e rollback=true
 
 - name: Configure biscayne Ashburn validator relay
-  hosts: biscayne
+  hosts: all
   gather_facts: false
 
   vars:
@@ -72,9 +72,18 @@
       ansible.builtin.shell:
         cmd: |
           set -o pipefail
-          iptables -t nat -D PREROUTING -p udp -d {{ ashburn_ip }} --dport {{ gossip_port }} -j DNAT --to-destination {{ kind_node_ip }}:{{ gossip_port }} 2>/dev/null || true
-          iptables -t nat -D PREROUTING -p tcp -d {{ ashburn_ip }} --dport {{ gossip_port }} -j DNAT --to-destination {{ kind_node_ip }}:{{ gossip_port }} 2>/dev/null || true
-          iptables -t nat -D PREROUTING -p udp -d {{ ashburn_ip }} --dport {{ dynamic_port_range_start }}:{{ dynamic_port_range_end }} -j DNAT --to-destination {{ kind_node_ip }} 2>/dev/null || true
+          iptables -t nat -D PREROUTING -p udp -d {{ ashburn_ip }} \
+            --dport {{ gossip_port }} \
+            -j DNAT --to-destination {{ kind_node_ip }}:{{ gossip_port }} \
+            2>/dev/null || true
+          iptables -t nat -D PREROUTING -p tcp -d {{ ashburn_ip }} \
+            --dport {{ gossip_port }} \
+            -j DNAT --to-destination {{ kind_node_ip }}:{{ gossip_port }} \
+            2>/dev/null || true
+          iptables -t nat -D PREROUTING -p udp -d {{ ashburn_ip }} \
+            --dport {{ dynamic_port_range_start }}:{{ dynamic_port_range_end }} \
+            -j DNAT --to-destination {{ kind_node_ip }} \
+            2>/dev/null || true
         executable: /bin/bash
       changed_when: false
@@ -82,9 +82,15 @@
       ansible.builtin.shell:
         cmd: |
           set -o pipefail
-          iptables -t mangle -D PREROUTING -s {{ kind_network }} -p udp --sport {{ gossip_port }} -j MARK --set-mark {{ fwmark }} 2>/dev/null || true
-          iptables -t mangle -D PREROUTING -s {{ kind_network }} -p udp --sport {{ dynamic_port_range_start }}:{{ dynamic_port_range_end }} -j MARK --set-mark {{ fwmark }} 2>/dev/null || true
-          iptables -t mangle -D PREROUTING -s {{ kind_network }} -p tcp --sport {{ gossip_port }} -j MARK --set-mark {{ fwmark }} 2>/dev/null || true
+          iptables -t mangle -D PREROUTING -s {{ kind_network }} \
+            -p udp --sport {{ gossip_port }} \
+            -j MARK --set-mark {{ fwmark }} 2>/dev/null || true
+          iptables -t mangle -D PREROUTING -s {{ kind_network }} \
+            -p udp --sport {{ dynamic_port_range_start }}:{{ dynamic_port_range_end }} \
+            -j MARK --set-mark {{ fwmark }} 2>/dev/null || true
+          iptables -t mangle -D PREROUTING -s {{ kind_network }} \
+            -p tcp --sport {{ gossip_port }} \
+            -j MARK --set-mark {{ fwmark }} 2>/dev/null || true
         executable: /bin/bash
       changed_when: false
@@ -102,15 +117,21 @@
         executable: /bin/bash
       changed_when: false
 
-    - name: Persist cleaned iptables
-      ansible.builtin.command:
-        cmd: netfilter-persistent save
-      changed_when: true
+    - name: Disable and remove ashburn-relay service
+      ansible.builtin.systemd:
+        name: ashburn-relay.service
+        enabled: false
+        state: stopped
+      failed_when: false
 
-    - name: Remove if-up.d script
+    - name: Remove ashburn-relay files
       ansible.builtin.file:
-        path: /etc/network/if-up.d/ashburn-routing
+        path: "{{ item }}"
         state: absent
+      loop:
+        - /etc/systemd/system/ashburn-relay.service
+        - /usr/local/sbin/ashburn-relay-setup.sh
+        - /etc/network/if-up.d/ashburn-routing
 
     - name: Rollback complete
       ansible.builtin.debug:
@@ -140,7 +161,7 @@
     - name: Show existing iptables nat rules
       ansible.builtin.shell:
-        cmd: iptables -t nat -L -v -n --line-numbers | head -60
+        cmd: set -o pipefail && iptables -t nat -L -v -n --line-numbers | head -60
         executable: /bin/bash
       register: existing_nat
       changed_when: false
@@ -288,6 +309,7 @@
     - name: Add policy routing rule for fwmark
       ansible.builtin.shell:
         cmd: |
+          set -o pipefail
           if ip rule show | grep -q 'fwmark 0x64 lookup ashburn'; then
             echo "rule already exists"
           else
@@ -309,20 +331,51 @@
     # ------------------------------------------------------------------
     # Persistence
     # ------------------------------------------------------------------
-    - name: Save iptables rules
-      ansible.builtin.command:
-        cmd: netfilter-persistent save
-      changed_when: true
+    # A systemd oneshot service replaces both if-up.d (which depends on
+    # networking.service, inactive on this host) and netfilter-persistent
+    # (which runs before Docker, so Docker's chain setup blows away rules).
+    # This service runs After=docker.service and idempotently applies all
+    # tunnel, iptables, and policy routing state.
+    - name: Install ashburn-relay systemd service
+      ansible.builtin.copy:
+        dest: /etc/systemd/system/ashburn-relay.service
+        mode: "0644"
+        content: |
+          [Unit]
+          Description=Ashburn validator relay (GRE tunnel, iptables, policy routing)
+          After=docker.service network-online.target
+          Wants=network-online.target
+
+          [Service]
+          Type=oneshot
+          RemainAfterExit=yes
+          ExecStart=/usr/local/sbin/ashburn-relay-setup.sh
+
+          [Install]
+          WantedBy=multi-user.target
+      register: relay_unit
       tags: [inbound, outbound]
 
-    - name: Install if-up.d persistence script
+    - name: Install ashburn-relay setup script
       ansible.builtin.template:
-        src: files/ashburn-routing-ifup.sh.j2
-        dest: /etc/network/if-up.d/ashburn-routing
-        mode: '0755'
-        owner: root
-        group: root
-      tags: [outbound]
+        src: files/ashburn-relay-setup.sh.j2
+        dest: /usr/local/sbin/ashburn-relay-setup.sh
+        mode: "0755"
+      register: relay_script
+      tags: [inbound, outbound]
+
+    - name: Reload systemd and enable ashburn-relay
+      ansible.builtin.systemd:
+        name: ashburn-relay.service
+        daemon_reload: "{{ relay_unit.changed or relay_script.changed }}"
+        enabled: true
+      tags: [inbound, outbound]
+
+    - name: Remove stale if-up.d script
+      ansible.builtin.file:
+        path: /etc/network/if-up.d/ashburn-routing
+        state: absent
+      tags: [inbound, outbound]
 
     # ------------------------------------------------------------------
     # Verification
@@ -345,7 +398,7 @@
     - name: Show NAT rules
       ansible.builtin.shell:
-        cmd: iptables -t nat -L -v -n --line-numbers 2>&1 | head -40
+        cmd: set -o pipefail && iptables -t nat -L -v -n --line-numbers 2>&1 | head -40
         executable: /bin/bash
       register: nat_rules
       changed_when: false
@@ -374,7 +427,7 @@
     - name: Show loopback addresses
       ansible.builtin.shell:
-        cmd: ip addr show lo | grep inet
+        cmd: set -o pipefail && ip addr show lo | grep inet
         executable: /bin/bash
       register: lo_addrs
       changed_when: false

View File

@@ -0,0 +1,251 @@
---
# Ashburn relay health check — full path verification
#
# Cross-inventory playbook: checks was-sw01, mia-sw01, and biscayne.
# All tasks are read-only — safe to run at any time.
#
# Usage:
#   ansible-playbook -i inventory-switches/switches.yml \
#     -i inventory/biscayne.yml playbooks/ashburn-relay-check.yml

- name: Check was-sw01 relay config
  hosts: was-sw01
  gather_facts: false
  vars:
    ashburn_ip: 137.239.194.65
  tasks:
    - name: Check loopback interfaces
      arista.eos.eos_command:
        commands:
          - show ip interface brief | include Loopback
      register: was_loopbacks
      changed_when: false

    - name: Check route for ashburn IP
      arista.eos.eos_command:
        commands:
          - "show ip route {{ ashburn_ip }}"
      register: was_route
      changed_when: false

    - name: Check Et1/1 config
      arista.eos.eos_command:
        commands:
          - show running-config interfaces Ethernet1/1
      register: was_et1
      changed_when: false

    - name: Check traffic-policies
      arista.eos.eos_command:
        commands:
          - "show running-config | section traffic-policy"
      register: was_traffic_policy
      changed_when: false

    - name: Check system-rule
      arista.eos.eos_command:
        commands:
          - "show running-config | include system-rule"
      register: was_system_rule
      changed_when: false

    - name: Check monitor sessions
      arista.eos.eos_command:
        commands:
          - show monitor session
      register: was_monitor
      changed_when: false

    - name: Check backbone interface
      arista.eos.eos_command:
        commands:
          - show interfaces Ethernet4/1 status
      register: was_backbone
      changed_when: false

    - name: Show was-sw01 relay status
      ansible.builtin.debug:
        msg:
          loopbacks: "{{ was_loopbacks.stdout_lines[0] }}"
          route_to_ashburn_ip: "{{ was_route.stdout_lines[0] }}"
          et1_config: "{{ was_et1.stdout_lines[0] }}"
          traffic_policy: "{{ was_traffic_policy.stdout[0] | default('none') }}"
          system_rule: "{{ was_system_rule.stdout[0] | default('none') }}"
          monitor_sessions: "{{ was_monitor.stdout_lines[0] }}"
          backbone: "{{ was_backbone.stdout_lines[0] }}"

- name: Check mia-sw01 relay config
  hosts: mia-sw01
  gather_facts: false
  vars:
    ashburn_ip: 137.239.194.65
  tasks:
    - name: Check tunnel interfaces
      arista.eos.eos_command:
        commands:
          - show ip interface brief | include Tunnel
      register: mia_tunnels
      changed_when: false

    - name: Check Tunnel100 config
      arista.eos.eos_command:
        commands:
          - show running-config interfaces Tunnel100
      register: mia_tunnel100
      changed_when: false

    - name: Check Tunnel100 ACL
      arista.eos.eos_command:
        commands:
          - show ip access-lists SEC-VALIDATOR-100-IN
      register: mia_acl
      changed_when: false

    - name: Check route for ashburn IP
      arista.eos.eos_command:
        commands:
          - "show ip route {{ ashburn_ip }}"
      register: mia_route
      changed_when: false

    - name: Check traffic-policies
      arista.eos.eos_command:
        commands:
          - "show running-config | section traffic-policy"
      register: mia_traffic_policy
      changed_when: false

    - name: Check system-rule
      arista.eos.eos_command:
        commands:
          - "show running-config | include system-rule"
      register: mia_system_rule
      changed_when: false

    - name: Check backbone interface
      arista.eos.eos_command:
        commands:
          - show interfaces Ethernet4/1 status
      register: mia_backbone
      changed_when: false

    - name: Show mia-sw01 relay status
      ansible.builtin.debug:
        msg:
          tunnels: "{{ mia_tunnels.stdout_lines[0] }}"
          tunnel100_config: "{{ mia_tunnel100.stdout_lines[0] }}"
          tunnel100_acl: "{{ mia_acl.stdout_lines[0] }}"
          route_to_ashburn_ip: "{{ mia_route.stdout_lines[0] }}"
          traffic_policy: "{{ mia_traffic_policy.stdout[0] | default('none') }}"
          system_rule: "{{ mia_system_rule.stdout[0] | default('none') }}"
          backbone: "{{ mia_backbone.stdout_lines[0] }}"

- name: Check biscayne relay state
  hosts: biscayne
  gather_facts: false
  vars:
    ashburn_ip: 137.239.194.65
    tunnel_device: gre-ashburn
    tunnel_remote_ip: 169.254.100.0
  tasks:
    - name: Check GRE tunnel
      ansible.builtin.shell:
        cmd: >
          set -o pipefail &&
          ip tunnel show {{ tunnel_device }} 2>&1 || echo "tunnel not found"
        executable: /bin/bash
      register: biscayne_tunnel
      changed_when: false

    - name: Check loopback IP
      ansible.builtin.shell:
        cmd: >
          set -o pipefail &&
          ip addr show lo | grep '{{ ashburn_ip }}' || echo "not configured"
        executable: /bin/bash
      register: biscayne_lo
      changed_when: false

    - name: Check iptables DNAT rules
      ansible.builtin.shell:
        cmd: >
          set -o pipefail &&
          iptables -t nat -L PREROUTING -v -n | grep '{{ ashburn_ip }}'
          || echo "no DNAT rules"
        executable: /bin/bash
      register: biscayne_dnat
      changed_when: false
      become: true

    - name: Check iptables mangle rules
      ansible.builtin.shell:
        cmd: >
          set -o pipefail &&
          iptables -t mangle -L PREROUTING -v -n | grep 'MARK'
          || echo "no mangle rules"
        executable: /bin/bash
      register: biscayne_mangle
      changed_when: false
      become: true

    - name: Check iptables SNAT rule
      ansible.builtin.shell:
        cmd: >
          set -o pipefail &&
          iptables -t nat -L POSTROUTING -v -n | grep '{{ ashburn_ip }}'
          || echo "no SNAT rule"
        executable: /bin/bash
      register: biscayne_snat
      changed_when: false
      become: true

    - name: Check policy routing
      ansible.builtin.shell:
        cmd: >
          set -o pipefail &&
          ip rule show | grep ashburn || echo "no policy rule"
        executable: /bin/bash
      register: biscayne_policy
      changed_when: false

    - name: Check ashburn routing table
      ansible.builtin.shell:
        cmd: >
          set -o pipefail &&
          ip route show table ashburn 2>&1 || echo "table not found"
        executable: /bin/bash
      register: biscayne_table
      changed_when: false

    - name: Check tunnel ping
      ansible.builtin.command:
        cmd: "ping -c 2 -W 2 {{ tunnel_remote_ip }}"
      register: biscayne_ping
      changed_when: false
      failed_when: false

    - name: Check ashburn-relay service
      ansible.builtin.systemd:
        name: ashburn-relay.service
      register: biscayne_service
      check_mode: true
      failed_when: false

    - name: Show biscayne relay status
      ansible.builtin.debug:
        msg:
          gre_tunnel: "{{ biscayne_tunnel.stdout }}"
          loopback_ip: "{{ biscayne_lo.stdout }}"
          dnat_rules: "{{ biscayne_dnat.stdout_lines }}"
          mangle_rules: "{{ biscayne_mangle.stdout_lines }}"
          snat_rule: "{{ biscayne_snat.stdout_lines }}"
          policy_routing: "{{ biscayne_policy.stdout }}"
          routing_table: "{{ biscayne_table.stdout }}"
          tunnel_ping: "{{ 'OK (' + biscayne_ping.stdout_lines[-1] + ')' if biscayne_ping.rc == 0 else 'FAILED' }}"
          systemd_service: "{{ biscayne_service.status.ActiveState | default('not installed') }}"

View File

@@ -29,7 +29,7 @@
 #   ansible-playbook -i inventory/switches.yml playbooks/ashburn-relay-mia-sw01.yml -e rollback=true
 
 - name: Configure mia-sw01 validator relay tunnel
-  hosts: mia-sw01
+  hosts: all
   gather_facts: false
 
   vars:

View File

@@ -19,7 +19,7 @@
 #   ansible-playbook -i inventory/switches.yml playbooks/ashburn-relay-was-sw01.yml -e rollback=true
 
 - name: Configure was-sw01 inbound validator relay
-  hosts: was-sw01
+  hosts: all
   gather_facts: false
 
   vars:

View File

@@ -23,12 +23,13 @@
 #   mounting each boot.
 #   Persisted as: format-ramdisk.service (mkfs before mount) + fstab entry
 #
-# Invariant 3: /srv/kind/solana is an rbind of /srv/solana
+# Invariant 3: /srv/kind/solana is XFS (zvol) and /srv/kind/solana/ramdisk is XFS (ram0)
 #   Why: kind mounts /srv/kind → /mnt inside the kind node. PVs reference
-#   /mnt/solana/*. Without the rbind, /srv/kind/solana resolves to the ZFS
-#   dataset (biscayne/DATA/srv/kind), not the zvol — violating invariant 1.
-#   Persisted as: fstab entry with x-systemd.requires=zfs-mount.service
-#   (must mount AFTER ZFS, or ZFS overlay at /srv/kind hides it)
+#   /mnt/solana/*. An rbind of /srv/solana does NOT work because ZFS's
+#   shared propagation (shared:75 on /srv) overlays ZFS on top of the bind.
+#   Direct device mounts bypass propagation entirely.
+#   Persisted as: two fstab entries — zvol at /srv/kind/solana, ram0 at
+#   /srv/kind/solana/ramdisk, both with x-systemd.requires ordering
 #
 # This playbook checks each invariant and only acts if it's not met.
 # Idempotent — safe to run multiple times.
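
The propagation behavior described in Invariant 3 can be observed directly with
findmnt's PROPAGATION column; a read-only diagnostic sketch (not part of this
commit, paths as above):

```yaml
- name: Show mount propagation under /srv (diagnostic only)
  ansible.builtin.command:
    cmd: findmnt -o TARGET,SOURCE,FSTYPE,PROPAGATION /srv /srv/kind/solana /srv/kind/solana/ramdisk
  register: propagation_report
  changed_when: false

- name: Report propagation
  ansible.builtin.debug:
    var: propagation_report.stdout_lines
```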
@@ -48,6 +49,7 @@
     kind_solana_dir: /srv/kind/solana
     accounts_dir: /srv/solana/ramdisk/accounts
     deployment_dir: /srv/deployments/agave
+    kind_ramdisk_opts: "noatime,nodiratime,nofail,x-systemd.requires=format-ramdisk.service,x-systemd.requires=srv-kind-solana.mount"
 
   tasks:
     # ---- systemd units ----------------------------------------------------------
@@ -106,30 +108,30 @@
         line: '{{ ramdisk_device }} {{ ramdisk_mount }} xfs noatime,nodiratime,nofail,x-systemd.requires=format-ramdisk.service 0 0'
       register: fstab_ramdisk
 
-    # rbind /srv/solana to /srv/kind/solana AFTER zfs-mount.service and ramdisk.
-    # Without this ordering, ZFS overlay at /srv/kind hides the bind mount.
-    - name: Ensure kind bind mount fstab entry
+    # Direct device mounts at /srv/kind/solana — bypasses ZFS shared propagation.
+    # An rbind of /srv/solana fails because ZFS's shared:75 on /srv overlays
+    # ZFS on top of any bind mount under /srv. Direct device mounts avoid this.
+    - name: Ensure kind zvol fstab entry
       ansible.builtin.lineinfile:
         path: /etc/fstab
         regexp: '^\S+\s+{{ kind_solana_dir }}\s'
-        line: '{{ solana_dir }} {{ kind_solana_dir }} none rbind,nofail,x-systemd.requires=zfs-mount.service,x-systemd.requires=srv-solana-ramdisk.mount 0 0'
+        line: '{{ zvol_device }} {{ kind_solana_dir }} xfs defaults,nofail,x-systemd.requires=zfs-mount.service 0 0'
       register: fstab_kind
 
-    # Remove stale fstab entries from previous attempts (direct zvol mount,
-    # separate ramdisk mount at /srv/kind/solana/ramdisk)
-    - name: Remove stale kind zvol fstab entry
-      ansible.builtin.lineinfile:
-        path: /etc/fstab
-        regexp: '^{{ zvol_device }}\s+{{ kind_solana_dir }}\s'
-        state: absent
-      register: fstab_stale_zvol
-
-    - name: Remove stale kind ramdisk fstab entry
+    - name: Ensure kind ramdisk fstab entry
       ansible.builtin.lineinfile:
         path: /etc/fstab
         regexp: '^\S+\s+{{ kind_solana_dir }}/ramdisk\s'
+        line: "{{ ramdisk_device }} {{ kind_solana_dir }}/ramdisk xfs {{ kind_ramdisk_opts }} 0 0"
+      register: fstab_kind_ramdisk
+
+    # Remove stale rbind fstab entry from previous approach
+    - name: Remove stale kind rbind fstab entry
+      ansible.builtin.lineinfile:
+        path: /etc/fstab
+        regexp: '^\S+\s+{{ kind_solana_dir }}\s+none\s+rbind'
         state: absent
-      register: fstab_stale_ramdisk
+      register: fstab_stale_rbind
 
     # ---- reload and enable ------------------------------------------------------
     - name: Reload systemd
@@ -137,8 +139,9 @@
         daemon_reload: true
       when: >-
         unit_file.changed or accounts_unit.changed or
-        fstab_zvol.changed or fstab_ramdisk.changed or fstab_kind.changed or
-        fstab_stale_zvol.changed or fstab_stale_ramdisk.changed
+        fstab_zvol.changed or fstab_ramdisk.changed or
+        fstab_kind.changed or fstab_kind_ramdisk.changed or
+        fstab_stale_rbind.changed
 
     - name: Enable ramdisk services
       ansible.builtin.systemd:
@@ -164,14 +167,14 @@
       changed_when: ramdisk_mounted.rc != 0
       when: ramdisk_mounted.rc != 0
 
-    # ---- apply kind bind mount now if not correct ------------------------------
-    - name: Check kind bind mount
+    # ---- apply kind device mounts now if not correct ----------------------------
+    - name: Check kind zvol mount is XFS
       ansible.builtin.shell:
         cmd: >
           set -o pipefail &&
-          findmnt -n -o SOURCE {{ kind_solana_dir }} | grep -q '{{ solana_dir }}'
+          findmnt -n -o FSTYPE {{ kind_solana_dir }} | grep -q xfs
         executable: /bin/bash
-      register: kind_mount_check
+      register: kind_zvol_check
       failed_when: false
       changed_when: false
@@ -181,17 +184,47 @@
           umount {{ kind_solana_dir }}/ramdisk 2>/dev/null || true
           umount {{ kind_solana_dir }} 2>/dev/null || true
         executable: /bin/bash
-      changed_when: kind_mount_check.rc != 0
-      when: kind_mount_check.rc != 0
+      changed_when: kind_zvol_check.rc != 0
+      when: kind_zvol_check.rc != 0
 
-    - name: Apply kind bind mount now
+    - name: Mount zvol at kind solana dir
       ansible.posix.mount:
         path: "{{ kind_solana_dir }}"
-        src: "{{ solana_dir }}"
-        fstype: none
-        opts: rbind
+        src: "{{ zvol_device }}"
+        fstype: xfs
         state: mounted
-      when: kind_mount_check.rc != 0
+      when: kind_zvol_check.rc != 0
+
+    - name: Check kind ramdisk mount is XFS
+      ansible.builtin.shell:
+        cmd: >
+          set -o pipefail &&
+          findmnt -n -o FSTYPE {{ kind_solana_dir }}/ramdisk | grep -q xfs
+        executable: /bin/bash
+      register: kind_ramdisk_check
+      failed_when: false
+      changed_when: false
+
+    - name: Mount ramdisk at kind solana ramdisk dir
+      ansible.posix.mount:
+        path: "{{ kind_solana_dir }}/ramdisk"
+        src: "{{ ramdisk_device }}"
+        fstype: xfs
+        opts: noatime,nodiratime
+        state: mounted
+      when: kind_ramdisk_check.rc != 0
+
+    # Docker requires shared propagation on mounts it bind-mounts into
+    # containers. Without this, `docker start` fails with "not a shared
+    # or slave mount".
+    # No ansible module supports mount propagation flags; command required.
+    - name: Ensure shared propagation on kind mounts  # noqa: command-instead-of-module
+      ansible.builtin.command:
+        cmd: mount --make-shared {{ item }}
+      loop:
+        - "{{ kind_solana_dir }}"
+        - "{{ kind_solana_dir }}/ramdisk"
+      changed_when: false
 
     # ---- verify -----------------------------------------------------------------
     - name: Verify ramdisk is XFS
@@ -206,7 +239,19 @@
         executable: /bin/bash
       changed_when: false
 
-    - name: Verify kind bind mount contents
+    - name: Verify kind zvol is XFS
+      ansible.builtin.shell:
+        cmd: set -o pipefail && df -T {{ kind_solana_dir }} | grep -q xfs
+        executable: /bin/bash
+      changed_when: false
+
+    - name: Verify kind ramdisk is XFS
+      ansible.builtin.shell:
+        cmd: set -o pipefail && df -T {{ kind_solana_dir }}/ramdisk | grep -q xfs
+        executable: /bin/bash
+      changed_when: false
+
+    - name: Verify kind mount contents
       ansible.builtin.shell:
         cmd: >
           set -o pipefail &&
@@ -216,8 +261,11 @@
       register: kind_mount_verify
       changed_when: false
 
-    # Assert the kind node sees XFS (zvol), not ZFS. If this fails, kind
-    # needs a restart or laconic-so needs the HostToContainer propagation fix.
+    # Assert the kind node sees XFS at the PV mount paths.
+    # laconic-so creates individual extraMounts per volume:
+    #   /srv/kind/solana/ledger → /mnt/validator-ledger (inside kind node)
+    #   /srv/kind/solana/ramdisk/accounts → /mnt/validator-accounts
+    # The PV hostPaths use /mnt/<volume-name>, not /mnt/solana/<subpath>.
     - name: Read cluster-id from deployment
       ansible.builtin.shell:
         cmd: set -o pipefail && grep '^cluster-id:' {{ deployment_dir }}/deployment.yml | awk '{print $2}'
@@ -225,12 +273,13 @@
       register: cluster_id_result
       changed_when: false
 
-    - name: Verify kind node sees XFS at /mnt/solana
+    - name: Check kind node XFS visibility
       ansible.builtin.shell:
         cmd: >
           set -o pipefail &&
           docker exec {{ cluster_id_result.stdout }}-control-plane
-          stat -f -c '%T' /mnt/solana | grep -q xfs
+          df -T /mnt/validator-ledger /mnt/validator-accounts
+          | grep -c xfs
         executable: /bin/bash
       register: kind_fstype
       changed_when: false
@@ -240,4 +289,7 @@
       ansible.builtin.debug:
         msg:
           kind_mount: "{{ kind_mount_verify.stdout_lines }}"
-          kind_fstype: "{{ 'xfs (correct)' if kind_fstype.rc == 0 else 'NOT XFS — kind restart required' }}"
+          kind_fstype: "{{ 'xfs (correct)' if kind_fstype.stdout | default('0') | int >= 2 else 'NOT XFS — kind restart required' }}"
+
+- name: Configure Ashburn validator relay
+  ansible.builtin.import_playbook: ashburn-relay-biscayne.yml
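
The `/mnt/validator-*` paths follow from laconic-so emitting one kind extraMount and
one hostPath PersistentVolume per named volume. A hedged sketch of that per-volume
wiring (illustrative shapes only, not the generated manifests):

```yaml
# kind node extraMounts, one per volume (illustrative)
extraMounts:
  - hostPath: /srv/kind/solana/ledger
    containerPath: /mnt/validator-ledger
    propagation: HostToContainer
  - hostPath: /srv/kind/solana/ramdisk/accounts
    containerPath: /mnt/validator-accounts
    propagation: HostToContainer
# The PV for each volume then references the in-node path, e.g.:
#   spec:
#     hostPath:
#       path: /mnt/validator-ledger
```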

View File

@@ -0,0 +1,128 @@
---
# Start agave validator on biscayne
#
# Ensures the kind container is running, verifies XFS mounts are visible
# inside the kind node, then scales the deployment to 1.
#
# Prerequisites:
#   - biscayne-prepare-agave.yml has been run (fstab entries, systemd units)
#   - A snapshot exists in /srv/solana/snapshots (or use biscayne-recover.yml)
#
# Usage:
#   ansible-playbook playbooks/biscayne-start.yml
#
- name: Start agave validator
  hosts: all
  gather_facts: false
  environment:
    KUBECONFIG: /home/rix/.kube/config
  vars:
    deployment_dir: /srv/deployments/agave
  tasks:
    # ---- discover cluster id -------------------------------------------------
    - name: Read cluster-id from deployment
      ansible.builtin.shell:
        cmd: set -o pipefail && grep '^cluster-id:' {{ deployment_dir }}/deployment.yml | awk '{print $2}'
        executable: /bin/bash
      register: cluster_id_result
      changed_when: false

    - name: Set cluster facts
      ansible.builtin.set_fact:
        kind_cluster: "{{ cluster_id_result.stdout }}"
        kind_node: "{{ cluster_id_result.stdout }}-control-plane"
        k8s_namespace: "laconic-{{ cluster_id_result.stdout }}"
        deployment_name: "{{ cluster_id_result.stdout }}-deployment"

    # ---- ensure kind container is running ------------------------------------
    - name: Check kind container state
      ansible.builtin.command: docker inspect -f '{% raw %}{{ .State.Running }}{% endraw %}' {{ kind_node }}
      register: kind_running
      failed_when: false
      changed_when: false

    - name: Start kind container
      ansible.builtin.command: docker start {{ kind_node }}
      when: kind_running.stdout | default('false') != 'true'
      changed_when: true

    - name: Wait for kind node ready
      ansible.builtin.command: >
        kubectl get node {{ kind_node }}
        -o jsonpath='{.status.conditions[?(@.type=="Ready")].status}'
      register: node_ready
      changed_when: false
      retries: 30
      delay: 10
      until: node_ready.stdout == "True"

    # ---- verify mounts inside kind node --------------------------------------
    # laconic-so creates individual extraMounts per volume:
    #   /srv/kind/solana/ledger → /mnt/validator-ledger (inside kind node)
    #   /srv/kind/solana/ramdisk/accounts → /mnt/validator-accounts
    - name: Verify kind node sees XFS at PV paths
      ansible.builtin.shell:
        cmd: >
          set -o pipefail &&
          docker exec {{ kind_node }}
          df -T /mnt/validator-ledger /mnt/validator-accounts
          | grep -c xfs
        executable: /bin/bash
      register: kind_xfs_check
      changed_when: false

    - name: Fail if PV paths are not XFS
      ansible.builtin.fail:
        msg: >-
          Expected 2 XFS mounts (validator-ledger, validator-accounts) but
          found {{ kind_xfs_check.stdout }}. Run biscayne-prepare-agave.yml
          and restart the kind container.
      when: kind_xfs_check.stdout | int < 2

    - name: Show kind node PV filesystems
      ansible.builtin.shell:
        cmd: |
          docker exec {{ kind_node }} df -T /mnt/validator-ledger /mnt/validator-accounts /mnt/validator-snapshots /mnt/validator-log
        executable: /bin/bash
      register: kind_df
      changed_when: false

    - name: Show kind mount info
      ansible.builtin.debug:
        var: kind_df.stdout_lines

    # ---- scale up ------------------------------------------------------------
    - name: Get current replica count
      ansible.builtin.command: >
        kubectl get deployment {{ deployment_name }}
        -n {{ k8s_namespace }}
        -o jsonpath='{.spec.replicas}'
      register: current_replicas
      failed_when: false
      changed_when: false

    - name: Scale deployment to 1
      ansible.builtin.command: >
        kubectl scale deployment {{ deployment_name }}
        -n {{ k8s_namespace }} --replicas=1
      when: current_replicas.stdout | default('0') | int == 0
      changed_when: true

    - name: Wait for pod running
      ansible.builtin.command: >
        kubectl get pods -n {{ k8s_namespace }}
        -l app={{ deployment_name }}
        -o jsonpath='{.items[0].status.phase}'
      register: pod_phase
      changed_when: false
      retries: 30
      delay: 10
      until: pod_phase.stdout == "Running"

    - name: Report started
      ansible.builtin.debug:
        msg: >-
          Validator started. Kind node: {{ kind_node }}.
          Pod phase: {{ pod_phase.stdout }}.
          PV mounts: XFS (zvol for ledger/snapshots/log, ram0 for accounts).

View File

@@ -9,7 +9,7 @@
 #   ansible-playbook playbooks/connect-doublezero-multicast.yml --check   # dry-run
 
 - name: Connect biscayne to DoubleZero multicast
-  hosts: biscayne
+  hosts: all
   gather_facts: false
 
   vars:

View File

@@ -0,0 +1,65 @@
#!/bin/bash
# Ashburn validator relay — runtime setup
#
# Called by ashburn-relay.service (After=docker.service) on boot.
# Idempotent — safe to run multiple times.
#
# Creates GRE tunnel, loopback IP, iptables rules, and policy routing
# so that validator traffic enters/exits via 137.239.194.65 (Ashburn).

set -euo pipefail

# GRE tunnel to mia-sw01
if ! ip tunnel show {{ tunnel_device }} 2>/dev/null; then
  ip tunnel add {{ tunnel_device }} mode gre \
    local {{ tunnel_src }} remote {{ tunnel_dst }} ttl 64
  ip addr add {{ tunnel_local_ip }}/31 dev {{ tunnel_device }}
  ip link set {{ tunnel_device }} up mtu 8972
fi

# Ashburn IP on loopback (so kernel accepts inbound packets)
ip addr show lo | grep -q '{{ ashburn_ip }}' || \
  ip addr add {{ ashburn_ip }}/32 dev lo

# Inbound DNAT (position 1, before Docker's ADDRTYPE LOCAL rule)
for rule in \
  "-p udp -d {{ ashburn_ip }} --dport {{ gossip_port }} \
   -j DNAT --to-destination {{ kind_node_ip }}:{{ gossip_port }}" \
  "-p tcp -d {{ ashburn_ip }} --dport {{ gossip_port }} \
   -j DNAT --to-destination {{ kind_node_ip }}:{{ gossip_port }}" \
  "-p udp -d {{ ashburn_ip }} \
   --dport {{ dynamic_port_range_start }}:{{ dynamic_port_range_end }} \
   -j DNAT --to-destination {{ kind_node_ip }}" \
  ; do
  if ! iptables -t nat -C PREROUTING $rule 2>/dev/null; then
    iptables -t nat -I PREROUTING 1 $rule
  fi
done

# Outbound mangle (fwmark for policy routing)
for rule in \
  "-p udp -s {{ kind_network }} --sport {{ gossip_port }} \
   -j MARK --set-mark {{ fwmark }}" \
  "-p udp -s {{ kind_network }} \
   --sport {{ dynamic_port_range_start }}:{{ dynamic_port_range_end }} \
   -j MARK --set-mark {{ fwmark }}" \
  "-p tcp -s {{ kind_network }} --sport {{ gossip_port }} \
   -j MARK --set-mark {{ fwmark }}" \
  ; do
  if ! iptables -t mangle -C PREROUTING $rule 2>/dev/null; then
    iptables -t mangle -A PREROUTING $rule
  fi
done

# Outbound SNAT (position 1, before Docker MASQUERADE)
snat_rule="-m mark --mark {{ fwmark }} -j SNAT --to-source {{ ashburn_ip }}"
if ! iptables -t nat -C POSTROUTING $snat_rule 2>/dev/null; then
  iptables -t nat -I POSTROUTING 1 $snat_rule
fi

# Policy routing table
grep -q '^{{ rt_table_id }} {{ rt_table_name }}$' /etc/iproute2/rt_tables || \
  echo "{{ rt_table_id }} {{ rt_table_name }}" >> /etc/iproute2/rt_tables

ip rule show | grep -q 'fwmark 0x64 lookup ashburn' || \
  ip rule add fwmark {{ fwmark }} table {{ rt_table_name }}

ip route replace default \
  via {{ tunnel_remote_ip }} dev {{ tunnel_device }} table {{ rt_table_name }}
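
The Jinja variables this template consumes come from the relay playbook's vars; a
sketch of the expected shape (only `ashburn_ip`, `tunnel_device`, `tunnel_remote_ip`,
the fwmark value, and the table name appear elsewhere in this commit — the remaining
values below are illustrative placeholders, not the real deployment's addresses):

```yaml
# vars consumed by ashburn-relay-setup.sh.j2 (example values)
tunnel_device: gre-ashburn
tunnel_src: 10.0.0.2              # biscayne-side GRE endpoint (placeholder)
tunnel_dst: 10.0.0.1              # mia-sw01-side GRE endpoint (placeholder)
tunnel_local_ip: 169.254.100.1    # /31 address inside the tunnel (placeholder)
tunnel_remote_ip: 169.254.100.0
ashburn_ip: 137.239.194.65
kind_node_ip: 172.18.0.2          # kind node address (placeholder)
kind_network: 172.18.0.0/16       # kind docker network (placeholder)
gossip_port: 8001                 # placeholder
dynamic_port_range_start: 8002    # placeholder
dynamic_port_range_end: 8020      # placeholder
fwmark: 100                       # 0x64
rt_table_id: 100
rt_table_name: ashburn
```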

View File

@@ -11,7 +11,7 @@
 #   ansible-playbook playbooks/health-check.yml -t network   # just network checks
 
 - name: Biscayne agave-stack health check
-  hosts: biscayne
+  hosts: all
   gather_facts: false
   environment:
     KUBECONFIG: /home/rix/.kube/config
@@ -249,16 +249,14 @@
       ansible.builtin.shell:
         cmd: |
           set -o pipefail
-          echo "=== /mnt/solana contents ==="
-          docker exec {{ kind_cluster }}-control-plane ls /mnt/solana/
-          echo "=== /mnt/solana filesystem ==="
-          docker exec {{ kind_cluster }}-control-plane df -T /mnt/solana
-          echo "=== /mnt/solana/ramdisk filesystem ==="
-          docker exec {{ kind_cluster }}-control-plane df -T /mnt/solana/ramdisk 2>/dev/null || echo "ramdisk not visible"
-          echo "=== /mnt/solana/snapshots ==="
-          docker exec {{ kind_cluster }}-control-plane ls /mnt/solana/snapshots/ 2>/dev/null || echo "snapshots not visible"
-          echo "=== /mnt/solana/ledger ==="
-          docker exec {{ kind_cluster }}-control-plane ls /mnt/solana/ledger/ 2>/dev/null | head -5 || echo "ledger not visible"
+          echo "=== PV mount filesystems ==="
+          docker exec {{ kind_cluster }}-control-plane df -T /mnt/validator-ledger /mnt/validator-accounts /mnt/validator-snapshots /mnt/validator-log 2>/dev/null || echo "PV mounts not visible"
+          echo "=== /mnt/validator-ledger ==="
+          docker exec {{ kind_cluster }}-control-plane ls /mnt/validator-ledger/ 2>/dev/null | head -5 || echo "ledger not visible"
+          echo "=== /mnt/validator-snapshots ==="
+          docker exec {{ kind_cluster }}-control-plane ls /mnt/validator-snapshots/ 2>/dev/null || echo "snapshots not visible"
+          echo "=== /mnt/validator-accounts ==="
+          docker exec {{ kind_cluster }}-control-plane ls /mnt/validator-accounts/ 2>/dev/null || echo "accounts not visible"
         executable: /bin/bash
       register: kind_mounts
       changed_when: false