fix: inventory layering — playbooks use hosts:all, cross-inventory uses explicit hosts
Normal playbooks should never hardcode hostnames — that's an inventory concern. Changed all playbooks to hosts:all. The one exception is ashburn-relay-check.yml which legitimately spans both inventories (switches + biscayne) and uses explicit hostnames. Also adds: - ashburn-relay-check.yml: full-path relay diagnostics (switches + host) - biscayne-start.yml: start kind container and scale validator to 1 - ashburn-relay-setup.sh.j2: boot persistence script for relay state - Direct device mounts replacing rbind (ZFS shared propagation fix) - systemd service replacing broken if-up.d/netfilter-persistent - PV mount path corrections (/mnt/validator-* not /mnt/solana/*) Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>fix/kind-mount-propagation
parent
14c0f63775
commit
9cbc115295
|
|
@ -72,11 +72,10 @@ These units run before docker, so the kind node's bind mounts always see the
|
|||
ramdisk. **No manual intervention is needed after reboot.**
|
||||
|
||||
**Mount propagation**: The kind node bind-mounts `/srv/kind` → `/mnt` at container
|
||||
start. New mounts under `/srv/kind` on the host (like the rbind at
|
||||
`/srv/kind/solana`) do NOT propagate into the kind node because kind's default
|
||||
mount propagation is `None`. A kind node restart is required to pick up new host
|
||||
mounts. **TODO**: Fix laconic-so to set `propagation: HostToContainer` on the
|
||||
kind-mount-root extraMount, which would make host mounts propagate automatically.
|
||||
start. laconic-so sets `propagation: HostToContainer` on all kind extraMounts
|
||||
(commit `a11d40f2` in stack-orchestrator), so host submounts (like the rbind at
|
||||
`/srv/kind/solana`) propagate into the kind node automatically. A kind restart
|
||||
is required to pick up the new config after updating laconic-so.
|
||||
|
||||
### KUBECONFIG
|
||||
|
||||
|
|
|
|||
|
|
@ -26,7 +26,7 @@
|
|||
# ansible-playbook playbooks/ashburn-relay-biscayne.yml -e rollback=true
|
||||
|
||||
- name: Configure biscayne Ashburn validator relay
|
||||
hosts: biscayne
|
||||
hosts: all
|
||||
gather_facts: false
|
||||
|
||||
vars:
|
||||
|
|
@ -72,9 +72,18 @@
|
|||
ansible.builtin.shell:
|
||||
cmd: |
|
||||
set -o pipefail
|
||||
iptables -t nat -D PREROUTING -p udp -d {{ ashburn_ip }} --dport {{ gossip_port }} -j DNAT --to-destination {{ kind_node_ip }}:{{ gossip_port }} 2>/dev/null || true
|
||||
iptables -t nat -D PREROUTING -p tcp -d {{ ashburn_ip }} --dport {{ gossip_port }} -j DNAT --to-destination {{ kind_node_ip }}:{{ gossip_port }} 2>/dev/null || true
|
||||
iptables -t nat -D PREROUTING -p udp -d {{ ashburn_ip }} --dport {{ dynamic_port_range_start }}:{{ dynamic_port_range_end }} -j DNAT --to-destination {{ kind_node_ip }} 2>/dev/null || true
|
||||
iptables -t nat -D PREROUTING -p udp -d {{ ashburn_ip }} \
|
||||
--dport {{ gossip_port }} \
|
||||
-j DNAT --to-destination {{ kind_node_ip }}:{{ gossip_port }} \
|
||||
2>/dev/null || true
|
||||
iptables -t nat -D PREROUTING -p tcp -d {{ ashburn_ip }} \
|
||||
--dport {{ gossip_port }} \
|
||||
-j DNAT --to-destination {{ kind_node_ip }}:{{ gossip_port }} \
|
||||
2>/dev/null || true
|
||||
iptables -t nat -D PREROUTING -p udp -d {{ ashburn_ip }} \
|
||||
--dport {{ dynamic_port_range_start }}:{{ dynamic_port_range_end }} \
|
||||
-j DNAT --to-destination {{ kind_node_ip }} \
|
||||
2>/dev/null || true
|
||||
executable: /bin/bash
|
||||
changed_when: false
|
||||
|
||||
|
|
@ -82,9 +91,15 @@
|
|||
ansible.builtin.shell:
|
||||
cmd: |
|
||||
set -o pipefail
|
||||
iptables -t mangle -D PREROUTING -s {{ kind_network }} -p udp --sport {{ gossip_port }} -j MARK --set-mark {{ fwmark }} 2>/dev/null || true
|
||||
iptables -t mangle -D PREROUTING -s {{ kind_network }} -p udp --sport {{ dynamic_port_range_start }}:{{ dynamic_port_range_end }} -j MARK --set-mark {{ fwmark }} 2>/dev/null || true
|
||||
iptables -t mangle -D PREROUTING -s {{ kind_network }} -p tcp --sport {{ gossip_port }} -j MARK --set-mark {{ fwmark }} 2>/dev/null || true
|
||||
iptables -t mangle -D PREROUTING -s {{ kind_network }} \
|
||||
-p udp --sport {{ gossip_port }} \
|
||||
-j MARK --set-mark {{ fwmark }} 2>/dev/null || true
|
||||
iptables -t mangle -D PREROUTING -s {{ kind_network }} \
|
||||
-p udp --sport {{ dynamic_port_range_start }}:{{ dynamic_port_range_end }} \
|
||||
-j MARK --set-mark {{ fwmark }} 2>/dev/null || true
|
||||
iptables -t mangle -D PREROUTING -s {{ kind_network }} \
|
||||
-p tcp --sport {{ gossip_port }} \
|
||||
-j MARK --set-mark {{ fwmark }} 2>/dev/null || true
|
||||
executable: /bin/bash
|
||||
changed_when: false
|
||||
|
||||
|
|
@ -102,15 +117,21 @@
|
|||
executable: /bin/bash
|
||||
changed_when: false
|
||||
|
||||
- name: Persist cleaned iptables
|
||||
ansible.builtin.command:
|
||||
cmd: netfilter-persistent save
|
||||
changed_when: true
|
||||
- name: Disable and remove ashburn-relay service
|
||||
ansible.builtin.systemd:
|
||||
name: ashburn-relay.service
|
||||
enabled: false
|
||||
state: stopped
|
||||
failed_when: false
|
||||
|
||||
- name: Remove if-up.d script
|
||||
- name: Remove ashburn-relay files
|
||||
ansible.builtin.file:
|
||||
path: /etc/network/if-up.d/ashburn-routing
|
||||
path: "{{ item }}"
|
||||
state: absent
|
||||
loop:
|
||||
- /etc/systemd/system/ashburn-relay.service
|
||||
- /usr/local/sbin/ashburn-relay-setup.sh
|
||||
- /etc/network/if-up.d/ashburn-routing
|
||||
|
||||
- name: Rollback complete
|
||||
ansible.builtin.debug:
|
||||
|
|
@ -140,7 +161,7 @@
|
|||
|
||||
- name: Show existing iptables nat rules
|
||||
ansible.builtin.shell:
|
||||
cmd: iptables -t nat -L -v -n --line-numbers | head -60
|
||||
cmd: set -o pipefail && iptables -t nat -L -v -n --line-numbers | head -60
|
||||
executable: /bin/bash
|
||||
register: existing_nat
|
||||
changed_when: false
|
||||
|
|
@ -288,6 +309,7 @@
|
|||
- name: Add policy routing rule for fwmark
|
||||
ansible.builtin.shell:
|
||||
cmd: |
|
||||
set -o pipefail
|
||||
if ip rule show | grep -q 'fwmark 0x64 lookup ashburn'; then
|
||||
echo "rule already exists"
|
||||
else
|
||||
|
|
@ -309,20 +331,51 @@
|
|||
# ------------------------------------------------------------------
|
||||
# Persistence
|
||||
# ------------------------------------------------------------------
|
||||
- name: Save iptables rules
|
||||
ansible.builtin.command:
|
||||
cmd: netfilter-persistent save
|
||||
changed_when: true
|
||||
# A systemd oneshot service replaces both if-up.d (which depends on
|
||||
# networking.service, inactive on this host) and netfilter-persistent
|
||||
# (which runs before Docker, so Docker's chain setup blows away rules).
|
||||
# This service runs After=docker.service and idempotently applies all
|
||||
# tunnel, iptables, and policy routing state.
|
||||
- name: Install ashburn-relay systemd service
|
||||
ansible.builtin.copy:
|
||||
dest: /etc/systemd/system/ashburn-relay.service
|
||||
mode: "0644"
|
||||
content: |
|
||||
[Unit]
|
||||
Description=Ashburn validator relay (GRE tunnel, iptables, policy routing)
|
||||
After=docker.service network-online.target
|
||||
Wants=network-online.target
|
||||
|
||||
[Service]
|
||||
Type=oneshot
|
||||
RemainAfterExit=yes
|
||||
ExecStart=/usr/local/sbin/ashburn-relay-setup.sh
|
||||
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
register: relay_unit
|
||||
tags: [inbound, outbound]
|
||||
|
||||
- name: Install if-up.d persistence script
|
||||
- name: Install ashburn-relay setup script
|
||||
ansible.builtin.template:
|
||||
src: files/ashburn-routing-ifup.sh.j2
|
||||
dest: /etc/network/if-up.d/ashburn-routing
|
||||
mode: '0755'
|
||||
owner: root
|
||||
group: root
|
||||
tags: [outbound]
|
||||
src: files/ashburn-relay-setup.sh.j2
|
||||
dest: /usr/local/sbin/ashburn-relay-setup.sh
|
||||
mode: "0755"
|
||||
register: relay_script
|
||||
tags: [inbound, outbound]
|
||||
|
||||
- name: Reload systemd and enable ashburn-relay
|
||||
ansible.builtin.systemd:
|
||||
name: ashburn-relay.service
|
||||
daemon_reload: "{{ relay_unit.changed or relay_script.changed }}"
|
||||
enabled: true
|
||||
tags: [inbound, outbound]
|
||||
|
||||
- name: Remove stale if-up.d script
|
||||
ansible.builtin.file:
|
||||
path: /etc/network/if-up.d/ashburn-routing
|
||||
state: absent
|
||||
tags: [inbound, outbound]
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Verification
|
||||
|
|
@ -345,7 +398,7 @@
|
|||
|
||||
- name: Show NAT rules
|
||||
ansible.builtin.shell:
|
||||
cmd: iptables -t nat -L -v -n --line-numbers 2>&1 | head -40
|
||||
cmd: set -o pipefail && iptables -t nat -L -v -n --line-numbers 2>&1 | head -40
|
||||
executable: /bin/bash
|
||||
register: nat_rules
|
||||
changed_when: false
|
||||
|
|
@ -374,7 +427,7 @@
|
|||
|
||||
- name: Show loopback addresses
|
||||
ansible.builtin.shell:
|
||||
cmd: ip addr show lo | grep inet
|
||||
cmd: set -o pipefail && ip addr show lo | grep inet
|
||||
executable: /bin/bash
|
||||
register: lo_addrs
|
||||
changed_when: false
|
||||
|
|
|
|||
|
|
@ -0,0 +1,251 @@
|
|||
---
|
||||
# Ashburn relay health check — full path verification
|
||||
#
|
||||
# Cross-inventory playbook: checks was-sw01, mia-sw01, and biscayne.
|
||||
# All tasks are read-only — safe to run at any time.
|
||||
#
|
||||
# Usage:
|
||||
# ansible-playbook -i inventory-switches/switches.yml \
|
||||
# -i inventory/biscayne.yml playbooks/ashburn-relay-check.yml
|
||||
|
||||
- name: Check was-sw01 relay config
|
||||
hosts: was-sw01
|
||||
gather_facts: false
|
||||
|
||||
vars:
|
||||
ashburn_ip: 137.239.194.65
|
||||
|
||||
tasks:
|
||||
- name: Check loopback interfaces
|
||||
arista.eos.eos_command:
|
||||
commands:
|
||||
- show ip interface brief | include Loopback
|
||||
register: was_loopbacks
|
||||
changed_when: false
|
||||
|
||||
- name: Check route for ashburn IP
|
||||
arista.eos.eos_command:
|
||||
commands:
|
||||
- "show ip route {{ ashburn_ip }}"
|
||||
register: was_route
|
||||
changed_when: false
|
||||
|
||||
- name: Check Et1/1 config
|
||||
arista.eos.eos_command:
|
||||
commands:
|
||||
- show running-config interfaces Ethernet1/1
|
||||
register: was_et1
|
||||
changed_when: false
|
||||
|
||||
- name: Check traffic-policies
|
||||
arista.eos.eos_command:
|
||||
commands:
|
||||
- "show running-config | section traffic-policy"
|
||||
register: was_traffic_policy
|
||||
changed_when: false
|
||||
|
||||
- name: Check system-rule
|
||||
arista.eos.eos_command:
|
||||
commands:
|
||||
- "show running-config | include system-rule"
|
||||
register: was_system_rule
|
||||
changed_when: false
|
||||
|
||||
- name: Check monitor sessions
|
||||
arista.eos.eos_command:
|
||||
commands:
|
||||
- show monitor session
|
||||
register: was_monitor
|
||||
changed_when: false
|
||||
|
||||
- name: Check backbone interface
|
||||
arista.eos.eos_command:
|
||||
commands:
|
||||
- show interfaces Ethernet4/1 status
|
||||
register: was_backbone
|
||||
changed_when: false
|
||||
|
||||
- name: Show was-sw01 relay status
|
||||
ansible.builtin.debug:
|
||||
msg:
|
||||
loopbacks: "{{ was_loopbacks.stdout_lines[0] }}"
|
||||
route_to_ashburn_ip: "{{ was_route.stdout_lines[0] }}"
|
||||
et1_config: "{{ was_et1.stdout_lines[0] }}"
|
||||
traffic_policy: "{{ was_traffic_policy.stdout[0] | default('none') }}"
|
||||
system_rule: "{{ was_system_rule.stdout[0] | default('none') }}"
|
||||
monitor_sessions: "{{ was_monitor.stdout_lines[0] }}"
|
||||
backbone: "{{ was_backbone.stdout_lines[0] }}"
|
||||
|
||||
- name: Check mia-sw01 relay config
|
||||
hosts: mia-sw01
|
||||
gather_facts: false
|
||||
|
||||
vars:
|
||||
ashburn_ip: 137.239.194.65
|
||||
|
||||
tasks:
|
||||
- name: Check tunnel interfaces
|
||||
arista.eos.eos_command:
|
||||
commands:
|
||||
- show ip interface brief | include Tunnel
|
||||
register: mia_tunnels
|
||||
changed_when: false
|
||||
|
||||
- name: Check Tunnel100 config
|
||||
arista.eos.eos_command:
|
||||
commands:
|
||||
- show running-config interfaces Tunnel100
|
||||
register: mia_tunnel100
|
||||
changed_when: false
|
||||
|
||||
- name: Check Tunnel100 ACL
|
||||
arista.eos.eos_command:
|
||||
commands:
|
||||
- show ip access-lists SEC-VALIDATOR-100-IN
|
||||
register: mia_acl
|
||||
changed_when: false
|
||||
|
||||
- name: Check route for ashburn IP
|
||||
arista.eos.eos_command:
|
||||
commands:
|
||||
- "show ip route {{ ashburn_ip }}"
|
||||
register: mia_route
|
||||
changed_when: false
|
||||
|
||||
- name: Check traffic-policies
|
||||
arista.eos.eos_command:
|
||||
commands:
|
||||
- "show running-config | section traffic-policy"
|
||||
register: mia_traffic_policy
|
||||
changed_when: false
|
||||
|
||||
- name: Check system-rule
|
||||
arista.eos.eos_command:
|
||||
commands:
|
||||
- "show running-config | include system-rule"
|
||||
register: mia_system_rule
|
||||
changed_when: false
|
||||
|
||||
- name: Check backbone interface
|
||||
arista.eos.eos_command:
|
||||
commands:
|
||||
- show interfaces Ethernet4/1 status
|
||||
register: mia_backbone
|
||||
changed_when: false
|
||||
|
||||
- name: Show mia-sw01 relay status
|
||||
ansible.builtin.debug:
|
||||
msg:
|
||||
tunnels: "{{ mia_tunnels.stdout_lines[0] }}"
|
||||
tunnel100_config: "{{ mia_tunnel100.stdout_lines[0] }}"
|
||||
tunnel100_acl: "{{ mia_acl.stdout_lines[0] }}"
|
||||
route_to_ashburn_ip: "{{ mia_route.stdout_lines[0] }}"
|
||||
traffic_policy: "{{ mia_traffic_policy.stdout[0] | default('none') }}"
|
||||
system_rule: "{{ mia_system_rule.stdout[0] | default('none') }}"
|
||||
backbone: "{{ mia_backbone.stdout_lines[0] }}"
|
||||
|
||||
- name: Check biscayne relay state
|
||||
hosts: biscayne
|
||||
gather_facts: false
|
||||
|
||||
vars:
|
||||
ashburn_ip: 137.239.194.65
|
||||
tunnel_device: gre-ashburn
|
||||
tunnel_remote_ip: 169.254.100.0
|
||||
|
||||
tasks:
|
||||
- name: Check GRE tunnel
|
||||
ansible.builtin.shell:
|
||||
cmd: >
|
||||
set -o pipefail &&
|
||||
ip tunnel show {{ tunnel_device }} 2>&1 || echo "tunnel not found"
|
||||
executable: /bin/bash
|
||||
register: biscayne_tunnel
|
||||
changed_when: false
|
||||
|
||||
- name: Check loopback IP
|
||||
ansible.builtin.shell:
|
||||
cmd: >
|
||||
set -o pipefail &&
|
||||
ip addr show lo | grep '{{ ashburn_ip }}' || echo "not configured"
|
||||
executable: /bin/bash
|
||||
register: biscayne_lo
|
||||
changed_when: false
|
||||
|
||||
- name: Check iptables DNAT rules
|
||||
ansible.builtin.shell:
|
||||
cmd: >
|
||||
set -o pipefail &&
|
||||
iptables -t nat -L PREROUTING -v -n | grep '{{ ashburn_ip }}'
|
||||
|| echo "no DNAT rules"
|
||||
executable: /bin/bash
|
||||
register: biscayne_dnat
|
||||
changed_when: false
|
||||
become: true
|
||||
|
||||
- name: Check iptables mangle rules
|
||||
ansible.builtin.shell:
|
||||
cmd: >
|
||||
set -o pipefail &&
|
||||
iptables -t mangle -L PREROUTING -v -n | grep 'MARK'
|
||||
|| echo "no mangle rules"
|
||||
executable: /bin/bash
|
||||
register: biscayne_mangle
|
||||
changed_when: false
|
||||
become: true
|
||||
|
||||
- name: Check iptables SNAT rule
|
||||
ansible.builtin.shell:
|
||||
cmd: >
|
||||
set -o pipefail &&
|
||||
iptables -t nat -L POSTROUTING -v -n | grep '{{ ashburn_ip }}'
|
||||
|| echo "no SNAT rule"
|
||||
executable: /bin/bash
|
||||
register: biscayne_snat
|
||||
changed_when: false
|
||||
become: true
|
||||
|
||||
- name: Check policy routing
|
||||
ansible.builtin.shell:
|
||||
cmd: >
|
||||
set -o pipefail &&
|
||||
ip rule show | grep ashburn || echo "no policy rule"
|
||||
executable: /bin/bash
|
||||
register: biscayne_policy
|
||||
changed_when: false
|
||||
|
||||
- name: Check ashburn routing table
|
||||
ansible.builtin.shell:
|
||||
cmd: >
|
||||
set -o pipefail &&
|
||||
ip route show table ashburn 2>&1 || echo "table not found"
|
||||
executable: /bin/bash
|
||||
register: biscayne_table
|
||||
changed_when: false
|
||||
|
||||
- name: Check tunnel ping
|
||||
ansible.builtin.command:
|
||||
cmd: "ping -c 2 -W 2 {{ tunnel_remote_ip }}"
|
||||
register: biscayne_ping
|
||||
changed_when: false
|
||||
failed_when: false
|
||||
|
||||
- name: Check ashburn-relay service
|
||||
ansible.builtin.systemd:
|
||||
name: ashburn-relay.service
|
||||
register: biscayne_service
|
||||
check_mode: true
|
||||
failed_when: false
|
||||
|
||||
- name: Show biscayne relay status
|
||||
ansible.builtin.debug:
|
||||
msg:
|
||||
gre_tunnel: "{{ biscayne_tunnel.stdout }}"
|
||||
loopback_ip: "{{ biscayne_lo.stdout }}"
|
||||
dnat_rules: "{{ biscayne_dnat.stdout_lines }}"
|
||||
mangle_rules: "{{ biscayne_mangle.stdout_lines }}"
|
||||
snat_rule: "{{ biscayne_snat.stdout_lines }}"
|
||||
policy_routing: "{{ biscayne_policy.stdout }}"
|
||||
routing_table: "{{ biscayne_table.stdout }}"
|
||||
tunnel_ping: "{{ 'OK (' + biscayne_ping.stdout_lines[-1] + ')' if biscayne_ping.rc == 0 else 'FAILED' }}"
|
||||
systemd_service: "{{ biscayne_service.status.ActiveState | default('not installed') }}"
|
||||
|
|
@ -29,7 +29,7 @@
|
|||
# ansible-playbook -i inventory/switches.yml playbooks/ashburn-relay-mia-sw01.yml -e rollback=true
|
||||
|
||||
- name: Configure mia-sw01 validator relay tunnel
|
||||
hosts: mia-sw01
|
||||
hosts: all
|
||||
gather_facts: false
|
||||
|
||||
vars:
|
||||
|
|
|
|||
|
|
@ -19,7 +19,7 @@
|
|||
# ansible-playbook -i inventory/switches.yml playbooks/ashburn-relay-was-sw01.yml -e rollback=true
|
||||
|
||||
- name: Configure was-sw01 inbound validator relay
|
||||
hosts: was-sw01
|
||||
hosts: all
|
||||
gather_facts: false
|
||||
|
||||
vars:
|
||||
|
|
|
|||
|
|
@ -23,12 +23,13 @@
|
|||
# mounting each boot.
|
||||
# Persisted as: format-ramdisk.service (mkfs before mount) + fstab entry
|
||||
#
|
||||
# Invariant 3: /srv/kind/solana is an rbind of /srv/solana
|
||||
# Invariant 3: /srv/kind/solana is XFS (zvol) and /srv/kind/solana/ramdisk is XFS (ram0)
|
||||
# Why: kind mounts /srv/kind → /mnt inside the kind node. PVs reference
|
||||
# /mnt/solana/*. Without the rbind, /srv/kind/solana resolves to the ZFS
|
||||
# dataset (biscayne/DATA/srv/kind), not the zvol — violating invariant 1.
|
||||
# Persisted as: fstab entry with x-systemd.requires=zfs-mount.service
|
||||
# (must mount AFTER ZFS, or ZFS overlay at /srv/kind hides it)
|
||||
# /mnt/solana/*. An rbind of /srv/solana does NOT work because ZFS's
|
||||
# shared propagation (shared:75 on /srv) overlays ZFS on top of the bind.
|
||||
# Direct device mounts bypass propagation entirely.
|
||||
# Persisted as: two fstab entries — zvol at /srv/kind/solana, ram0 at
|
||||
# /srv/kind/solana/ramdisk, both with x-systemd.requires ordering
|
||||
#
|
||||
# This playbook checks each invariant and only acts if it's not met.
|
||||
# Idempotent — safe to run multiple times.
|
||||
|
|
@ -48,6 +49,7 @@
|
|||
kind_solana_dir: /srv/kind/solana
|
||||
accounts_dir: /srv/solana/ramdisk/accounts
|
||||
deployment_dir: /srv/deployments/agave
|
||||
kind_ramdisk_opts: "noatime,nodiratime,nofail,x-systemd.requires=format-ramdisk.service,x-systemd.requires=srv-kind-solana.mount"
|
||||
|
||||
tasks:
|
||||
# ---- systemd units ----------------------------------------------------------
|
||||
|
|
@ -106,30 +108,30 @@
|
|||
line: '{{ ramdisk_device }} {{ ramdisk_mount }} xfs noatime,nodiratime,nofail,x-systemd.requires=format-ramdisk.service 0 0'
|
||||
register: fstab_ramdisk
|
||||
|
||||
# rbind /srv/solana to /srv/kind/solana AFTER zfs-mount.service and ramdisk.
|
||||
# Without this ordering, ZFS overlay at /srv/kind hides the bind mount.
|
||||
- name: Ensure kind bind mount fstab entry
|
||||
# Direct device mounts at /srv/kind/solana — bypasses ZFS shared propagation.
|
||||
# An rbind of /srv/solana fails because ZFS's shared:75 on /srv overlays
|
||||
# ZFS on top of any bind mount under /srv. Direct device mounts avoid this.
|
||||
- name: Ensure kind zvol fstab entry
|
||||
ansible.builtin.lineinfile:
|
||||
path: /etc/fstab
|
||||
regexp: '^\S+\s+{{ kind_solana_dir }}\s'
|
||||
line: '{{ solana_dir }} {{ kind_solana_dir }} none rbind,nofail,x-systemd.requires=zfs-mount.service,x-systemd.requires=srv-solana-ramdisk.mount 0 0'
|
||||
line: '{{ zvol_device }} {{ kind_solana_dir }} xfs defaults,nofail,x-systemd.requires=zfs-mount.service 0 0'
|
||||
register: fstab_kind
|
||||
|
||||
# Remove stale fstab entries from previous attempts (direct zvol mount,
|
||||
# separate ramdisk mount at /srv/kind/solana/ramdisk)
|
||||
- name: Remove stale kind zvol fstab entry
|
||||
ansible.builtin.lineinfile:
|
||||
path: /etc/fstab
|
||||
regexp: '^{{ zvol_device }}\s+{{ kind_solana_dir }}\s'
|
||||
state: absent
|
||||
register: fstab_stale_zvol
|
||||
|
||||
- name: Remove stale kind ramdisk fstab entry
|
||||
- name: Ensure kind ramdisk fstab entry
|
||||
ansible.builtin.lineinfile:
|
||||
path: /etc/fstab
|
||||
regexp: '^\S+\s+{{ kind_solana_dir }}/ramdisk\s'
|
||||
line: "{{ ramdisk_device }} {{ kind_solana_dir }}/ramdisk xfs {{ kind_ramdisk_opts }} 0 0"
|
||||
register: fstab_kind_ramdisk
|
||||
|
||||
# Remove stale rbind fstab entry from previous approach
|
||||
- name: Remove stale kind rbind fstab entry
|
||||
ansible.builtin.lineinfile:
|
||||
path: /etc/fstab
|
||||
regexp: '^\S+\s+{{ kind_solana_dir }}\s+none\s+rbind'
|
||||
state: absent
|
||||
register: fstab_stale_ramdisk
|
||||
register: fstab_stale_rbind
|
||||
|
||||
# ---- reload and enable ------------------------------------------------------
|
||||
- name: Reload systemd
|
||||
|
|
@ -137,8 +139,9 @@
|
|||
daemon_reload: true
|
||||
when: >-
|
||||
unit_file.changed or accounts_unit.changed or
|
||||
fstab_zvol.changed or fstab_ramdisk.changed or fstab_kind.changed or
|
||||
fstab_stale_zvol.changed or fstab_stale_ramdisk.changed
|
||||
fstab_zvol.changed or fstab_ramdisk.changed or
|
||||
fstab_kind.changed or fstab_kind_ramdisk.changed or
|
||||
fstab_stale_rbind.changed
|
||||
|
||||
- name: Enable ramdisk services
|
||||
ansible.builtin.systemd:
|
||||
|
|
@ -164,14 +167,14 @@
|
|||
changed_when: ramdisk_mounted.rc != 0
|
||||
when: ramdisk_mounted.rc != 0
|
||||
|
||||
# ---- apply kind bind mount now if not correct ------------------------------
|
||||
- name: Check kind bind mount
|
||||
# ---- apply kind device mounts now if not correct ----------------------------
|
||||
- name: Check kind zvol mount is XFS
|
||||
ansible.builtin.shell:
|
||||
cmd: >
|
||||
set -o pipefail &&
|
||||
findmnt -n -o SOURCE {{ kind_solana_dir }} | grep -q '{{ solana_dir }}'
|
||||
findmnt -n -o FSTYPE {{ kind_solana_dir }} | grep -q xfs
|
||||
executable: /bin/bash
|
||||
register: kind_mount_check
|
||||
register: kind_zvol_check
|
||||
failed_when: false
|
||||
changed_when: false
|
||||
|
||||
|
|
@ -181,17 +184,47 @@
|
|||
umount {{ kind_solana_dir }}/ramdisk 2>/dev/null || true
|
||||
umount {{ kind_solana_dir }} 2>/dev/null || true
|
||||
executable: /bin/bash
|
||||
changed_when: kind_mount_check.rc != 0
|
||||
when: kind_mount_check.rc != 0
|
||||
changed_when: kind_zvol_check.rc != 0
|
||||
when: kind_zvol_check.rc != 0
|
||||
|
||||
- name: Apply kind bind mount now
|
||||
- name: Mount zvol at kind solana dir
|
||||
ansible.posix.mount:
|
||||
path: "{{ kind_solana_dir }}"
|
||||
src: "{{ solana_dir }}"
|
||||
fstype: none
|
||||
opts: rbind
|
||||
src: "{{ zvol_device }}"
|
||||
fstype: xfs
|
||||
state: mounted
|
||||
when: kind_mount_check.rc != 0
|
||||
when: kind_zvol_check.rc != 0
|
||||
|
||||
- name: Check kind ramdisk mount is XFS
|
||||
ansible.builtin.shell:
|
||||
cmd: >
|
||||
set -o pipefail &&
|
||||
findmnt -n -o FSTYPE {{ kind_solana_dir }}/ramdisk | grep -q xfs
|
||||
executable: /bin/bash
|
||||
register: kind_ramdisk_check
|
||||
failed_when: false
|
||||
changed_when: false
|
||||
|
||||
- name: Mount ramdisk at kind solana ramdisk dir
|
||||
ansible.posix.mount:
|
||||
path: "{{ kind_solana_dir }}/ramdisk"
|
||||
src: "{{ ramdisk_device }}"
|
||||
fstype: xfs
|
||||
opts: noatime,nodiratime
|
||||
state: mounted
|
||||
when: kind_ramdisk_check.rc != 0
|
||||
|
||||
# Docker requires shared propagation on mounts it bind-mounts into
|
||||
# containers. Without this, `docker start` fails with "not a shared
|
||||
# or slave mount".
|
||||
# No ansible module supports mount propagation flags; command required.
|
||||
- name: Ensure shared propagation on kind mounts # noqa: command-instead-of-module
|
||||
ansible.builtin.command:
|
||||
cmd: mount --make-shared {{ item }}
|
||||
loop:
|
||||
- "{{ kind_solana_dir }}"
|
||||
- "{{ kind_solana_dir }}/ramdisk"
|
||||
changed_when: false
|
||||
|
||||
# ---- verify -----------------------------------------------------------------
|
||||
- name: Verify ramdisk is XFS
|
||||
|
|
@ -206,7 +239,19 @@
|
|||
executable: /bin/bash
|
||||
changed_when: false
|
||||
|
||||
- name: Verify kind bind mount contents
|
||||
- name: Verify kind zvol is XFS
|
||||
ansible.builtin.shell:
|
||||
cmd: set -o pipefail && df -T {{ kind_solana_dir }} | grep -q xfs
|
||||
executable: /bin/bash
|
||||
changed_when: false
|
||||
|
||||
- name: Verify kind ramdisk is XFS
|
||||
ansible.builtin.shell:
|
||||
cmd: set -o pipefail && df -T {{ kind_solana_dir }}/ramdisk | grep -q xfs
|
||||
executable: /bin/bash
|
||||
changed_when: false
|
||||
|
||||
- name: Verify kind mount contents
|
||||
ansible.builtin.shell:
|
||||
cmd: >
|
||||
set -o pipefail &&
|
||||
|
|
@ -216,8 +261,11 @@
|
|||
register: kind_mount_verify
|
||||
changed_when: false
|
||||
|
||||
# Assert the kind node sees XFS (zvol), not ZFS. If this fails, kind
|
||||
# needs a restart or laconic-so needs the HostToContainer propagation fix.
|
||||
# Assert the kind node sees XFS at the PV mount paths.
|
||||
# laconic-so creates individual extraMounts per volume:
|
||||
# /srv/kind/solana/ledger → /mnt/validator-ledger (inside kind node)
|
||||
# /srv/kind/solana/ramdisk/accounts → /mnt/validator-accounts
|
||||
# The PV hostPaths use /mnt/<volume-name>, not /mnt/solana/<subpath>.
|
||||
- name: Read cluster-id from deployment
|
||||
ansible.builtin.shell:
|
||||
cmd: set -o pipefail && grep '^cluster-id:' {{ deployment_dir }}/deployment.yml | awk '{print $2}'
|
||||
|
|
@ -225,12 +273,13 @@
|
|||
register: cluster_id_result
|
||||
changed_when: false
|
||||
|
||||
- name: Verify kind node sees XFS at /mnt/solana
|
||||
- name: Check kind node XFS visibility
|
||||
ansible.builtin.shell:
|
||||
cmd: >
|
||||
set -o pipefail &&
|
||||
docker exec {{ cluster_id_result.stdout }}-control-plane
|
||||
stat -f -c '%T' /mnt/solana | grep -q xfs
|
||||
df -T /mnt/validator-ledger /mnt/validator-accounts
|
||||
| grep -c xfs
|
||||
executable: /bin/bash
|
||||
register: kind_fstype
|
||||
changed_when: false
|
||||
|
|
@ -240,4 +289,7 @@
|
|||
ansible.builtin.debug:
|
||||
msg:
|
||||
kind_mount: "{{ kind_mount_verify.stdout_lines }}"
|
||||
kind_fstype: "{{ 'xfs (correct)' if kind_fstype.rc == 0 else 'NOT XFS — kind restart required' }}"
|
||||
kind_fstype: "{{ 'xfs (correct)' if kind_fstype.stdout | default('0') | int >= 2 else 'NOT XFS — kind restart required' }}"
|
||||
|
||||
- name: Configure Ashburn validator relay
|
||||
ansible.builtin.import_playbook: ashburn-relay-biscayne.yml
|
||||
|
|
|
|||
|
|
@ -0,0 +1,128 @@
|
|||
---
|
||||
# Start agave validator on biscayne
|
||||
#
|
||||
# Ensures the kind container is running, verifies XFS mounts are visible
|
||||
# inside the kind node, then scales the deployment to 1.
|
||||
#
|
||||
# Prerequisites:
|
||||
# - biscayne-prepare-agave.yml has been run (fstab entries, systemd units)
|
||||
# - A snapshot exists in /srv/solana/snapshots (or use biscayne-recover.yml)
|
||||
#
|
||||
# Usage:
|
||||
# ansible-playbook playbooks/biscayne-start.yml
|
||||
#
|
||||
- name: Start agave validator
|
||||
hosts: all
|
||||
gather_facts: false
|
||||
environment:
|
||||
KUBECONFIG: /home/rix/.kube/config
|
||||
vars:
|
||||
deployment_dir: /srv/deployments/agave
|
||||
|
||||
tasks:
|
||||
# ---- discover cluster id -------------------------------------------------
|
||||
- name: Read cluster-id from deployment
|
||||
ansible.builtin.shell:
|
||||
cmd: set -o pipefail && grep '^cluster-id:' {{ deployment_dir }}/deployment.yml | awk '{print $2}'
|
||||
executable: /bin/bash
|
||||
register: cluster_id_result
|
||||
changed_when: false
|
||||
|
||||
- name: Set cluster facts
|
||||
ansible.builtin.set_fact:
|
||||
kind_cluster: "{{ cluster_id_result.stdout }}"
|
||||
kind_node: "{{ cluster_id_result.stdout }}-control-plane"
|
||||
k8s_namespace: "laconic-{{ cluster_id_result.stdout }}"
|
||||
deployment_name: "{{ cluster_id_result.stdout }}-deployment"
|
||||
|
||||
# ---- ensure kind container is running ------------------------------------
|
||||
- name: Check kind container state
|
||||
ansible.builtin.command: docker inspect -f '{% raw %}{{ .State.Running }}{% endraw %}' {{ kind_node }}
|
||||
register: kind_running
|
||||
failed_when: false
|
||||
changed_when: false
|
||||
|
||||
- name: Start kind container
|
||||
ansible.builtin.command: docker start {{ kind_node }}
|
||||
when: kind_running.stdout | default('false') != 'true'
|
||||
changed_when: true
|
||||
|
||||
- name: Wait for kind node ready
|
||||
ansible.builtin.command: >
|
||||
kubectl get node {{ kind_node }}
|
||||
-o jsonpath='{.status.conditions[?(@.type=="Ready")].status}'
|
||||
register: node_ready
|
||||
changed_when: false
|
||||
retries: 30
|
||||
delay: 10
|
||||
until: node_ready.stdout == "True"
|
||||
|
||||
# ---- verify mounts inside kind node --------------------------------------
|
||||
# laconic-so creates individual extraMounts per volume:
|
||||
# /srv/kind/solana/ledger → /mnt/validator-ledger (inside kind node)
|
||||
# /srv/kind/solana/ramdisk/accounts → /mnt/validator-accounts
|
||||
- name: Verify kind node sees XFS at PV paths
|
||||
ansible.builtin.shell:
|
||||
cmd: >
|
||||
set -o pipefail &&
|
||||
docker exec {{ kind_node }}
|
||||
df -T /mnt/validator-ledger /mnt/validator-accounts
|
||||
| grep -c xfs
|
||||
executable: /bin/bash
|
||||
register: kind_xfs_check
|
||||
changed_when: false
|
||||
|
||||
- name: Fail if PV paths are not XFS
|
||||
ansible.builtin.fail:
|
||||
msg: >-
|
||||
Expected 2 XFS mounts (validator-ledger, validator-accounts) but
|
||||
found {{ kind_xfs_check.stdout }}. Run biscayne-prepare-agave.yml
|
||||
and restart the kind container.
|
||||
when: kind_xfs_check.stdout | int < 2
|
||||
|
||||
- name: Show kind node PV filesystems
|
||||
ansible.builtin.shell:
|
||||
cmd: |
|
||||
docker exec {{ kind_node }} df -T /mnt/validator-ledger /mnt/validator-accounts /mnt/validator-snapshots /mnt/validator-log
|
||||
executable: /bin/bash
|
||||
register: kind_df
|
||||
changed_when: false
|
||||
|
||||
- name: Show kind mount info
|
||||
ansible.builtin.debug:
|
||||
var: kind_df.stdout_lines
|
||||
|
||||
# ---- scale up ------------------------------------------------------------
|
||||
- name: Get current replica count
|
||||
ansible.builtin.command: >
|
||||
kubectl get deployment {{ deployment_name }}
|
||||
-n {{ k8s_namespace }}
|
||||
-o jsonpath='{.spec.replicas}'
|
||||
register: current_replicas
|
||||
failed_when: false
|
||||
changed_when: false
|
||||
|
||||
- name: Scale deployment to 1
|
||||
ansible.builtin.command: >
|
||||
kubectl scale deployment {{ deployment_name }}
|
||||
-n {{ k8s_namespace }} --replicas=1
|
||||
when: current_replicas.stdout | default('0') | int == 0
|
||||
changed_when: true
|
||||
|
||||
- name: Wait for pod running
|
||||
ansible.builtin.command: >
|
||||
kubectl get pods -n {{ k8s_namespace }}
|
||||
-l app={{ deployment_name }}
|
||||
-o jsonpath='{.items[0].status.phase}'
|
||||
register: pod_phase
|
||||
changed_when: false
|
||||
retries: 30
|
||||
delay: 10
|
||||
until: pod_phase.stdout == "Running"
|
||||
|
||||
- name: Report started
|
||||
ansible.builtin.debug:
|
||||
msg: >-
|
||||
Validator started. Kind node: {{ kind_node }}.
|
||||
Pod phase: {{ pod_phase.stdout }}.
|
||||
PV mounts: XFS (zvol for ledger/snapshots/log, ram0 for accounts).
|
||||
|
|
@ -9,7 +9,7 @@
|
|||
# ansible-playbook playbooks/connect-doublezero-multicast.yml --check # dry-run
|
||||
|
||||
- name: Connect biscayne to DoubleZero multicast
  hosts: all
|
||||
gather_facts: false
|
||||
|
||||
vars:
|
||||
|
|
|
|||
|
|
@ -0,0 +1,65 @@
|
|||
#!/bin/bash
# Ashburn validator relay — runtime setup
#
# Called by ashburn-relay.service (After=docker.service) on boot.
# Idempotent — safe to run multiple times.
#
# Creates GRE tunnel, loopback IP, iptables rules, and policy routing
# so that validator traffic enters/exits via 137.239.194.65 (Ashburn).
set -euo pipefail

# GRE tunnel to mia-sw01.
# Existence test uses "ip link show", which reliably exits non-zero when the
# device is absent; "ip tunnel show <dev>" exits 0 on some iproute2 releases
# even for a missing tunnel, which would silently skip creation.
if ! ip link show {{ tunnel_device }} >/dev/null 2>&1; then
    ip tunnel add {{ tunnel_device }} mode gre \
        local {{ tunnel_src }} remote {{ tunnel_dst }} ttl 64
    ip addr add {{ tunnel_local_ip }}/31 dev {{ tunnel_device }}
    ip link set {{ tunnel_device }} up mtu 8972
fi

# Ashburn IP on loopback (so kernel accepts inbound packets).
# Match the full /32 suffix so the bare IP cannot false-positive as a
# substring of a longer address already present on lo.
ip addr show lo | grep -q '{{ ashburn_ip }}/32' || \
    ip addr add {{ ashburn_ip }}/32 dev lo

# Inbound DNAT (position 1, before Docker's ADDRTYPE LOCAL rule).
# $rule is expanded unquoted on purpose: iptables needs each token passed as
# a separate argument. -C checks for the rule; -I 1 inserts at the top.
for rule in \
    "-p udp -d {{ ashburn_ip }} --dport {{ gossip_port }} \
     -j DNAT --to-destination {{ kind_node_ip }}:{{ gossip_port }}" \
    "-p tcp -d {{ ashburn_ip }} --dport {{ gossip_port }} \
     -j DNAT --to-destination {{ kind_node_ip }}:{{ gossip_port }}" \
    "-p udp -d {{ ashburn_ip }} \
     --dport {{ dynamic_port_range_start }}:{{ dynamic_port_range_end }} \
     -j DNAT --to-destination {{ kind_node_ip }}" \
; do
    if ! iptables -t nat -C PREROUTING $rule 2>/dev/null; then
        iptables -t nat -I PREROUTING 1 $rule
    fi
done

# Outbound mangle (fwmark for policy routing).
for rule in \
    "-p udp -s {{ kind_network }} --sport {{ gossip_port }} \
     -j MARK --set-mark {{ fwmark }}" \
    "-p udp -s {{ kind_network }} \
     --sport {{ dynamic_port_range_start }}:{{ dynamic_port_range_end }} \
     -j MARK --set-mark {{ fwmark }}" \
    "-p tcp -s {{ kind_network }} --sport {{ gossip_port }} \
     -j MARK --set-mark {{ fwmark }}" \
; do
    if ! iptables -t mangle -C PREROUTING $rule 2>/dev/null; then
        iptables -t mangle -A PREROUTING $rule
    fi
done

# Outbound SNAT (position 1, before Docker MASQUERADE).
snat_rule="-m mark --mark {{ fwmark }} -j SNAT --to-source {{ ashburn_ip }}"
if ! iptables -t nat -C POSTROUTING $snat_rule 2>/dev/null; then
    iptables -t nat -I POSTROUTING 1 $snat_rule
fi

# Policy routing table.
grep -q '^{{ rt_table_id }} {{ rt_table_name }}$' /etc/iproute2/rt_tables || \
    echo "{{ rt_table_id }} {{ rt_table_name }}" >> /etc/iproute2/rt_tables
# Guard on the templated table name, not a hard-coded hex mark: "ip rule show"
# prints fwmark in hex, so a literal like 0x64 silently stops matching if
# {{ fwmark }} is ever changed, and a duplicate rule would be added each boot.
ip rule show | grep -q 'lookup {{ rt_table_name }}' || \
    ip rule add fwmark {{ fwmark }} table {{ rt_table_name }}
ip route replace default \
    via {{ tunnel_remote_ip }} dev {{ tunnel_device }} table {{ rt_table_name }}
|
||||
|
|
@ -11,7 +11,7 @@
|
|||
# ansible-playbook playbooks/health-check.yml -t network # just network checks
|
||||
|
||||
- name: Biscayne agave-stack health check
  hosts: all
|
||||
gather_facts: false
|
||||
environment:
|
||||
KUBECONFIG: /home/rix/.kube/config
|
||||
|
|
@ -249,16 +249,14 @@
|
|||
ansible.builtin.shell:
|
||||
cmd: |
|
||||
set -o pipefail
|
||||
echo "=== PV mount filesystems ==="
|
||||
docker exec {{ kind_cluster }}-control-plane df -T /mnt/validator-ledger /mnt/validator-accounts /mnt/validator-snapshots /mnt/validator-log 2>/dev/null || echo "PV mounts not visible"
|
||||
echo "=== /mnt/validator-ledger ==="
|
||||
docker exec {{ kind_cluster }}-control-plane ls /mnt/validator-ledger/ 2>/dev/null | head -5 || echo "ledger not visible"
|
||||
echo "=== /mnt/validator-snapshots ==="
|
||||
docker exec {{ kind_cluster }}-control-plane ls /mnt/validator-snapshots/ 2>/dev/null || echo "snapshots not visible"
|
||||
echo "=== /mnt/validator-accounts ==="
|
||||
docker exec {{ kind_cluster }}-control-plane ls /mnt/validator-accounts/ 2>/dev/null || echo "accounts not visible"
|
||||
executable: /bin/bash
|
||||
register: kind_mounts
|
||||
changed_when: false
|
||||
|
|
|
|||
Loading…
Reference in New Issue