stack-orchestrator/playbooks/ashburn-relay-biscayne.yml

462 lines
17 KiB
YAML
Raw Normal View History

---
# Configure biscayne for Ashburn validator relay
#
# Sets up inbound DNAT (137.239.194.65 → kind node) and outbound SNAT +
# policy routing (validator traffic → GRE tunnel → mia-sw01 → was-sw01).
#
# Uses a dedicated GRE tunnel to mia-sw01 (NOT the DoubleZero-managed
# doublezero0/Tunnel500). The tunnel source is biscayne's public IP
# (186.233.184.235) and the destination is mia-sw01's free LAN IP
# (209.42.167.137).
#
# Usage:
# # Full setup (inbound + outbound)
# ansible-playbook playbooks/ashburn-relay-biscayne.yml
#
# # Inbound only (DNAT rules)
# ansible-playbook playbooks/ashburn-relay-biscayne.yml -t inbound
#
# # Outbound only (SNAT + policy routing)
# ansible-playbook playbooks/ashburn-relay-biscayne.yml -t outbound
#
# # Pre-flight checks only
# ansible-playbook playbooks/ashburn-relay-biscayne.yml -t preflight
#
# # Rollback
# ansible-playbook playbooks/ashburn-relay-biscayne.yml -e rollback=true
- name: Configure biscayne Ashburn validator relay
  hosts: all
  gather_facts: false
  vars:
    # Relay address: DNAT destination match inbound, SNAT source outbound.
    ashburn_ip: '137.239.194.65'
    # kind node that receives the DNAT'ed traffic, and the network whose
    # outbound validator traffic gets marked.
    kind_node_ip: '172.20.0.2'
    kind_network: '172.20.0.0/16'
    # New dedicated GRE tunnel (not DZ-managed doublezero0)
    tunnel_device: gre-ashburn
    # Point-to-point /31 carried inside the tunnel.
    tunnel_local_ip: '169.254.100.1'   # biscayne end of /31
    tunnel_remote_ip: '169.254.100.0'  # mia-sw01 end of /31
    # Outer GRE endpoints.
    tunnel_src: '186.233.184.235'      # biscayne public IP
    tunnel_dst: '209.42.167.137'       # mia-sw01 free LAN IP
    # Packet mark and the routing table that marked packets are looked up in.
    fwmark: 100
    rt_table_name: ashburn
    rt_table_id: 100
    # Ports relayed in both directions.
    gossip_port: 8001
    dynamic_port_range_start: 9000
    dynamic_port_range_end: 9025
    # Pass -e rollback=true to run only the rollback block and then stop.
    rollback: false
  tasks:
# ------------------------------------------------------------------
# Rollback
# ------------------------------------------------------------------
- name: Rollback all Ashburn relay rules
  when: rollback | bool
  block:
    # Every removal below is best-effort: a missing address, rule, tunnel or
    # unit is tolerated (failed_when: false / `|| true`) so the rollback can
    # be re-run safely.
    - name: Remove Ashburn IP from loopback
      ansible.builtin.command:
        cmd: ip addr del {{ ashburn_ip }}/32 dev lo
      failed_when: false
      changed_when: false
    - name: Remove GRE tunnel
      ansible.builtin.shell:
        cmd: |
          ip link set {{ tunnel_device }} down 2>/dev/null || true
          ip tunnel del {{ tunnel_device }} 2>/dev/null || true
        executable: /bin/bash
      changed_when: false
    # The rule specs here must stay identical to the ones used when adding
    # the rules, otherwise `iptables -D` will not match them.
    - name: Remove inbound DNAT rules
      ansible.builtin.shell:
        cmd: |
          set -o pipefail
          iptables -t nat -D PREROUTING -p udp -d {{ ashburn_ip }} \
            --dport {{ gossip_port }} \
            -j DNAT --to-destination {{ kind_node_ip }}:{{ gossip_port }} \
            2>/dev/null || true
          iptables -t nat -D PREROUTING -p tcp -d {{ ashburn_ip }} \
            --dport {{ gossip_port }} \
            -j DNAT --to-destination {{ kind_node_ip }}:{{ gossip_port }} \
            2>/dev/null || true
          iptables -t nat -D PREROUTING -p udp -d {{ ashburn_ip }} \
            --dport {{ dynamic_port_range_start }}:{{ dynamic_port_range_end }} \
            -j DNAT --to-destination {{ kind_node_ip }} \
            2>/dev/null || true
        executable: /bin/bash
      changed_when: false
    - name: Remove outbound mangle rules
      ansible.builtin.shell:
        cmd: |
          set -o pipefail
          iptables -t mangle -D PREROUTING -s {{ kind_network }} \
            -p udp --sport {{ gossip_port }} \
            -j MARK --set-mark {{ fwmark }} 2>/dev/null || true
          iptables -t mangle -D PREROUTING -s {{ kind_network }} \
            -p udp --sport {{ dynamic_port_range_start }}:{{ dynamic_port_range_end }} \
            -j MARK --set-mark {{ fwmark }} 2>/dev/null || true
          iptables -t mangle -D PREROUTING -s {{ kind_network }} \
            -p tcp --sport {{ gossip_port }} \
            -j MARK --set-mark {{ fwmark }} 2>/dev/null || true
        executable: /bin/bash
      changed_when: false
    - name: Remove outbound SNAT rule
      ansible.builtin.shell:
        cmd: >-
          iptables -t nat -D POSTROUTING -m mark --mark {{ fwmark }}
          -j SNAT --to-source {{ ashburn_ip }} 2>/dev/null || true
        executable: /bin/bash
      changed_when: false
    - name: Remove policy routing
      ansible.builtin.shell:
        cmd: |
          ip rule del fwmark {{ fwmark }} table {{ rt_table_name }} 2>/dev/null || true
          ip route del default table {{ rt_table_name }} 2>/dev/null || true
        executable: /bin/bash
      changed_when: false
    - name: Disable and remove ashburn-relay service
      ansible.builtin.systemd:
        name: ashburn-relay.service
        enabled: false
        state: stopped
      # The unit may never have been installed on this host.
      failed_when: false
    - name: Remove ashburn-relay files
      ansible.builtin.file:
        path: "{{ item }}"
        state: absent
      loop:
        - /etc/systemd/system/ashburn-relay.service
        - /usr/local/sbin/ashburn-relay-setup.sh
        - /etc/network/if-up.d/ashburn-routing
    # Without a reload systemd keeps the deleted unit definition loaded.
    - name: Reload systemd after removing the unit file
      ansible.builtin.systemd:
        daemon_reload: true
    - name: Rollback complete
      ansible.builtin.debug:
        msg: "Ashburn relay rules removed."
    # Stop here so none of the setup tasks that follow run after a rollback.
    - name: End play after rollback
      ansible.builtin.meta: end_play
# ------------------------------------------------------------------
# Pre-flight checks
# ------------------------------------------------------------------
# Pre-flight: one ICMP echo (2 s timeout) to the outer GRE endpoint; the
# task fails if ping exits non-zero.
- name: Check tunnel destination is reachable
  ansible.builtin.command:
    argv:
      - ping
      - '-c'
      - '1'
      - '-W'
      - '2'
      - '{{ tunnel_dst }}'
  register: tunnel_dst_ping
  failed_when: tunnel_dst_ping.rc != 0
  changed_when: false
  tags:
    - preflight
    - outbound
# Pre-flight: one ICMP echo (2 s timeout) to the kind node that is the
# DNAT target; the task fails if ping exits non-zero.
- name: Check kind node is reachable
  ansible.builtin.command:
    argv:
      - ping
      - '-c'
      - '1'
      - '-W'
      - '2'
      - '{{ kind_node_ip }}'
  register: kind_ping
  failed_when: kind_ping.rc != 0
  changed_when: false
  tags:
    - preflight
    - inbound
# Read-only snapshot of the first 60 lines of the nat table, for the
# operator to inspect before anything is changed.
- name: Show existing iptables nat rules
  ansible.builtin.shell:
    # awk consumes the whole stream, so iptables is never killed by SIGPIPE.
    # With `head -60` under pipefail, a long listing would make the pipeline
    # exit 141 and fail this purely informational task.
    cmd: set -o pipefail && iptables -t nat -L -v -n --line-numbers | awk 'NR <= 60'
    executable: /bin/bash
  register: existing_nat
  changed_when: false
  tags: [preflight]
- name: Display existing NAT rules
  ansible.builtin.debug:
    var: existing_nat.stdout_lines
  tags: [preflight]
# Read-only: print the current tunnel definition, or a placeholder line when
# `ip tunnel show` fails. The script itself always exits 0.
- name: Check for existing GRE tunnel
  ansible.builtin.shell:
    cmd: |
      if ! ip tunnel show {{ tunnel_device }} 2>&1; then
        echo "tunnel does not exist"
      fi
    executable: /bin/bash
  register: existing_tunnel
  changed_when: false
  tags:
    - preflight
- name: Display existing tunnel
  ansible.builtin.debug:
    var: existing_tunnel.stdout_lines
  tags:
    - preflight
# ------------------------------------------------------------------
# GRE tunnel setup
# ------------------------------------------------------------------
# Create the dedicated GRE tunnel, assign the local /31 address and bring the
# link up. Skipped entirely when a tunnel with this name already exists.
- name: Create GRE tunnel
  ansible.builtin.shell:
    cmd: |
      # -e: stop at the first failing `ip` command so the task fails, instead
      # of falling through to the final echo and reporting "tunnel created"
      # with exit status 0. The `if` condition itself is exempt from -e.
      set -eo pipefail
      if ip tunnel show {{ tunnel_device }} 2>/dev/null; then
        echo "tunnel already exists"
      else
        ip tunnel add {{ tunnel_device }} mode gre local {{ tunnel_src }} remote {{ tunnel_dst }} ttl 64
        ip addr add {{ tunnel_local_ip }}/31 dev {{ tunnel_device }}
        ip link set {{ tunnel_device }} up mtu 8972
        echo "tunnel created"
      fi
    executable: /bin/bash
  register: tunnel_result
  # Only the creation branch prints the word "created".
  changed_when: "'created' in tunnel_result.stdout"
  tags: [outbound]
- name: Show tunnel result
  ansible.builtin.debug:
    var: tunnel_result.stdout_lines
  tags: [outbound]
# ------------------------------------------------------------------
# Inbound: DNAT for 137.239.194.65 → kind node
# ------------------------------------------------------------------
# Put the relay address on lo as a /32. Re-running is harmless: an "address
# already present" error from `ip` is treated as success (not changed).
- name: Add Ashburn IP to loopback
  ansible.builtin.command:
    cmd: ip addr add {{ ashburn_ip }}/32 dev lo
  register: add_ip
  changed_when: add_ip.rc == 0
  # All conditions must hold for the task to fail. Two wordings of the
  # duplicate-address error are accepted: the classic
  # "RTNETLINK answers: File exists", and the extended-ack form
  # "Error: ipv4: Address already assigned." that newer kernel/iproute2
  # combinations appear to emit. NOTE(review): confirm the second wording
  # on the target host.
  failed_when:
    - add_ip.rc != 0
    - "'RTNETLINK answers: File exists' not in add_ip.stderr"
    - "'Address already assigned' not in add_ip.stderr"
  tags: [inbound]
# Insert the inbound DNAT rules (gossip UDP+TCP, dynamic UDP range) at the top
# of nat/PREROUTING, skipping any rule that `iptables -C` reports as present.
- name: Add DNAT rules (inserted before DOCKER chain)
  ansible.builtin.shell:
    cmd: |
      # -e: a failed insert aborts the script and fails the task, instead of
      # printing "added" and exiting 0. The `if ! ... -C` test is exempt.
      set -eo pipefail
      # DNAT rules must be before Docker's ADDRTYPE LOCAL rule, otherwise
      # Docker's PREROUTING chain swallows traffic to 137.239.194.65 (which
      # is on loopback and therefore type LOCAL).
      for rule in \
        "-p udp -d {{ ashburn_ip }} --dport {{ gossip_port }} -j DNAT --to-destination {{ kind_node_ip }}:{{ gossip_port }}" \
        "-p tcp -d {{ ashburn_ip }} --dport {{ gossip_port }} -j DNAT --to-destination {{ kind_node_ip }}:{{ gossip_port }}" \
        "-p udp -d {{ ashburn_ip }} --dport {{ dynamic_port_range_start }}:{{ dynamic_port_range_end }} -j DNAT --to-destination {{ kind_node_ip }}" \
      ; do
        # $rule is intentionally unquoted so it word-splits into arguments.
        if ! iptables -t nat -C PREROUTING $rule 2>/dev/null; then
          iptables -t nat -I PREROUTING 1 $rule
          echo "added: $rule"
        else
          echo "exists: $rule"
        fi
      done
    executable: /bin/bash
  register: dnat_result
  changed_when: "'added' in dnat_result.stdout"
  tags: [inbound]
- name: Show DNAT result
  ansible.builtin.debug:
    var: dnat_result.stdout_lines
  tags: [inbound]
# ------------------------------------------------------------------
# Outbound: fwmark + SNAT + policy routing via new tunnel
# ------------------------------------------------------------------
# Append MARK rules to mangle/PREROUTING for traffic sourced from the kind
# network on the relayed ports, skipping rules that already exist.
- name: Mark outbound validator traffic (mangle PREROUTING)
  ansible.builtin.shell:
    cmd: |
      # -e: a failed append aborts the script and fails the task, instead of
      # printing "added" and exiting 0. The `if ! ... -C` test is exempt.
      set -eo pipefail
      for rule in \
        "-p udp -s {{ kind_network }} --sport {{ gossip_port }} -j MARK --set-mark {{ fwmark }}" \
        "-p udp -s {{ kind_network }} --sport {{ dynamic_port_range_start }}:{{ dynamic_port_range_end }} -j MARK --set-mark {{ fwmark }}" \
        "-p tcp -s {{ kind_network }} --sport {{ gossip_port }} -j MARK --set-mark {{ fwmark }}" \
      ; do
        # $rule is intentionally unquoted so it word-splits into arguments.
        if ! iptables -t mangle -C PREROUTING $rule 2>/dev/null; then
          iptables -t mangle -A PREROUTING $rule
          echo "added: $rule"
        else
          echo "exists: $rule"
        fi
      done
    executable: /bin/bash
  register: mangle_result
  changed_when: "'added' in mangle_result.stdout"
  tags: [outbound]
- name: Show mangle result
  ansible.builtin.debug:
    var: mangle_result.stdout_lines
  tags: [outbound]
# Insert, at position 1 of nat/POSTROUTING, a rule that rewrites the source
# of marked packets to the Ashburn IP. No-op when the rule already exists.
- name: SNAT marked traffic to Ashburn IP (before Docker MASQUERADE)
  ansible.builtin.shell:
    cmd: |
      # -e: a failed insert aborts the script and fails the task, instead of
      # printing "inserted" and exiting 0. The `if` test is exempt.
      set -eo pipefail
      if iptables -t nat -C POSTROUTING -m mark --mark {{ fwmark }} -j SNAT --to-source {{ ashburn_ip }} 2>/dev/null; then
        echo "SNAT rule already exists"
      else
        iptables -t nat -I POSTROUTING 1 -m mark --mark {{ fwmark }} -j SNAT --to-source {{ ashburn_ip }}
        echo "SNAT rule inserted at position 1"
      fi
    executable: /bin/bash
  register: snat_result
  changed_when: "'inserted' in snat_result.stdout"
  tags: [outbound]
- name: Show SNAT result
  ansible.builtin.debug:
    var: snat_result.stdout
  tags: [outbound]
# Map the numeric table id to its name so `ip` commands can use the name.
# A line that starts with the id followed by whitespace is replaced;
# otherwise the entry is appended.
- name: Ensure rt_tables entry exists
  ansible.builtin.lineinfile:
    path: /etc/iproute2/rt_tables
    regexp: '^{{ rt_table_id }}\s'
    line: '{{ rt_table_id }} {{ rt_table_name }}'
  tags:
    - outbound
# Add the `ip rule` that sends packets carrying the fwmark to the relay
# routing table. Skipped when `ip rule show` already lists it.
- name: Add policy routing rule for fwmark
  ansible.builtin.shell:
    cmd: |
      # -e: a failed `ip rule add` fails the task instead of being masked by
      # the echo that follows it.
      set -eo pipefail
      # The pattern is derived from the play variables (the mark is rendered
      # in hex, e.g. 100 -> 0x64) rather than hard-coded, so overriding
      # fwmark or rt_table_name keeps the existence check accurate.
      # Output goes to /dev/null instead of using `grep -q`, so grep reads
      # all of its input and cannot SIGPIPE `ip` under pipefail.
      if ip rule show | grep -w 'fwmark {{ "0x%x" | format(fwmark | int) }} lookup {{ rt_table_name }}' >/dev/null; then
        echo "rule already exists"
      else
        ip rule add fwmark {{ fwmark }} table {{ rt_table_name }}
        echo "rule added"
      fi
    executable: /bin/bash
  register: rule_result
  changed_when: "'added' in rule_result.stdout"
  tags: [outbound]
# Point the relay routing table's default route at the far end of the GRE
# tunnel. `ip route replace` creates or overwrites the route.
- name: Add default route via GRE tunnel in ashburn table
  ansible.builtin.shell:
    cmd: |
      # -e: a failed `ip route replace` fails the task rather than being
      # hidden by the echo at the end.
      set -eo pipefail
      # Snapshot the table before and after so "changed" is reported only
      # when the replace actually altered it. Listing a table that has no
      # routes yet may error, hence `|| true`.
      before="$(ip route show table {{ rt_table_name }} 2>/dev/null || true)"
      ip route replace default via {{ tunnel_remote_ip }} dev {{ tunnel_device }} table {{ rt_table_name }}
      after="$(ip route show table {{ rt_table_name }} 2>/dev/null || true)"
      if [[ "$before" != "$after" ]]; then
        echo "route updated"
      else
        echo "route already present"
      fi
    executable: /bin/bash
  register: route_result
  changed_when: "'route updated' in route_result.stdout"
  tags: [outbound]
# ------------------------------------------------------------------
# Persistence
# ------------------------------------------------------------------
# A systemd oneshot service replaces both if-up.d (which depends on
# networking.service, inactive on this host) and netfilter-persistent
# (which runs before Docker, so Docker's chain setup blows away rules).
# This service runs After=docker.service and idempotently applies all
# tunnel, iptables, and policy routing state.
# Write the oneshot unit that runs the setup script at boot, ordered after
# docker.service and network-online.target. The result is registered so the
# enable step can decide whether a daemon-reload is needed.
- name: Install ashburn-relay systemd service
  ansible.builtin.copy:
    mode: '0644'
    dest: /etc/systemd/system/ashburn-relay.service
    content: |
      [Unit]
      Description=Ashburn validator relay (GRE tunnel, iptables, policy routing)
      After=docker.service network-online.target
      Wants=network-online.target
      [Service]
      Type=oneshot
      RemainAfterExit=yes
      ExecStart=/usr/local/sbin/ashburn-relay-setup.sh
      [Install]
      WantedBy=multi-user.target
  register: relay_unit
  tags:
    - inbound
    - outbound
# Render the setup script that the unit's ExecStart points at. The template
# itself lives outside this playbook (files/ashburn-relay-setup.sh.j2).
- name: Install ashburn-relay setup script
  ansible.builtin.template:
    mode: '0755'
    dest: /usr/local/sbin/ashburn-relay-setup.sh
    src: files/ashburn-relay-setup.sh.j2
  register: relay_script
  tags:
    - inbound
    - outbound
# Enable the unit for boot. systemd is reloaded only when the unit file or
# the script was changed by the two install tasks. The service is not
# started here.
- name: Reload systemd and enable ashburn-relay
  ansible.builtin.systemd:
    name: ashburn-relay.service
    enabled: true
    daemon_reload: '{{ relay_unit.changed or relay_script.changed }}'
  tags:
    - inbound
    - outbound
# Delete the if-up.d hook; persistence is handled by the systemd unit.
- name: Remove stale if-up.d script
  ansible.builtin.file:
    state: absent
    path: /etc/network/if-up.d/ashburn-routing
  tags:
    - inbound
    - outbound
# ------------------------------------------------------------------
# Verification
# ------------------------------------------------------------------
# Read-only report: tunnel definition, its addresses, and a single ping to
# the far end of the /31. An unreachable peer is reported, not fatal.
- name: Show tunnel status
  ansible.builtin.shell:
    cmd: |
      echo "=== tunnel ==="
      ip tunnel show {{ tunnel_device }}
      echo
      echo "=== tunnel addr ==="
      ip addr show {{ tunnel_device }}
      echo
      echo "=== ping tunnel peer ==="
      if ! ping -c 1 -W 2 {{ tunnel_remote_ip }} 2>&1; then
        echo "tunnel peer unreachable"
      fi
    executable: /bin/bash
  register: tunnel_status
  changed_when: false
  tags:
    - outbound
# Read-only: capture the first 40 lines of the nat table (stderr included)
# for the verification report.
- name: Show NAT rules
  ansible.builtin.shell:
    # awk consumes the whole stream, so iptables is never killed by SIGPIPE.
    # With `head -40` under pipefail, a long listing would make the pipeline
    # exit 141 and fail this purely informational task.
    cmd: set -o pipefail && iptables -t nat -L -v -n --line-numbers 2>&1 | awk 'NR <= 40'
    executable: /bin/bash
  register: nat_rules
  changed_when: false
  tags: [inbound, outbound]
# Read-only: capture the full mangle table listing (stderr included) for
# the verification report.
- name: Show mangle rules
  ansible.builtin.shell:
    executable: /bin/bash
    cmd: >-
      iptables -t mangle -L -v -n 2>&1
  register: mangle_rules
  changed_when: false
  tags:
    - outbound
# Read-only: list all policy rules and the contents of the relay routing
# table, printing a placeholder if listing the table fails.
- name: Show policy routing
  ansible.builtin.shell:
    cmd: |
      echo "=== ip rule ==="
      ip rule show
      echo
      echo "=== ashburn routing table ==="
      if ! ip route show table {{ rt_table_name }} 2>/dev/null; then
        echo "table empty"
      fi
    executable: /bin/bash
  register: routing_info
  changed_when: false
  tags:
    - outbound
# Read-only: list the inet/inet6 address lines configured on lo.
- name: Show loopback addresses
  ansible.builtin.shell:
    cmd: |
      set -o pipefail
      ip addr show lo | grep inet
    executable: /bin/bash
  register: lo_addrs
  changed_when: false
  tags:
    - inbound
# Print everything the verification tasks collected. Results registered by
# tasks that were skipped (e.g. by tag selection) fall back to an empty
# list; nat_rules carries both tags, so it is set whenever this task runs.
- name: Display verification
  ansible.builtin.debug:
    msg:
      tunnel: '{{ tunnel_status.stdout_lines | default([]) }}'
      nat_rules: '{{ nat_rules.stdout_lines }}'
      mangle_rules: '{{ mangle_rules.stdout_lines | default([]) }}'
      routing: '{{ routing_info.stdout_lines | default([]) }}'
      loopback: '{{ lo_addrs.stdout_lines | default([]) }}'
  tags:
    - inbound
    - outbound
# Final human-readable recap. Every address and port is rendered from the
# play variables so the text stays correct when they are overridden.
- name: Summary
  ansible.builtin.debug:
    msg: |
      === Ashburn Relay Setup Complete ===
      Ashburn IP: {{ ashburn_ip }} (on lo)
      GRE tunnel: {{ tunnel_device }} ({{ tunnel_src }} → {{ tunnel_dst }})
      link: {{ tunnel_local_ip }}/31 ↔ {{ tunnel_remote_ip }}/31
      Inbound DNAT: {{ ashburn_ip }}:{{ gossip_port }},{{ dynamic_port_range_start }}-{{ dynamic_port_range_end }} → {{ kind_node_ip }}
      Outbound SNAT: {{ kind_network }} sport {{ gossip_port }},{{ dynamic_port_range_start }}-{{ dynamic_port_range_end }} → {{ ashburn_ip }}
      Policy route: fwmark {{ fwmark }} → table {{ rt_table_name }} → via {{ tunnel_remote_ip }} dev {{ tunnel_device }}
      Next steps:
      1. Apply mia-sw01 config (Tunnel100 must be up on both sides)
      2. Verify tunnel: ping {{ tunnel_remote_ip }}
      3. Test from kelce: echo test | nc -u -w 1 {{ ashburn_ip }} {{ dynamic_port_range_start }}
      4. Check validator gossip ContactInfo shows {{ ashburn_ip }} for all addresses