# Source: stack-orchestrator/playbooks/ashburn-relay-biscayne.yml (526 lines, 21 KiB, YAML)

---
# Configure biscayne for Ashburn validator relay
#
# WHY THIS USES FIREWALLD ZONES (not just iptables):
#
# Biscayne runs firewalld with the nftables backend. Firewalld's nftables
# filter_FORWARD chain (priority filter+10) rejects forwarded traffic
# between interfaces that aren't in known zones. The original playbook
# used only iptables rules, which run at priority filter (0) and were
# accepted by Docker's chains — but then firewalld's nftables chain
# rejected the same packets because it didn't know about the Docker
# bridges or gre-ashburn.
#
# The fix: Docker bridges go in the 'docker' zone, gre-ashburn goes in
# 'trusted', and a 'docker-to-relay' policy allows forwarding between
# them. These are firewalld --permanent rules that survive reboots.
#
# WHY IPTABLES IS STILL NEEDED:
#
# Docker's iptables FORWARD chain (priority filter) runs BEFORE
# firewalld's nftables chain (priority filter+10). Docker's FORWARD
# policy is DROP, and its DOCKER-FORWARD subchain only accepts
# RELATED,ESTABLISHED inbound. So NEW inbound DNAT'd traffic is dropped
# by Docker before firewalld can accept it. DOCKER-USER ACCEPT rules
# and DNAT-before-Docker ordering must remain as iptables.
#
# Layers:
# 1. Firewalld zones + policies (permanent, survives reboots/reloads)
# 2. GRE tunnel + loopback IP (iproute2, restored by systemd service)
# 3. iptables DNAT/SNAT/mangle (restored by systemd service)
# 4. Policy routing (iproute2, restored by systemd service)
#
# Usage:
# # Full setup (inbound + outbound)
# ansible-playbook playbooks/ashburn-relay-biscayne.yml
#
# # Inbound only (DNAT rules)
# ansible-playbook playbooks/ashburn-relay-biscayne.yml -t inbound
#
# # Outbound only (SNAT + policy routing)
# ansible-playbook playbooks/ashburn-relay-biscayne.yml -t outbound
#
# # Rollback
# ansible-playbook playbooks/ashburn-relay-biscayne.yml -e rollback=true
- name: Configure biscayne Ashburn validator relay
  hosts: all
  gather_facts: false
  vars:
    # --- Addresses -----------------------------------------------------
    # Public relay address; added to lo and used as DNAT match / SNAT source.
    ashburn_ip: "137.239.194.65"
    # DNAT target and the network whose traffic gets marked.
    kind_node_ip: "172.20.0.2"
    kind_network: "172.20.0.0/16"

    # --- GRE tunnel ----------------------------------------------------
    tunnel_device: gre-ashburn
    tunnel_local_ip: "169.254.100.1"  # biscayne end of /31
    tunnel_remote_ip: "169.254.100.0"  # mia-sw01 end of /31
    tunnel_src: "186.233.184.235"  # biscayne public IP
    tunnel_dst: "209.42.167.137"  # mia-sw01 free LAN IP

    # --- Policy routing ------------------------------------------------
    # Packets carrying this mark are looked up in the table below.
    fwmark: 100
    rt_table_name: ashburn
    rt_table_id: 100

    # --- Relayed ports -------------------------------------------------
    gossip_port: 8001
    dynamic_port_range_start: 9000
    dynamic_port_range_end: 9025

    # --- Firewalld -----------------------------------------------------
    # Docker bridge for the kind network — find with:
    # ip route get 172.20.0.2 | grep -oP 'dev \K\S+'
    docker_bridges:
      - br-cf46a62ab5b2
      - docker0
      - br-4fb6f6795448

    # Set with `-e rollback=true` to tear everything down instead.
    rollback: false
  tasks:
# ------------------------------------------------------------------
# Rollback
# ------------------------------------------------------------------
- name: Rollback all Ashburn relay rules
  when: rollback | bool
  # 'always' keeps the rollback from being skipped when a tag filter is
  # given (e.g. `-e rollback=true -t inbound`); without it the untagged
  # rollback tasks are filtered out and the setup tasks run instead.
  tags: [always]
  block:
    # Interface names come from the play vars so that overriding
    # docker_bridges / tunnel_device also applies to the rollback.
    - name: Remove Docker bridges from docker zone
      ansible.posix.firewalld:
        zone: docker
        interface: "{{ item }}"
        permanent: true
        state: disabled
      loop: "{{ docker_bridges }}"
      # Best-effort: the assignment may already be gone.
      failed_when: false

    - name: Remove GRE tunnel from trusted zone
      ansible.posix.firewalld:
        zone: trusted
        interface: "{{ tunnel_device }}"
        permanent: true
        state: disabled
      failed_when: false

    - name: Remove docker-to-relay policy
      ansible.builtin.command:
        cmd: firewall-cmd --permanent --delete-policy=docker-to-relay
      failed_when: false
      changed_when: false

    # Push the permanent-config removals above into the runtime config.
    - name: Reload firewalld
      ansible.builtin.command:
        cmd: firewall-cmd --reload
      changed_when: false

    - name: Remove Ashburn IP from loopback
      ansible.builtin.command:
        cmd: ip addr del {{ ashburn_ip }}/32 dev lo
      failed_when: false
      changed_when: false

    - name: Remove GRE tunnel
      ansible.builtin.command:
        cmd: ip tunnel del {{ tunnel_device }}
      failed_when: false
      changed_when: false

    # Every delete is suffixed with `|| true` so a rule that is already
    # absent does not abort the script under `set -e`.
    - name: Flush iptables relay rules
      ansible.builtin.shell:
        cmd: |
          set -euo pipefail
          # DNAT
          iptables -t nat -D PREROUTING -p udp -d {{ ashburn_ip }} \
            --dport {{ gossip_port }} \
            -j DNAT --to-destination {{ kind_node_ip }}:{{ gossip_port }} 2>/dev/null || true
          iptables -t nat -D PREROUTING -p tcp -d {{ ashburn_ip }} \
            --dport {{ gossip_port }} \
            -j DNAT --to-destination {{ kind_node_ip }}:{{ gossip_port }} 2>/dev/null || true
          iptables -t nat -D PREROUTING -p udp -d {{ ashburn_ip }} \
            --dport {{ dynamic_port_range_start }}:{{ dynamic_port_range_end }} \
            -j DNAT --to-destination {{ kind_node_ip }} 2>/dev/null || true
          # DOCKER-USER
          iptables -D DOCKER-USER -p udp -d {{ kind_node_ip }} \
            --dport {{ gossip_port }} -j ACCEPT 2>/dev/null || true
          iptables -D DOCKER-USER -p tcp -d {{ kind_node_ip }} \
            --dport {{ gossip_port }} -j ACCEPT 2>/dev/null || true
          iptables -D DOCKER-USER -p udp -d {{ kind_node_ip }} \
            --dport {{ dynamic_port_range_start }}:{{ dynamic_port_range_end }} \
            -j ACCEPT 2>/dev/null || true
          # Mangle
          iptables -t mangle -D PREROUTING -s {{ kind_network }} \
            -p udp --sport {{ gossip_port }} -j MARK --set-mark {{ fwmark }} 2>/dev/null || true
          iptables -t mangle -D PREROUTING -s {{ kind_network }} \
            -p udp --sport {{ dynamic_port_range_start }}:{{ dynamic_port_range_end }} \
            -j MARK --set-mark {{ fwmark }} 2>/dev/null || true
          iptables -t mangle -D PREROUTING -s {{ kind_network }} \
            -p tcp --sport {{ gossip_port }} -j MARK --set-mark {{ fwmark }} 2>/dev/null || true
          iptables -t mangle -D PREROUTING -s {{ kind_network }} \
            -p tcp --dport {{ gossip_port }} -j MARK --set-mark {{ fwmark }} 2>/dev/null || true
          # SNAT
          iptables -t nat -D POSTROUTING -m mark --mark {{ fwmark }} \
            -j SNAT --to-source {{ ashburn_ip }} 2>/dev/null || true
        executable: /bin/bash
      changed_when: false

    - name: Remove policy routing
      ansible.builtin.shell:
        cmd: |
          ip rule del fwmark {{ fwmark }} table {{ rt_table_name }} 2>/dev/null || true
          ip rule del from {{ kind_network }} fwmark {{ fwmark }} table {{ rt_table_name }} 2>/dev/null || true
          ip route del default table {{ rt_table_name }} 2>/dev/null || true
        executable: /bin/bash
      changed_when: false

    - name: Disable and remove ashburn-relay service
      ansible.builtin.systemd:
        name: ashburn-relay.service
        enabled: false
        state: stopped
      # Best-effort: the unit may never have been installed.
      failed_when: false

    - name: Remove ashburn-relay files
      ansible.builtin.file:
        path: "{{ item }}"
        state: absent
      loop:
        - /etc/systemd/system/ashburn-relay.service
        - /usr/local/sbin/ashburn-relay-setup.sh
        - /etc/network/if-up.d/ashburn-routing

    # systemd keeps the deleted unit loaded until it re-reads its unit files.
    - name: Reload systemd after removing the unit file
      ansible.builtin.systemd:
        daemon_reload: true

    - name: Rollback complete
      ansible.builtin.debug:
        msg: "Ashburn relay rules removed."

    # Stops the play here so none of the setup tasks run after a rollback.
    - name: End play after rollback
      ansible.builtin.meta: end_play
# ------------------------------------------------------------------
# Pre-flight checks
# ------------------------------------------------------------------
- name: Check tunnel destination is reachable
  tags:
    - preflight
    - outbound
  # One ICMP echo with a 2 s reply timeout; a non-zero exit aborts the play.
  ansible.builtin.command:
    cmd: ping -c 1 -W 2 {{ tunnel_dst }}
  register: tunnel_dst_ping
  failed_when: tunnel_dst_ping.rc != 0
  changed_when: false
- name: Check kind node is reachable
  tags:
    - preflight
    - inbound
  # One ICMP echo with a 2 s reply timeout; a non-zero exit aborts the play.
  ansible.builtin.command:
    cmd: ping -c 1 -W 2 {{ kind_node_ip }}
  register: kind_ping
  failed_when: kind_ping.rc != 0
  changed_when: false
# ------------------------------------------------------------------
# Firewalld zones and policies (permanent, survives reboots)
# ------------------------------------------------------------------
# Docker's iptables FORWARD chain (priority filter) drops packets that
# don't match DOCKER-USER or DOCKER-FORWARD. Firewalld's nftables
# filter_FORWARD (priority filter+10) then checks zone-based policies.
# Without the docker zone + docker-to-relay policy, firewalld rejects
# outbound traffic from Docker bridges to gre-ashburn because neither
# interface is in a known zone.
- name: Add Docker bridges to docker zone
  tags:
    - outbound
    - inbound
  # Permanent-only change; the registered result feeds the condition of
  # the later "Reload firewalld" task.
  ansible.posix.firewalld:
    interface: "{{ item }}"
    zone: docker
    state: enabled
    permanent: true
  loop: "{{ docker_bridges }}"
  register: docker_zone_result
- name: Add GRE tunnel to trusted zone
  tags:
    - outbound
    - inbound
  # Permanent-only change; the registered result feeds the condition of
  # the later "Reload firewalld" task.
  ansible.posix.firewalld:
    interface: "{{ tunnel_device }}"
    zone: trusted
    state: enabled
    permanent: true
  register: trusted_zone_result
- name: Check if docker-to-relay policy exists
  tags:
    - outbound
  # Probe only: the exit code is consumed by the creation task, so this
  # task never fails and never reports a change.
  ansible.builtin.command:
    cmd: firewall-cmd --info-policy=docker-to-relay
  register: policy_check
  failed_when: false
  changed_when: false
- name: Create docker-to-relay forwarding policy
  when: policy_check.rc != 0
  ansible.builtin.shell:
    cmd: |
      set -euo pipefail
      # The policy may already exist in the permanent config (for example
      # when an earlier run stopped before the firewalld reload). Creating
      # it again would fail with a name conflict, so only create it when
      # the permanent config does not have it yet.
      if ! firewall-cmd --permanent --info-policy=docker-to-relay >/dev/null 2>&1; then
        firewall-cmd --permanent --new-policy=docker-to-relay
      fi
      firewall-cmd --permanent --policy=docker-to-relay --set-target=ACCEPT
      firewall-cmd --permanent --policy=docker-to-relay --add-ingress-zone=docker
      firewall-cmd --permanent --policy=docker-to-relay --add-egress-zone=trusted
      # Marker line consumed by changed_when below.
      echo "policy created"
    executable: /bin/bash
  register: policy_result
  changed_when: "'created' in policy_result.stdout"
  tags: [outbound]
- name: Reload firewalld
  tags:
    - outbound
    - inbound
  # Only reload when one of the permanent-config tasks changed something.
  # policy_result is not registered when the outbound tag is filtered out,
  # hence the default.
  when: >-
    docker_zone_result.changed
    or trusted_zone_result.changed
    or (policy_result.changed | default(false))
  ansible.builtin.command:
    cmd: firewall-cmd --reload
  changed_when: true
# ------------------------------------------------------------------
# GRE tunnel setup (iproute2)
# ------------------------------------------------------------------
- name: Create GRE tunnel
  tags:
    - outbound
  ansible.builtin.shell:
    executable: /bin/bash
    cmd: |
      set -euo pipefail
      # `ip tunnel show` doubles as the existence probe.
      if ! ip tunnel show {{ tunnel_device }} 2>/dev/null; then
        ip tunnel add {{ tunnel_device }} mode gre \
          local {{ tunnel_src }} remote {{ tunnel_dst }} ttl 64
        ip addr add {{ tunnel_local_ip }}/31 dev {{ tunnel_device }}
        ip link set {{ tunnel_device }} up mtu 8972
        echo "tunnel created"
      else
        echo "tunnel already exists"
      fi
  register: tunnel_result
  # Only the creation branch prints a line containing "created".
  changed_when: "'created' in tunnel_result.stdout"
# ------------------------------------------------------------------
# Inbound: DNAT for 137.239.194.65 → kind node (iptables)
# ------------------------------------------------------------------
# These must be iptables rules inserted before Docker's ADDRTYPE LOCAL
# rule in nat PREROUTING. Firewalld forward-ports can't guarantee
# ordering relative to Docker's chains.
- name: Add Ashburn IP to loopback
  tags:
    - inbound
  ansible.builtin.command:
    cmd: ip addr add {{ ashburn_ip }}/32 dev lo
  register: add_ip
  # A non-zero exit is tolerated when stderr shows the address is already
  # present; any other error fails the task.
  failed_when: >-
    add_ip.rc != 0
    and 'already assigned' not in add_ip.stderr
    and 'File exists' not in add_ip.stderr
  changed_when: add_ip.rc == 0
- name: Add DNAT rules (before Docker's chain)
  ansible.builtin.shell:
    cmd: |
      # errexit (-e) is required: without it a failing `iptables -I` is
      # followed by the "added" echo and the loop still exits 0, so the
      # task would report success for a rule that was never installed.
      # The `iptables -C` probe sits in an `if` condition and is therefore
      # exempt from errexit.
      set -euo pipefail
      for rule in \
        "-p udp -d {{ ashburn_ip }} --dport {{ gossip_port }} -j DNAT --to-destination {{ kind_node_ip }}:{{ gossip_port }}" \
        "-p tcp -d {{ ashburn_ip }} --dport {{ gossip_port }} -j DNAT --to-destination {{ kind_node_ip }}:{{ gossip_port }}" \
        "-p udp -d {{ ashburn_ip }} --dport {{ dynamic_port_range_start }}:{{ dynamic_port_range_end }} -j DNAT --to-destination {{ kind_node_ip }}" \
      ; do
        # $rule is deliberately unquoted so it word-splits into arguments.
        if ! iptables -t nat -C PREROUTING $rule 2>/dev/null; then
          # Insert at position 1 so the rule is evaluated ahead of the
          # rules already in the chain.
          iptables -t nat -I PREROUTING 1 $rule
          echo "added: $rule"
        else
          echo "exists: $rule"
        fi
      done
    executable: /bin/bash
  register: dnat_result
  changed_when: "'added' in dnat_result.stdout"
  tags: [inbound]
- name: Allow DNAT'd traffic through Docker's FORWARD chain
  ansible.builtin.shell:
    cmd: |
      # errexit (-e) is required: without it a failing `iptables -I` is
      # followed by the "added" echo and the loop still exits 0, so the
      # task would report success for a rule that was never installed.
      set -euo pipefail
      # Docker's iptables FORWARD (priority filter) drops NEW inbound
      # traffic to bridge networks. DOCKER-USER is the only place to
      # add ACCEPT rules that survive Docker daemon restarts.
      for rule in \
        "-p udp -d {{ kind_node_ip }} --dport {{ gossip_port }} -j ACCEPT" \
        "-p tcp -d {{ kind_node_ip }} --dport {{ gossip_port }} -j ACCEPT" \
        "-p udp -d {{ kind_node_ip }} --dport {{ dynamic_port_range_start }}:{{ dynamic_port_range_end }} -j ACCEPT" \
      ; do
        # $rule is deliberately unquoted so it word-splits into arguments.
        if ! iptables -C DOCKER-USER $rule 2>/dev/null; then
          iptables -I DOCKER-USER 1 $rule
          echo "added: $rule"
        else
          echo "exists: $rule"
        fi
      done
    executable: /bin/bash
  register: forward_result
  changed_when: "'added' in forward_result.stdout"
  tags: [inbound]
# ------------------------------------------------------------------
# Outbound: fwmark + SNAT + policy routing (iptables + iproute2)
# ------------------------------------------------------------------
# Mangle marks select which traffic gets policy-routed through the
# relay. Source-based routing (ip rule from 172.20.0.0/16) would be
# simpler but routes ALL Docker traffic through the tunnel, including
# DNS and health checks. Port-specific marks keep non-validator
# traffic on the default route.
- name: Mark outbound validator traffic (mangle PREROUTING)
  ansible.builtin.shell:
    cmd: |
      # errexit (-e) is required: without it a failing `iptables -A` is
      # followed by the "added" echo and the loop still exits 0, so the
      # task would report success for a rule that was never installed.
      set -euo pipefail
      for rule in \
        "-p udp -s {{ kind_network }} --sport {{ gossip_port }} -j MARK --set-mark {{ fwmark }}" \
        "-p udp -s {{ kind_network }} --sport {{ dynamic_port_range_start }}:{{ dynamic_port_range_end }} -j MARK --set-mark {{ fwmark }}" \
        "-p tcp -s {{ kind_network }} --sport {{ gossip_port }} -j MARK --set-mark {{ fwmark }}" \
        "-p tcp -s {{ kind_network }} --dport {{ gossip_port }} -j MARK --set-mark {{ fwmark }}" \
      ; do
        # $rule is deliberately unquoted so it word-splits into arguments.
        if ! iptables -t mangle -C PREROUTING $rule 2>/dev/null; then
          iptables -t mangle -A PREROUTING $rule
          echo "added: $rule"
        else
          echo "exists: $rule"
        fi
      done
    executable: /bin/bash
  register: mangle_result
  changed_when: "'added' in mangle_result.stdout"
  tags: [outbound]
- name: SNAT marked traffic to Ashburn IP
  ansible.builtin.shell:
    cmd: |
      # errexit (-e) is required: without it a failing `iptables -I` is
      # followed by the "added" echo and the script exits 0, so the task
      # would report success for a rule that was never installed.
      set -euo pipefail
      rule="-m mark --mark {{ fwmark }} -j SNAT --to-source {{ ashburn_ip }}"
      # $rule is deliberately unquoted so it word-splits into arguments.
      if iptables -t nat -C POSTROUTING $rule 2>/dev/null; then
        echo "exists"
      else
        iptables -t nat -I POSTROUTING 1 $rule
        echo "added"
      fi
    executable: /bin/bash
  register: snat_result
  changed_when: "'added' in snat_result.stdout"
  tags: [outbound]
- name: Ensure rt_tables entry exists
  tags:
    - outbound
  # The regexp matches any existing line that starts with the table id, so
  # such a line is rewritten in place rather than a second one appended.
  ansible.builtin.lineinfile:
    path: /etc/iproute2/rt_tables
    regexp: '^{{ rt_table_id }}\s'
    line: "{{ rt_table_id }} {{ rt_table_name }}"
- name: Add policy routing rule for fwmark
  ansible.builtin.shell:
    cmd: |
      # errexit so that a failing `ip rule add` fails the task instead of
      # being reported as "added".
      set -euo pipefail
      # Capture the rule list first instead of piping into `grep -q`:
      # under pipefail an early grep exit can SIGPIPE the producer and
      # turn a match into a false "missing", which would add a duplicate.
      current_rules="$(ip rule show)"
      # The existence check is built from the same variables as the add
      # command, with the mark rendered in hex (100 becomes 0x64), so it
      # stays correct when fwmark or rt_table_name is overridden.
      wanted="fwmark {{ '0x%x' | format(fwmark | int) }} lookup {{ rt_table_name }}"
      if grep -qF -- "$wanted" <<<"$current_rules"; then
        echo "exists"
      else
        ip rule add fwmark {{ fwmark }} table {{ rt_table_name }}
        echo "added"
      fi
    executable: /bin/bash
  register: rule_result
  changed_when: "'added' in rule_result.stdout"
  tags: [outbound]
- name: Add default route via GRE tunnel in ashburn table
  tags:
    - outbound
  # `ip route replace` creates the route or overwrites an existing one.
  # The folded scalar below renders to a single command line.
  ansible.builtin.command:
    cmd: >-
      ip route replace default
      via {{ tunnel_remote_ip }}
      dev {{ tunnel_device }}
      table {{ rt_table_name }}
  changed_when: true
# ------------------------------------------------------------------
# Persistence (systemd service for non-firewalld state)
# ------------------------------------------------------------------
# Firewalld zones/policies persist natively (--permanent + reload).
# The systemd service restores: GRE tunnel, loopback IP, iptables
# rules (DNAT, DOCKER-USER, mangle, SNAT), and policy routing.
# Runs After=docker.service because Docker recreates its iptables
# chains on startup.
- name: Install ashburn-relay systemd service
  tags:
    - inbound
    - outbound
  # Oneshot unit that runs the setup script; ordered after Docker,
  # network-online and firewalld. The registered result decides whether
  # a daemon-reload is needed when the unit is enabled.
  ansible.builtin.copy:
    mode: "0644"
    dest: /etc/systemd/system/ashburn-relay.service
    content: |
      [Unit]
      Description=Ashburn relay (GRE tunnel, iptables, policy routing)
      After=docker.service network-online.target firewalld.service
      Wants=network-online.target
      Requires=firewalld.service
      [Service]
      Type=oneshot
      RemainAfterExit=yes
      ExecStart=/usr/local/sbin/ashburn-relay-setup.sh
      [Install]
      WantedBy=multi-user.target
  register: relay_unit
- name: Install ashburn-relay setup script
  tags:
    - inbound
    - outbound
  # Renders the Jinja template to the path the systemd unit executes.
  ansible.builtin.template:
    dest: /usr/local/sbin/ashburn-relay-setup.sh
    src: files/ashburn-relay-setup.sh.j2
    mode: "0755"
  register: relay_script
- name: Reload systemd and enable ashburn-relay
  tags:
    - inbound
    - outbound
  # Enables the unit for boot; systemd re-reads its unit files only when
  # the unit file or the setup script changed in this run.
  ansible.builtin.systemd:
    name: ashburn-relay.service
    enabled: true
    daemon_reload: "{{ relay_unit.changed or relay_script.changed }}"
- name: Remove stale if-up.d script
  tags:
    - inbound
    - outbound
  # Deletes the if-up.d script if it is present; no-op otherwise.
  ansible.builtin.file:
    state: absent
    path: /etc/network/if-up.d/ashburn-routing
# ------------------------------------------------------------------
# Verification
# ------------------------------------------------------------------
- name: Verify firewalld zones
  ansible.builtin.shell:
    cmd: |
      echo "=== docker zone ==="
      firewall-cmd --zone=docker --list-interfaces
      echo "=== trusted zone ==="
      firewall-cmd --zone=trusted --list-interfaces
      echo "=== docker-to-relay policy ==="
      # This task also runs for `-t inbound`, but the policy is only
      # created by tasks tagged `outbound`. As the last command in the
      # script its exit code is the task's exit code, so a missing policy
      # must not fail an inbound-only run.
      firewall-cmd --info-policy=docker-to-relay 2>&1 || echo "policy not present (created by outbound tasks)"
    executable: /bin/bash
  register: zone_info
  # Read-only report; never counts as a change.
  changed_when: false
  tags: [outbound, inbound]
- name: Verify tunnel and routing
  tags:
    - outbound
  # Read-only report of tunnel, rule and route state. The final command
  # carries a fallback, so the script's exit status is always zero.
  ansible.builtin.shell:
    executable: /bin/bash
    cmd: |
      echo "=== tunnel ==="
      ip tunnel show {{ tunnel_device }}
      ip addr show {{ tunnel_device }}
      echo "=== ping tunnel peer ==="
      ping -c 1 -W 2 {{ tunnel_remote_ip }} 2>&1 || echo "tunnel peer unreachable"
      echo "=== ip rule ==="
      ip rule show
      echo "=== ashburn table ==="
      ip route show table {{ rt_table_name }} 2>/dev/null || echo "table empty"
  register: routing_info
  changed_when: false
- name: Display verification
  tags:
    - inbound
    - outbound
  # routing_info is only registered by a task tagged `outbound`, hence
  # the empty-list default for inbound-only runs.
  ansible.builtin.debug:
    msg:
      firewalld: "{{ zone_info.stdout_lines }}"
      routing: "{{ routing_info.stdout_lines | default([]) }}"
- name: Summary
  # Human-readable recap of what was configured. Every value is rendered
  # from the play vars, so the inbound DNAT line reflects overridden ports
  # instead of a fixed "8001,9000-9025".
  ansible.builtin.debug:
    msg: |
      === Ashburn Relay Setup Complete ===
      Ashburn IP: {{ ashburn_ip }} (on lo)
      GRE tunnel: {{ tunnel_device }} ({{ tunnel_src }} → {{ tunnel_dst }})
      link: {{ tunnel_local_ip }}/31 ↔ {{ tunnel_remote_ip }}/31
      Firewalld: Docker bridges in 'docker' zone, {{ tunnel_device }} in 'trusted' zone
      Policy: docker-to-relay (docker → trusted, ACCEPT)
      Inbound DNAT: {{ ashburn_ip }}:{{ gossip_port }},{{ dynamic_port_range_start }}-{{ dynamic_port_range_end }} → {{ kind_node_ip }} (iptables)
      Outbound SNAT: fwmark {{ fwmark }} → {{ ashburn_ip }} (iptables)
      Policy route: fwmark {{ fwmark }} → table {{ rt_table_name }} → via {{ tunnel_remote_ip }}