stack-orchestrator/playbooks/files/ashburn-relay-setup.sh.j2

93 lines
3.8 KiB
Django/Jinja

#!/bin/bash
# Ashburn validator relay — runtime setup
#
# Called by ashburn-relay.service (After=docker.service firewalld.service)
# on boot. Idempotent — safe to run multiple times.
#
# Creates GRE tunnel, loopback IP, iptables rules, and policy routing.
# Firewalld zones/policies are permanent (not managed here).
#
# WHY IPTABLES + FIREWALLD:
# Docker uses iptables (priority filter) for its FORWARD chain.
# Firewalld uses nftables (priority filter+10). Docker's chain runs
# first and drops packets that firewalld would accept. So:
# - DNAT must be iptables (inserted before Docker's ADDRTYPE LOCAL rule)
# - DOCKER-USER must be iptables (Docker's FORWARD chain requires it)
# - Mangle/SNAT must be iptables (firewalld has no native mark/SNAT)
# - Forwarding (Docker bridge → gre-ashburn) is firewalld (zones/policies)
set -euo pipefail

# GRE tunnel to mia-sw01.
#
# Each step is checked on its own instead of gating everything on
# "tunnel device exists": under `set -e`, a run that dies right after
# `ip tunnel add` would otherwise leave a tunnel with no address and the
# link down, and every later run would skip the repair.
if ! ip tunnel show {{ tunnel_device }} >/dev/null 2>&1; then
  ip tunnel add {{ tunnel_device }} mode gre \
    local {{ tunnel_src }} remote {{ tunnel_dst }} ttl 64
fi

# One-line-per-address output; match the exact "inet <ip>/" token so a
# longer address sharing the same prefix cannot produce a false positive.
tunnel_addrs=$(ip -4 -o addr show dev {{ tunnel_device }})
if [[ $tunnel_addrs != *"inet {{ tunnel_local_ip }}/"* ]]; then
  ip addr add {{ tunnel_local_ip }}/31 dev {{ tunnel_device }}
fi

# Bringing the link up and setting the MTU is safe to repeat.
ip link set {{ tunnel_device }} up mtu 8972
# Ashburn IP on loopback (so kernel accepts inbound packets).
#
# The address is matched as the literal token "inet <ip>/" rather than as
# an unanchored regex: with a regex, the dots match any character and
# e.g. 1.2.3.4 would also match 1.2.3.40, silently skipping the add.
# Capturing the output first also avoids a grep -q / pipefail SIGPIPE race.
lo_addrs=$(ip -4 -o addr show dev lo)
if [[ $lo_addrs != *"inet {{ ashburn_ip }}/"* ]]; then
  ip addr add {{ ashburn_ip }}/32 dev lo
fi
# Inbound DNAT (position 1, before Docker's ADDRTYPE LOCAL rule).
#
# Each rule string is split into an argv array (no pathname expansion,
# unlike an unquoted $rule). `-w` makes iptables wait for the xtables lock
# instead of failing immediately when another process (e.g. dockerd during
# boot) holds it; without it a lock failure on -C is hidden by 2>/dev/null
# and misread as "rule missing".
for rule in \
  "-p udp -d {{ ashburn_ip }} --dport {{ gossip_port }} \
    -j DNAT --to-destination {{ kind_node_ip }}:{{ gossip_port }}" \
  "-p tcp -d {{ ashburn_ip }} --dport {{ gossip_port }} \
    -j DNAT --to-destination {{ kind_node_ip }}:{{ gossip_port }}" \
  "-p udp -d {{ ashburn_ip }} \
    --dport {{ dynamic_port_range_start }}:{{ dynamic_port_range_end }} \
    -j DNAT --to-destination {{ kind_node_ip }}" \
  ; do
  read -r -a dnat_args <<<"$rule"
  # -C exits non-zero when the rule is absent; only then insert it.
  if ! iptables -w -t nat -C PREROUTING "${dnat_args[@]}" 2>/dev/null; then
    iptables -w -t nat -I PREROUTING 1 "${dnat_args[@]}"
  fi
done
# DOCKER-USER: accept DNAT'd relay traffic through Docker's FORWARD chain.
# Docker's iptables FORWARD (priority filter) drops NEW inbound traffic to
# bridge networks. DOCKER-USER is the only place for user ACCEPT rules
# that survive Docker daemon restarts.
#
# Each rule string is split into an argv array (no pathname expansion,
# unlike an unquoted $rule). `-w` waits for the xtables lock so a
# concurrent iptables user (e.g. dockerd) cannot make the -C check fail
# silently or abort the insert.
for rule in \
  "-p udp -d {{ kind_node_ip }} --dport {{ gossip_port }} -j ACCEPT" \
  "-p tcp -d {{ kind_node_ip }} --dport {{ gossip_port }} -j ACCEPT" \
  "-p udp -d {{ kind_node_ip }} \
    --dport {{ dynamic_port_range_start }}:{{ dynamic_port_range_end }} \
    -j ACCEPT" \
  ; do
  read -r -a accept_args <<<"$rule"
  # -C exits non-zero when the rule is absent; only then insert it.
  if ! iptables -w -C DOCKER-USER "${accept_args[@]}" 2>/dev/null; then
    iptables -w -I DOCKER-USER 1 "${accept_args[@]}"
  fi
done
# Outbound mangle (fwmark for policy routing).
#
# Packets from the kind network matching these port selectors are marked
# with the configured fwmark. Rules are appended (order among them does
# not matter since they all set the same mark).
#
# Each rule string is split into an argv array (no pathname expansion,
# unlike an unquoted $rule). `-w` waits for the xtables lock so a
# concurrent iptables user cannot make the -C check fail silently or
# abort the append.
for rule in \
  "-p udp -s {{ kind_network }} --sport {{ gossip_port }} \
    -j MARK --set-mark {{ fwmark }}" \
  "-p udp -s {{ kind_network }} \
    --sport {{ dynamic_port_range_start }}:{{ dynamic_port_range_end }} \
    -j MARK --set-mark {{ fwmark }}" \
  "-p tcp -s {{ kind_network }} --sport {{ gossip_port }} \
    -j MARK --set-mark {{ fwmark }}" \
  "-p tcp -s {{ kind_network }} --dport {{ gossip_port }} \
    -j MARK --set-mark {{ fwmark }}" \
  ; do
  read -r -a mark_args <<<"$rule"
  # -C exits non-zero when the rule is absent; only then append it.
  if ! iptables -w -t mangle -C PREROUTING "${mark_args[@]}" 2>/dev/null; then
    iptables -w -t mangle -A PREROUTING "${mark_args[@]}"
  fi
done
# Outbound SNAT (position 1, before Docker MASQUERADE).
#
# Marked packets leave with the Ashburn IP as their source address.
# The rule string is split into an argv array (no pathname expansion,
# unlike an unquoted expansion), and `-w` waits for the xtables lock
# instead of failing when another process holds it.
snat_rule="-m mark --mark {{ fwmark }} -j SNAT --to-source {{ ashburn_ip }}"
read -r -a snat_args <<<"$snat_rule"
# -C exits non-zero when the rule is absent; only then insert it.
if ! iptables -w -t nat -C POSTROUTING "${snat_args[@]}" 2>/dev/null; then
  iptables -w -t nat -I POSTROUTING 1 "${snat_args[@]}"
fi
# Policy routing table.
#
# 1. Register the table id/name once. The match tolerates tab- or
#    multi-space-separated entries so a hand-edited line is not duplicated.
grep -qE '^{{ rt_table_id }}[[:space:]]+{{ rt_table_name }}[[:space:]]*$' \
  /etc/iproute2/rt_tables || \
  echo "{{ rt_table_id }} {{ rt_table_name }}" >> /etc/iproute2/rt_tables

# 2. Add the fwmark rule only if it is not already present. The check is
#    built from the template variables (not a hardcoded mark/table), and
#    the mark is normalised to hex because `ip rule show` prints it that
#    way. The table may be listed by name or by numeric id.
#    NOTE(review): assumes fwmark is a plain number (decimal or 0x-hex)
#    without a /mask suffix — confirm against the inventory.
fwmark_hex=$(printf '0x%x' "{{ fwmark }}")
ip_rules=$(ip rule show)
if ! grep -qE \
  "fwmark ${fwmark_hex} lookup ({{ rt_table_name }}|{{ rt_table_id }})( |\$)" \
  <<<"$ip_rules"; then
  ip rule add fwmark {{ fwmark }} table {{ rt_table_name }}
fi

# 3. `replace` is idempotent: it creates or overwrites the default route.
ip route replace default \
  via {{ tunnel_remote_ip }} dev {{ tunnel_device }} table {{ rt_table_name }}