fix: migrate ashburn relay playbook to firewalld + iptables coexistence
Firewalld zones/policies for forwarding (Docker bridge → gre-ashburn), iptables for Docker-specific rules (DNAT, DOCKER-USER, mangle, SNAT). Both coexist at different netfilter priorities. See docs/postmortem-ashburn-relay-outbound.md for root cause analysis. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
fix/kind-mount-propagation
parent
3bf87a2e9b
commit
68edcc60c7
|
|
@ -1,13 +1,34 @@
|
||||||
---
|
---
|
||||||
# Configure biscayne for Ashburn validator relay
|
# Configure biscayne for Ashburn validator relay
|
||||||
#
|
#
|
||||||
# Sets up inbound DNAT (137.239.194.65 → kind node) and outbound SNAT +
|
# WHY THIS USES FIREWALLD ZONES (not just iptables):
|
||||||
# policy routing (validator traffic → GRE tunnel → mia-sw01 → was-sw01).
|
|
||||||
#
|
#
|
||||||
# Uses a dedicated GRE tunnel to mia-sw01 (NOT the DoubleZero-managed
|
# Biscayne runs firewalld with the nftables backend. Firewalld's nftables
|
||||||
# doublezero0/Tunnel500). The tunnel source is biscayne's public IP
|
# filter_FORWARD chain (priority filter+10) rejects forwarded traffic
|
||||||
# (186.233.184.235) and the destination is mia-sw01's free LAN IP
|
# between interfaces that aren't in known zones. The original playbook
|
||||||
# (209.42.167.137).
|
# used only iptables rules, which run at priority filter (0); packets were
|
||||||
|
# accepted by Docker's chains — but then firewalld's nftables chain
|
||||||
|
# rejected the same packets because it didn't know about the Docker
|
||||||
|
# bridges or gre-ashburn.
|
||||||
|
#
|
||||||
|
# The fix: Docker bridges go in the 'docker' zone, gre-ashburn goes in
|
||||||
|
# 'trusted', and a 'docker-to-relay' policy allows forwarding between
|
||||||
|
# them. These are firewalld --permanent rules that survive reboots.
|
||||||
|
#
|
||||||
|
# WHY IPTABLES IS STILL NEEDED:
|
||||||
|
#
|
||||||
|
# Docker's iptables FORWARD chain (priority filter) runs BEFORE
|
||||||
|
# firewalld's nftables chain (priority filter+10). Docker's FORWARD
|
||||||
|
# policy is DROP, and its DOCKER-FORWARD subchain only accepts
|
||||||
|
# RELATED,ESTABLISHED inbound. So NEW inbound DNAT'd traffic is dropped
|
||||||
|
# by Docker before firewalld can accept it. DOCKER-USER ACCEPT rules
|
||||||
|
# and DNAT-before-Docker ordering must remain as iptables.
|
||||||
|
#
|
||||||
|
# Layers:
|
||||||
|
# 1. Firewalld zones + policies (permanent, survives reboots/reloads)
|
||||||
|
# 2. GRE tunnel + loopback IP (iproute2, restored by systemd service)
|
||||||
|
# 3. iptables DNAT/SNAT/mangle (restored by systemd service)
|
||||||
|
# 4. Policy routing (iproute2, restored by systemd service)
|
||||||
#
|
#
|
||||||
# Usage:
|
# Usage:
|
||||||
# # Full setup (inbound + outbound)
|
# # Full setup (inbound + outbound)
|
||||||
|
|
@ -19,9 +40,6 @@
|
||||||
# # Outbound only (SNAT + policy routing)
|
# # Outbound only (SNAT + policy routing)
|
||||||
# ansible-playbook playbooks/ashburn-relay-biscayne.yml -t outbound
|
# ansible-playbook playbooks/ashburn-relay-biscayne.yml -t outbound
|
||||||
#
|
#
|
||||||
# # Pre-flight checks only
|
|
||||||
# ansible-playbook playbooks/ashburn-relay-biscayne.yml -t preflight
|
|
||||||
#
|
|
||||||
# # Rollback
|
# # Rollback
|
||||||
# ansible-playbook playbooks/ashburn-relay-biscayne.yml -e rollback=true
|
# ansible-playbook playbooks/ashburn-relay-biscayne.yml -e rollback=true
|
||||||
|
|
||||||
|
|
@ -33,7 +51,6 @@
|
||||||
ashburn_ip: 137.239.194.65
|
ashburn_ip: 137.239.194.65
|
||||||
kind_node_ip: 172.20.0.2
|
kind_node_ip: 172.20.0.2
|
||||||
kind_network: 172.20.0.0/16
|
kind_network: 172.20.0.0/16
|
||||||
# New dedicated GRE tunnel (not DZ-managed doublezero0)
|
|
||||||
tunnel_device: gre-ashburn
|
tunnel_device: gre-ashburn
|
||||||
tunnel_local_ip: 169.254.100.1 # biscayne end of /31
|
tunnel_local_ip: 169.254.100.1 # biscayne end of /31
|
||||||
tunnel_remote_ip: 169.254.100.0 # mia-sw01 end of /31
|
tunnel_remote_ip: 169.254.100.0 # mia-sw01 end of /31
|
||||||
|
|
@ -45,6 +62,12 @@
|
||||||
gossip_port: 8001
|
gossip_port: 8001
|
||||||
dynamic_port_range_start: 9000
|
dynamic_port_range_start: 9000
|
||||||
dynamic_port_range_end: 9025
|
dynamic_port_range_end: 9025
|
||||||
|
# Docker bridges to place in the firewalld 'docker' zone. Find the kind network's bridge with:
|
||||||
|
# ip route get 172.20.0.2 | grep -oP 'dev \K\S+'
|
||||||
|
docker_bridges:
|
||||||
|
- br-cf46a62ab5b2
|
||||||
|
- docker0
|
||||||
|
- br-4fb6f6795448
|
||||||
rollback: false
|
rollback: false
|
||||||
|
|
||||||
tasks:
|
tasks:
|
||||||
|
|
@ -54,6 +77,30 @@
|
||||||
- name: Rollback all Ashburn relay rules
|
- name: Rollback all Ashburn relay rules
|
||||||
when: rollback | bool
|
when: rollback | bool
|
||||||
block:
|
block:
|
||||||
|
- name: Remove firewalld zone assignments
|
||||||
|
ansible.posix.firewalld:
|
||||||
|
zone: "{{ item.zone }}"
|
||||||
|
interface: "{{ item.iface }}"
|
||||||
|
permanent: true
|
||||||
|
state: disabled
|
||||||
|
loop:
|
||||||
|
- {zone: docker, iface: br-cf46a62ab5b2}
|
||||||
|
- {zone: docker, iface: docker0}
|
||||||
|
- {zone: docker, iface: br-4fb6f6795448}
|
||||||
|
- {zone: trusted, iface: gre-ashburn}
|
||||||
|
failed_when: false
|
||||||
|
|
||||||
|
- name: Remove docker-to-relay policy
|
||||||
|
ansible.builtin.command:
|
||||||
|
cmd: firewall-cmd --permanent --delete-policy=docker-to-relay
|
||||||
|
failed_when: false
|
||||||
|
changed_when: false
|
||||||
|
|
||||||
|
- name: Reload firewalld
|
||||||
|
ansible.builtin.command:
|
||||||
|
cmd: firewall-cmd --reload
|
||||||
|
changed_when: false
|
||||||
|
|
||||||
- name: Remove Ashburn IP from loopback
|
- name: Remove Ashburn IP from loopback
|
||||||
ansible.builtin.command:
|
ansible.builtin.command:
|
||||||
cmd: ip addr del {{ ashburn_ip }}/32 dev lo
|
cmd: ip addr del {{ ashburn_ip }}/32 dev lo
|
||||||
|
|
@ -61,36 +108,26 @@
|
||||||
changed_when: false
|
changed_when: false
|
||||||
|
|
||||||
- name: Remove GRE tunnel
|
- name: Remove GRE tunnel
|
||||||
ansible.builtin.shell:
|
ansible.builtin.command:
|
||||||
cmd: |
|
cmd: ip tunnel del {{ tunnel_device }}
|
||||||
ip link set {{ tunnel_device }} down 2>/dev/null || true
|
failed_when: false
|
||||||
ip tunnel del {{ tunnel_device }} 2>/dev/null || true
|
|
||||||
executable: /bin/bash
|
|
||||||
changed_when: false
|
changed_when: false
|
||||||
|
|
||||||
- name: Remove inbound DNAT rules
|
- name: Flush iptables relay rules
|
||||||
ansible.builtin.shell:
|
ansible.builtin.shell:
|
||||||
cmd: |
|
cmd: |
|
||||||
set -o pipefail
|
set -euo pipefail
|
||||||
|
# DNAT
|
||||||
iptables -t nat -D PREROUTING -p udp -d {{ ashburn_ip }} \
|
iptables -t nat -D PREROUTING -p udp -d {{ ashburn_ip }} \
|
||||||
--dport {{ gossip_port }} \
|
--dport {{ gossip_port }} \
|
||||||
-j DNAT --to-destination {{ kind_node_ip }}:{{ gossip_port }} \
|
-j DNAT --to-destination {{ kind_node_ip }}:{{ gossip_port }} 2>/dev/null || true
|
||||||
2>/dev/null || true
|
|
||||||
iptables -t nat -D PREROUTING -p tcp -d {{ ashburn_ip }} \
|
iptables -t nat -D PREROUTING -p tcp -d {{ ashburn_ip }} \
|
||||||
--dport {{ gossip_port }} \
|
--dport {{ gossip_port }} \
|
||||||
-j DNAT --to-destination {{ kind_node_ip }}:{{ gossip_port }} \
|
-j DNAT --to-destination {{ kind_node_ip }}:{{ gossip_port }} 2>/dev/null || true
|
||||||
2>/dev/null || true
|
|
||||||
iptables -t nat -D PREROUTING -p udp -d {{ ashburn_ip }} \
|
iptables -t nat -D PREROUTING -p udp -d {{ ashburn_ip }} \
|
||||||
--dport {{ dynamic_port_range_start }}:{{ dynamic_port_range_end }} \
|
--dport {{ dynamic_port_range_start }}:{{ dynamic_port_range_end }} \
|
||||||
-j DNAT --to-destination {{ kind_node_ip }} \
|
-j DNAT --to-destination {{ kind_node_ip }} 2>/dev/null || true
|
||||||
2>/dev/null || true
|
# DOCKER-USER
|
||||||
executable: /bin/bash
|
|
||||||
changed_when: false
|
|
||||||
|
|
||||||
- name: Remove DOCKER-USER relay rules
|
|
||||||
ansible.builtin.shell:
|
|
||||||
cmd: |
|
|
||||||
set -o pipefail
|
|
||||||
iptables -D DOCKER-USER -p udp -d {{ kind_node_ip }} \
|
iptables -D DOCKER-USER -p udp -d {{ kind_node_ip }} \
|
||||||
--dport {{ gossip_port }} -j ACCEPT 2>/dev/null || true
|
--dport {{ gossip_port }} -j ACCEPT 2>/dev/null || true
|
||||||
iptables -D DOCKER-USER -p tcp -d {{ kind_node_ip }} \
|
iptables -D DOCKER-USER -p tcp -d {{ kind_node_ip }} \
|
||||||
|
|
@ -98,31 +135,19 @@
|
||||||
iptables -D DOCKER-USER -p udp -d {{ kind_node_ip }} \
|
iptables -D DOCKER-USER -p udp -d {{ kind_node_ip }} \
|
||||||
--dport {{ dynamic_port_range_start }}:{{ dynamic_port_range_end }} \
|
--dport {{ dynamic_port_range_start }}:{{ dynamic_port_range_end }} \
|
||||||
-j ACCEPT 2>/dev/null || true
|
-j ACCEPT 2>/dev/null || true
|
||||||
executable: /bin/bash
|
# Mangle
|
||||||
changed_when: false
|
|
||||||
|
|
||||||
- name: Remove outbound mangle rules
|
|
||||||
ansible.builtin.shell:
|
|
||||||
cmd: |
|
|
||||||
set -o pipefail
|
|
||||||
iptables -t mangle -D PREROUTING -s {{ kind_network }} \
|
iptables -t mangle -D PREROUTING -s {{ kind_network }} \
|
||||||
-p udp --sport {{ gossip_port }} \
|
-p udp --sport {{ gossip_port }} -j MARK --set-mark {{ fwmark }} 2>/dev/null || true
|
||||||
-j MARK --set-mark {{ fwmark }} 2>/dev/null || true
|
|
||||||
iptables -t mangle -D PREROUTING -s {{ kind_network }} \
|
iptables -t mangle -D PREROUTING -s {{ kind_network }} \
|
||||||
-p udp --sport {{ dynamic_port_range_start }}:{{ dynamic_port_range_end }} \
|
-p udp --sport {{ dynamic_port_range_start }}:{{ dynamic_port_range_end }} \
|
||||||
-j MARK --set-mark {{ fwmark }} 2>/dev/null || true
|
-j MARK --set-mark {{ fwmark }} 2>/dev/null || true
|
||||||
iptables -t mangle -D PREROUTING -s {{ kind_network }} \
|
iptables -t mangle -D PREROUTING -s {{ kind_network }} \
|
||||||
-p tcp --sport {{ gossip_port }} \
|
-p tcp --sport {{ gossip_port }} -j MARK --set-mark {{ fwmark }} 2>/dev/null || true
|
||||||
-j MARK --set-mark {{ fwmark }} 2>/dev/null || true
|
|
||||||
iptables -t mangle -D PREROUTING -s {{ kind_network }} \
|
iptables -t mangle -D PREROUTING -s {{ kind_network }} \
|
||||||
-p tcp --dport {{ gossip_port }} \
|
-p tcp --dport {{ gossip_port }} -j MARK --set-mark {{ fwmark }} 2>/dev/null || true
|
||||||
-j MARK --set-mark {{ fwmark }} 2>/dev/null || true
|
# SNAT
|
||||||
executable: /bin/bash
|
iptables -t nat -D POSTROUTING -m mark --mark {{ fwmark }} \
|
||||||
changed_when: false
|
-j SNAT --to-source {{ ashburn_ip }} 2>/dev/null || true
|
||||||
|
|
||||||
- name: Remove outbound SNAT rule
|
|
||||||
ansible.builtin.shell:
|
|
||||||
cmd: iptables -t nat -D POSTROUTING -m mark --mark {{ fwmark }} -j SNAT --to-source {{ ashburn_ip }} 2>/dev/null || true
|
|
||||||
executable: /bin/bash
|
executable: /bin/bash
|
||||||
changed_when: false
|
changed_when: false
|
||||||
|
|
||||||
|
|
@ -130,6 +155,7 @@
|
||||||
ansible.builtin.shell:
|
ansible.builtin.shell:
|
||||||
cmd: |
|
cmd: |
|
||||||
ip rule del fwmark {{ fwmark }} table {{ rt_table_name }} 2>/dev/null || true
|
ip rule del fwmark {{ fwmark }} table {{ rt_table_name }} 2>/dev/null || true
|
||||||
|
ip rule del from {{ kind_network }} fwmark {{ fwmark }} table {{ rt_table_name }} 2>/dev/null || true
|
||||||
ip route del default table {{ rt_table_name }} 2>/dev/null || true
|
ip route del default table {{ rt_table_name }} 2>/dev/null || true
|
||||||
executable: /bin/bash
|
executable: /bin/bash
|
||||||
changed_when: false
|
changed_when: false
|
||||||
|
|
@ -176,43 +202,79 @@
|
||||||
failed_when: kind_ping.rc != 0
|
failed_when: kind_ping.rc != 0
|
||||||
tags: [preflight, inbound]
|
tags: [preflight, inbound]
|
||||||
|
|
||||||
- name: Show existing iptables nat rules
|
# ------------------------------------------------------------------
|
||||||
ansible.builtin.shell:
|
# Firewalld zones and policies (permanent, survives reboots)
|
||||||
cmd: set -o pipefail && iptables -t nat -L -v -n --line-numbers | head -60
|
# ------------------------------------------------------------------
|
||||||
executable: /bin/bash
|
# Docker's iptables FORWARD chain (priority filter) drops packets that
|
||||||
register: existing_nat
|
# don't match DOCKER-USER or DOCKER-FORWARD. Firewalld's nftables
|
||||||
|
# filter_FORWARD (priority filter+10) then checks zone-based policies.
|
||||||
|
# Without the docker zone + docker-to-relay policy, firewalld rejects
|
||||||
|
# outbound traffic from Docker bridges to gre-ashburn because neither
|
||||||
|
# interface is in a known zone.
|
||||||
|
- name: Add Docker bridges to docker zone
|
||||||
|
ansible.posix.firewalld:
|
||||||
|
zone: docker
|
||||||
|
interface: "{{ item }}"
|
||||||
|
permanent: true
|
||||||
|
state: enabled
|
||||||
|
loop: "{{ docker_bridges }}"
|
||||||
|
register: docker_zone_result
|
||||||
|
tags: [outbound, inbound]
|
||||||
|
|
||||||
|
- name: Add GRE tunnel to trusted zone
|
||||||
|
ansible.posix.firewalld:
|
||||||
|
zone: trusted
|
||||||
|
interface: "{{ tunnel_device }}"
|
||||||
|
permanent: true
|
||||||
|
state: enabled
|
||||||
|
register: trusted_zone_result
|
||||||
|
tags: [outbound, inbound]
|
||||||
|
|
||||||
|
- name: Check if docker-to-relay policy exists
|
||||||
|
ansible.builtin.command:
|
||||||
|
cmd: firewall-cmd --info-policy=docker-to-relay
|
||||||
|
register: policy_check
|
||||||
changed_when: false
|
changed_when: false
|
||||||
tags: [preflight]
|
failed_when: false
|
||||||
|
tags: [outbound]
|
||||||
|
|
||||||
- name: Display existing NAT rules
|
- name: Create docker-to-relay forwarding policy
|
||||||
ansible.builtin.debug:
|
when: policy_check.rc != 0
|
||||||
var: existing_nat.stdout_lines
|
|
||||||
tags: [preflight]
|
|
||||||
|
|
||||||
- name: Check for existing GRE tunnel
|
|
||||||
ansible.builtin.shell:
|
ansible.builtin.shell:
|
||||||
cmd: ip tunnel show {{ tunnel_device }} 2>&1 || echo "tunnel does not exist"
|
cmd: |
|
||||||
|
set -euo pipefail
|
||||||
|
firewall-cmd --permanent --new-policy=docker-to-relay
|
||||||
|
firewall-cmd --permanent --policy=docker-to-relay --set-target=ACCEPT
|
||||||
|
firewall-cmd --permanent --policy=docker-to-relay --add-ingress-zone=docker
|
||||||
|
firewall-cmd --permanent --policy=docker-to-relay --add-egress-zone=trusted
|
||||||
|
echo "policy created"
|
||||||
executable: /bin/bash
|
executable: /bin/bash
|
||||||
register: existing_tunnel
|
register: policy_result
|
||||||
changed_when: false
|
changed_when: "'created' in policy_result.stdout"
|
||||||
tags: [preflight]
|
tags: [outbound]
|
||||||
|
|
||||||
- name: Display existing tunnel
|
- name: Reload firewalld
|
||||||
ansible.builtin.debug:
|
ansible.builtin.command:
|
||||||
var: existing_tunnel.stdout_lines
|
cmd: firewall-cmd --reload
|
||||||
tags: [preflight]
|
when: >-
|
||||||
|
docker_zone_result.changed or
|
||||||
|
trusted_zone_result.changed or
|
||||||
|
(policy_result is defined and policy_result.changed)
|
||||||
|
changed_when: true
|
||||||
|
tags: [outbound, inbound]
|
||||||
|
|
||||||
# ------------------------------------------------------------------
|
# ------------------------------------------------------------------
|
||||||
# GRE tunnel setup
|
# GRE tunnel setup (iproute2)
|
||||||
# ------------------------------------------------------------------
|
# ------------------------------------------------------------------
|
||||||
- name: Create GRE tunnel
|
- name: Create GRE tunnel
|
||||||
ansible.builtin.shell:
|
ansible.builtin.shell:
|
||||||
cmd: |
|
cmd: |
|
||||||
set -o pipefail
|
set -euo pipefail
|
||||||
if ip tunnel show {{ tunnel_device }} 2>/dev/null; then
|
if ip tunnel show {{ tunnel_device }} 2>/dev/null; then
|
||||||
echo "tunnel already exists"
|
echo "tunnel already exists"
|
||||||
else
|
else
|
||||||
ip tunnel add {{ tunnel_device }} mode gre local {{ tunnel_src }} remote {{ tunnel_dst }} ttl 64
|
ip tunnel add {{ tunnel_device }} mode gre \
|
||||||
|
local {{ tunnel_src }} remote {{ tunnel_dst }} ttl 64
|
||||||
ip addr add {{ tunnel_local_ip }}/31 dev {{ tunnel_device }}
|
ip addr add {{ tunnel_local_ip }}/31 dev {{ tunnel_device }}
|
||||||
ip link set {{ tunnel_device }} up mtu 8972
|
ip link set {{ tunnel_device }} up mtu 8972
|
||||||
echo "tunnel created"
|
echo "tunnel created"
|
||||||
|
|
@ -222,29 +284,27 @@
|
||||||
changed_when: "'created' in tunnel_result.stdout"
|
changed_when: "'created' in tunnel_result.stdout"
|
||||||
tags: [outbound]
|
tags: [outbound]
|
||||||
|
|
||||||
- name: Show tunnel result
|
|
||||||
ansible.builtin.debug:
|
|
||||||
var: tunnel_result.stdout_lines
|
|
||||||
tags: [outbound]
|
|
||||||
|
|
||||||
# ------------------------------------------------------------------
|
# ------------------------------------------------------------------
|
||||||
# Inbound: DNAT for 137.239.194.65 → kind node
|
# Inbound: DNAT for 137.239.194.65 → kind node (iptables)
|
||||||
# ------------------------------------------------------------------
|
# ------------------------------------------------------------------
|
||||||
|
# These must be iptables rules inserted before Docker's ADDRTYPE LOCAL
|
||||||
|
# rule in nat PREROUTING. Firewalld forward-ports can't guarantee
|
||||||
|
# ordering relative to Docker's chains.
|
||||||
- name: Add Ashburn IP to loopback
|
- name: Add Ashburn IP to loopback
|
||||||
ansible.builtin.command:
|
ansible.builtin.command:
|
||||||
cmd: ip addr add {{ ashburn_ip }}/32 dev lo
|
cmd: ip addr add {{ ashburn_ip }}/32 dev lo
|
||||||
register: add_ip
|
register: add_ip
|
||||||
changed_when: add_ip.rc == 0
|
changed_when: add_ip.rc == 0
|
||||||
failed_when: "add_ip.rc != 0 and 'already assigned' not in add_ip.stderr and 'File exists' not in add_ip.stderr"
|
failed_when: >-
|
||||||
|
add_ip.rc != 0 and
|
||||||
|
'already assigned' not in add_ip.stderr and
|
||||||
|
'File exists' not in add_ip.stderr
|
||||||
tags: [inbound]
|
tags: [inbound]
|
||||||
|
|
||||||
- name: Add DNAT rules (inserted before DOCKER chain)
|
- name: Add DNAT rules (before Docker's chain)
|
||||||
ansible.builtin.shell:
|
ansible.builtin.shell:
|
||||||
cmd: |
|
cmd: |
|
||||||
set -o pipefail
|
set -o pipefail
|
||||||
# DNAT rules must be before Docker's ADDRTYPE LOCAL rule, otherwise
|
|
||||||
# Docker's PREROUTING chain swallows traffic to 137.239.194.65 (which
|
|
||||||
# is on loopback and therefore type LOCAL).
|
|
||||||
for rule in \
|
for rule in \
|
||||||
"-p udp -d {{ ashburn_ip }} --dport {{ gossip_port }} -j DNAT --to-destination {{ kind_node_ip }}:{{ gossip_port }}" \
|
"-p udp -d {{ ashburn_ip }} --dport {{ gossip_port }} -j DNAT --to-destination {{ kind_node_ip }}:{{ gossip_port }}" \
|
||||||
"-p tcp -d {{ ashburn_ip }} --dport {{ gossip_port }} -j DNAT --to-destination {{ kind_node_ip }}:{{ gossip_port }}" \
|
"-p tcp -d {{ ashburn_ip }} --dport {{ gossip_port }} -j DNAT --to-destination {{ kind_node_ip }}:{{ gossip_port }}" \
|
||||||
|
|
@ -262,19 +322,13 @@
|
||||||
changed_when: "'added' in dnat_result.stdout"
|
changed_when: "'added' in dnat_result.stdout"
|
||||||
tags: [inbound]
|
tags: [inbound]
|
||||||
|
|
||||||
- name: Show DNAT result
|
- name: Allow DNAT'd traffic through Docker's FORWARD chain
|
||||||
ansible.builtin.debug:
|
|
||||||
var: dnat_result.stdout_lines
|
|
||||||
tags: [inbound]
|
|
||||||
|
|
||||||
- name: Allow DNAT'd relay traffic through DOCKER-USER
|
|
||||||
ansible.builtin.shell:
|
ansible.builtin.shell:
|
||||||
cmd: |
|
cmd: |
|
||||||
set -o pipefail
|
set -o pipefail
|
||||||
# Docker's FORWARD chain drops traffic to bridge networks unless
|
# Docker's iptables FORWARD (priority filter) drops NEW inbound
|
||||||
# explicitly accepted. DOCKER-USER runs first and is the correct
|
# traffic to bridge networks. DOCKER-USER is the only place to
|
||||||
# place for user rules. These ACCEPT rules let DNAT'd relay
|
# add ACCEPT rules that survive Docker daemon restarts.
|
||||||
# traffic reach the kind node (172.20.0.2).
|
|
||||||
for rule in \
|
for rule in \
|
||||||
"-p udp -d {{ kind_node_ip }} --dport {{ gossip_port }} -j ACCEPT" \
|
"-p udp -d {{ kind_node_ip }} --dport {{ gossip_port }} -j ACCEPT" \
|
||||||
"-p tcp -d {{ kind_node_ip }} --dport {{ gossip_port }} -j ACCEPT" \
|
"-p tcp -d {{ kind_node_ip }} --dport {{ gossip_port }} -j ACCEPT" \
|
||||||
|
|
@ -292,14 +346,14 @@
|
||||||
changed_when: "'added' in forward_result.stdout"
|
changed_when: "'added' in forward_result.stdout"
|
||||||
tags: [inbound]
|
tags: [inbound]
|
||||||
|
|
||||||
- name: Show DOCKER-USER result
|
|
||||||
ansible.builtin.debug:
|
|
||||||
var: forward_result.stdout_lines
|
|
||||||
tags: [inbound]
|
|
||||||
|
|
||||||
# ------------------------------------------------------------------
|
# ------------------------------------------------------------------
|
||||||
# Outbound: fwmark + SNAT + policy routing via new tunnel
|
# Outbound: fwmark + SNAT + policy routing (iptables + iproute2)
|
||||||
# ------------------------------------------------------------------
|
# ------------------------------------------------------------------
|
||||||
|
# Mangle marks select which traffic gets policy-routed through the
|
||||||
|
# relay. Source-based routing (ip rule from 172.20.0.0/16) would be
|
||||||
|
# simpler but routes ALL Docker traffic through the tunnel, including
|
||||||
|
# DNS and health checks. Port-specific marks keep non-validator
|
||||||
|
# traffic on the default route.
|
||||||
- name: Mark outbound validator traffic (mangle PREROUTING)
|
- name: Mark outbound validator traffic (mangle PREROUTING)
|
||||||
ansible.builtin.shell:
|
ansible.builtin.shell:
|
||||||
cmd: |
|
cmd: |
|
||||||
|
|
@ -322,29 +376,20 @@
|
||||||
changed_when: "'added' in mangle_result.stdout"
|
changed_when: "'added' in mangle_result.stdout"
|
||||||
tags: [outbound]
|
tags: [outbound]
|
||||||
|
|
||||||
- name: Show mangle result
|
- name: SNAT marked traffic to Ashburn IP
|
||||||
ansible.builtin.debug:
|
|
||||||
var: mangle_result.stdout_lines
|
|
||||||
tags: [outbound]
|
|
||||||
|
|
||||||
- name: SNAT marked traffic to Ashburn IP (before Docker MASQUERADE)
|
|
||||||
ansible.builtin.shell:
|
ansible.builtin.shell:
|
||||||
cmd: |
|
cmd: |
|
||||||
set -o pipefail
|
set -o pipefail
|
||||||
if iptables -t nat -C POSTROUTING -m mark --mark {{ fwmark }} -j SNAT --to-source {{ ashburn_ip }} 2>/dev/null; then
|
rule="-m mark --mark {{ fwmark }} -j SNAT --to-source {{ ashburn_ip }}"
|
||||||
echo "SNAT rule already exists"
|
if iptables -t nat -C POSTROUTING $rule 2>/dev/null; then
|
||||||
|
echo "exists"
|
||||||
else
|
else
|
||||||
iptables -t nat -I POSTROUTING 1 -m mark --mark {{ fwmark }} -j SNAT --to-source {{ ashburn_ip }}
|
iptables -t nat -I POSTROUTING 1 $rule
|
||||||
echo "SNAT rule inserted at position 1"
|
echo "added"
|
||||||
fi
|
fi
|
||||||
executable: /bin/bash
|
executable: /bin/bash
|
||||||
register: snat_result
|
register: snat_result
|
||||||
changed_when: "'inserted' in snat_result.stdout"
|
changed_when: "'added' in snat_result.stdout"
|
||||||
tags: [outbound]
|
|
||||||
|
|
||||||
- name: Show SNAT result
|
|
||||||
ansible.builtin.debug:
|
|
||||||
var: snat_result.stdout
|
|
||||||
tags: [outbound]
|
tags: [outbound]
|
||||||
|
|
||||||
- name: Ensure rt_tables entry exists
|
- name: Ensure rt_tables entry exists
|
||||||
|
|
@ -359,10 +404,10 @@
|
||||||
cmd: |
|
cmd: |
|
||||||
set -o pipefail
|
set -o pipefail
|
||||||
if ip rule show | grep -q 'fwmark 0x64 lookup ashburn'; then
|
if ip rule show | grep -q 'fwmark 0x64 lookup ashburn'; then
|
||||||
echo "rule already exists"
|
echo "exists"
|
||||||
else
|
else
|
||||||
ip rule add fwmark {{ fwmark }} table {{ rt_table_name }}
|
ip rule add fwmark {{ fwmark }} table {{ rt_table_name }}
|
||||||
echo "rule added"
|
echo "added"
|
||||||
fi
|
fi
|
||||||
executable: /bin/bash
|
executable: /bin/bash
|
||||||
register: rule_result
|
register: rule_result
|
||||||
|
|
@ -370,29 +415,29 @@
|
||||||
tags: [outbound]
|
tags: [outbound]
|
||||||
|
|
||||||
- name: Add default route via GRE tunnel in ashburn table
|
- name: Add default route via GRE tunnel in ashburn table
|
||||||
ansible.builtin.shell:
|
ansible.builtin.command:
|
||||||
cmd: ip route replace default via {{ tunnel_remote_ip }} dev {{ tunnel_device }} table {{ rt_table_name }}
|
cmd: ip route replace default via {{ tunnel_remote_ip }} dev {{ tunnel_device }} table {{ rt_table_name }}
|
||||||
executable: /bin/bash
|
|
||||||
changed_when: true
|
changed_when: true
|
||||||
tags: [outbound]
|
tags: [outbound]
|
||||||
|
|
||||||
# ------------------------------------------------------------------
|
# ------------------------------------------------------------------
|
||||||
# Persistence
|
# Persistence (systemd service for non-firewalld state)
|
||||||
# ------------------------------------------------------------------
|
# ------------------------------------------------------------------
|
||||||
# A systemd oneshot service replaces both if-up.d (which depends on
|
# Firewalld zones/policies persist natively (--permanent + reload).
|
||||||
# networking.service, inactive on this host) and netfilter-persistent
|
# The systemd service restores: GRE tunnel, loopback IP, iptables
|
||||||
# (which runs before Docker, so Docker's chain setup blows away rules).
|
# rules (DNAT, DOCKER-USER, mangle, SNAT), and policy routing.
|
||||||
# This service runs After=docker.service and idempotently applies all
|
# Runs After=docker.service because Docker recreates its iptables
|
||||||
# tunnel, iptables, and policy routing state.
|
# chains on startup.
|
||||||
- name: Install ashburn-relay systemd service
|
- name: Install ashburn-relay systemd service
|
||||||
ansible.builtin.copy:
|
ansible.builtin.copy:
|
||||||
dest: /etc/systemd/system/ashburn-relay.service
|
dest: /etc/systemd/system/ashburn-relay.service
|
||||||
mode: "0644"
|
mode: "0644"
|
||||||
content: |
|
content: |
|
||||||
[Unit]
|
[Unit]
|
||||||
Description=Ashburn validator relay (GRE tunnel, iptables, policy routing)
|
Description=Ashburn relay (GRE tunnel, iptables, policy routing)
|
||||||
After=docker.service network-online.target
|
After=docker.service network-online.target firewalld.service
|
||||||
Wants=network-online.target
|
Wants=network-online.target
|
||||||
|
Requires=firewalld.service
|
||||||
|
|
||||||
[Service]
|
[Service]
|
||||||
Type=oneshot
|
Type=oneshot
|
||||||
|
|
@ -428,67 +473,42 @@
|
||||||
# ------------------------------------------------------------------
|
# ------------------------------------------------------------------
|
||||||
# Verification
|
# Verification
|
||||||
# ------------------------------------------------------------------
|
# ------------------------------------------------------------------
|
||||||
- name: Show tunnel status
|
- name: Verify firewalld zones
|
||||||
|
ansible.builtin.shell:
|
||||||
|
cmd: |
|
||||||
|
echo "=== docker zone ==="
|
||||||
|
firewall-cmd --zone=docker --list-interfaces
|
||||||
|
echo "=== trusted zone ==="
|
||||||
|
firewall-cmd --zone=trusted --list-interfaces
|
||||||
|
echo "=== docker-to-relay policy ==="
|
||||||
|
firewall-cmd --info-policy=docker-to-relay
|
||||||
|
executable: /bin/bash
|
||||||
|
register: zone_info
|
||||||
|
changed_when: false
|
||||||
|
tags: [outbound, inbound]
|
||||||
|
|
||||||
|
- name: Verify tunnel and routing
|
||||||
ansible.builtin.shell:
|
ansible.builtin.shell:
|
||||||
cmd: |
|
cmd: |
|
||||||
echo "=== tunnel ==="
|
echo "=== tunnel ==="
|
||||||
ip tunnel show {{ tunnel_device }}
|
ip tunnel show {{ tunnel_device }}
|
||||||
echo ""
|
|
||||||
echo "=== tunnel addr ==="
|
|
||||||
ip addr show {{ tunnel_device }}
|
ip addr show {{ tunnel_device }}
|
||||||
echo ""
|
|
||||||
echo "=== ping tunnel peer ==="
|
echo "=== ping tunnel peer ==="
|
||||||
ping -c 1 -W 2 {{ tunnel_remote_ip }} 2>&1 || echo "tunnel peer unreachable"
|
ping -c 1 -W 2 {{ tunnel_remote_ip }} 2>&1 || echo "tunnel peer unreachable"
|
||||||
executable: /bin/bash
|
|
||||||
register: tunnel_status
|
|
||||||
changed_when: false
|
|
||||||
tags: [outbound]
|
|
||||||
|
|
||||||
- name: Show NAT rules
|
|
||||||
ansible.builtin.shell:
|
|
||||||
cmd: set -o pipefail && iptables -t nat -L -v -n --line-numbers 2>&1 | head -40
|
|
||||||
executable: /bin/bash
|
|
||||||
register: nat_rules
|
|
||||||
changed_when: false
|
|
||||||
tags: [inbound, outbound]
|
|
||||||
|
|
||||||
- name: Show mangle rules
|
|
||||||
ansible.builtin.shell:
|
|
||||||
cmd: iptables -t mangle -L -v -n 2>&1
|
|
||||||
executable: /bin/bash
|
|
||||||
register: mangle_rules
|
|
||||||
changed_when: false
|
|
||||||
tags: [outbound]
|
|
||||||
|
|
||||||
- name: Show policy routing
|
|
||||||
ansible.builtin.shell:
|
|
||||||
cmd: |
|
|
||||||
echo "=== ip rule ==="
|
echo "=== ip rule ==="
|
||||||
ip rule show
|
ip rule show
|
||||||
echo ""
|
echo "=== ashburn table ==="
|
||||||
echo "=== ashburn routing table ==="
|
|
||||||
ip route show table {{ rt_table_name }} 2>/dev/null || echo "table empty"
|
ip route show table {{ rt_table_name }} 2>/dev/null || echo "table empty"
|
||||||
executable: /bin/bash
|
executable: /bin/bash
|
||||||
register: routing_info
|
register: routing_info
|
||||||
changed_when: false
|
changed_when: false
|
||||||
tags: [outbound]
|
tags: [outbound]
|
||||||
|
|
||||||
- name: Show loopback addresses
|
|
||||||
ansible.builtin.shell:
|
|
||||||
cmd: set -o pipefail && ip addr show lo | grep inet
|
|
||||||
executable: /bin/bash
|
|
||||||
register: lo_addrs
|
|
||||||
changed_when: false
|
|
||||||
tags: [inbound]
|
|
||||||
|
|
||||||
- name: Display verification
|
- name: Display verification
|
||||||
ansible.builtin.debug:
|
ansible.builtin.debug:
|
||||||
msg:
|
msg:
|
||||||
tunnel: "{{ tunnel_status.stdout_lines | default([]) }}"
|
firewalld: "{{ zone_info.stdout_lines }}"
|
||||||
nat_rules: "{{ nat_rules.stdout_lines }}"
|
|
||||||
mangle_rules: "{{ mangle_rules.stdout_lines | default([]) }}"
|
|
||||||
routing: "{{ routing_info.stdout_lines | default([]) }}"
|
routing: "{{ routing_info.stdout_lines | default([]) }}"
|
||||||
loopback: "{{ lo_addrs.stdout_lines | default([]) }}"
|
|
||||||
tags: [inbound, outbound]
|
tags: [inbound, outbound]
|
||||||
|
|
||||||
- name: Summary
|
- name: Summary
|
||||||
|
|
@ -498,12 +518,8 @@
|
||||||
Ashburn IP: {{ ashburn_ip }} (on lo)
|
Ashburn IP: {{ ashburn_ip }} (on lo)
|
||||||
GRE tunnel: {{ tunnel_device }} ({{ tunnel_src }} → {{ tunnel_dst }})
|
GRE tunnel: {{ tunnel_device }} ({{ tunnel_src }} → {{ tunnel_dst }})
|
||||||
link: {{ tunnel_local_ip }}/31 ↔ {{ tunnel_remote_ip }}/31
|
link: {{ tunnel_local_ip }}/31 ↔ {{ tunnel_remote_ip }}/31
|
||||||
Inbound DNAT: {{ ashburn_ip }}:8001,9000-9025 → {{ kind_node_ip }}
|
Firewalld: Docker bridges in 'docker' zone, {{ tunnel_device }} in 'trusted' zone
|
||||||
Outbound SNAT: {{ kind_network }} sport 8001,9000-9025 → {{ ashburn_ip }}
|
Policy: docker-to-relay (docker → trusted, ACCEPT)
|
||||||
Policy route: fwmark {{ fwmark }} → table {{ rt_table_name }} → via {{ tunnel_remote_ip }} dev {{ tunnel_device }}
|
Inbound DNAT: {{ ashburn_ip }}:8001,9000-9025 → {{ kind_node_ip }} (iptables)
|
||||||
|
Outbound SNAT: fwmark {{ fwmark }} → {{ ashburn_ip }} (iptables)
|
||||||
Next steps:
|
Policy route: fwmark {{ fwmark }} → table {{ rt_table_name }} → via {{ tunnel_remote_ip }}
|
||||||
1. Apply mia-sw01 config (Tunnel100 must be up on both sides)
|
|
||||||
2. Verify tunnel: ping {{ tunnel_remote_ip }}
|
|
||||||
3. Test from kelce: echo test | nc -u -w 1 137.239.194.65 9000
|
|
||||||
4. Check validator gossip ContactInfo shows {{ ashburn_ip }} for all addresses
|
|
||||||
|
|
|
||||||
|
|
@ -1,11 +1,20 @@
|
||||||
#!/bin/bash
|
#!/bin/bash
|
||||||
# Ashburn validator relay — runtime setup
|
# Ashburn validator relay — runtime setup
|
||||||
#
|
#
|
||||||
# Called by ashburn-relay.service (After=docker.service) on boot.
|
# Called by ashburn-relay.service (After=docker.service firewalld.service)
|
||||||
# Idempotent — safe to run multiple times.
|
# on boot. Idempotent — safe to run multiple times.
|
||||||
#
|
#
|
||||||
# Creates GRE tunnel, loopback IP, iptables rules, and policy routing
|
# Creates GRE tunnel, loopback IP, iptables rules, and policy routing.
|
||||||
# so that validator traffic enters/exits via 137.239.194.65 (Ashburn).
|
# Firewalld zones/policies are permanent (not managed here).
|
||||||
|
#
|
||||||
|
# WHY IPTABLES + FIREWALLD:
|
||||||
|
# Docker uses iptables (priority filter) for its FORWARD chain.
|
||||||
|
# Firewalld uses nftables (priority filter+10). Docker's chain runs
|
||||||
|
# first and drops packets that firewalld would accept. So:
|
||||||
|
# - DNAT must be iptables (inserted before Docker's ADDRTYPE LOCAL rule)
|
||||||
|
# - DOCKER-USER must be iptables (Docker's FORWARD chain requires it)
|
||||||
|
# - Mangle/SNAT must be iptables (firewalld has no native mark/SNAT)
|
||||||
|
# - Forwarding (Docker bridge → gre-ashburn) is firewalld (zones/policies)
|
||||||
set -euo pipefail
|
set -euo pipefail
|
||||||
|
|
||||||
# GRE tunnel to mia-sw01
|
# GRE tunnel to mia-sw01
|
||||||
|
|
@ -35,9 +44,10 @@ for rule in \
|
||||||
fi
|
fi
|
||||||
done
|
done
|
||||||
|
|
||||||
# FORWARD: allow DNAT'd relay traffic through Docker's FORWARD chain.
|
# DOCKER-USER: accept DNAT'd relay traffic through Docker's FORWARD chain.
|
||||||
# Docker drops traffic to bridge networks unless explicitly accepted.
|
# Docker's iptables FORWARD (priority filter) drops NEW inbound traffic to
|
||||||
# DOCKER-USER runs before all Docker chains and survives daemon restarts.
|
# bridge networks. DOCKER-USER is the only place for user ACCEPT rules
|
||||||
|
# that survive Docker daemon restarts.
|
||||||
for rule in \
|
for rule in \
|
||||||
"-p udp -d {{ kind_node_ip }} --dport {{ gossip_port }} -j ACCEPT" \
|
"-p udp -d {{ kind_node_ip }} --dport {{ gossip_port }} -j ACCEPT" \
|
||||||
"-p tcp -d {{ kind_node_ip }} --dport {{ gossip_port }} -j ACCEPT" \
|
"-p tcp -d {{ kind_node_ip }} --dport {{ gossip_port }} -j ACCEPT" \
|
||||||
|
|
@ -51,10 +61,6 @@ for rule in \
|
||||||
done
|
done
|
||||||
|
|
||||||
# Outbound mangle (fwmark for policy routing)
|
# Outbound mangle (fwmark for policy routing)
|
||||||
# sport rules: gossip/repair/TVU traffic FROM validator well-known ports
|
|
||||||
# dport rule: ip_echo TCP TO entrypoint port 8001 (ephemeral sport,
|
|
||||||
# so sport-based rules miss it; without this the entrypoint sees
|
|
||||||
# biscayne's real IP and probes that instead of the Ashburn relay IP)
|
|
||||||
for rule in \
|
for rule in \
|
||||||
"-p udp -s {{ kind_network }} --sport {{ gossip_port }} \
|
"-p udp -s {{ kind_network }} --sport {{ gossip_port }} \
|
||||||
-j MARK --set-mark {{ fwmark }}" \
|
-j MARK --set-mark {{ fwmark }}" \
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue