From 61b7f6a23684a9cb3dc901a4f298080ae33c0d1c Mon Sep 17 00:00:00 2001 From: "A. F. Dudley" Date: Tue, 10 Mar 2026 05:54:23 +0000 Subject: [PATCH] feat: ip_echo preflight tool + relay post-mortem and checklist ip_echo_preflight.py: reimplements Solana ip_echo client protocol in Python. Verifies UDP port reachability before snapshot download, called from entrypoint.py. Prevents wasting hours on a snapshot only to crash-loop on port reachability. docs/postmortem-ashburn-relay-outbound.md: root cause analysis of the firewalld nftables FORWARD chain blocking outbound relay traffic. docs/ashburn-relay-checklist.md: 7-layer verification checklist for relay path debugging. Co-Authored-By: Claude Opus 4.6 --- docs/ashburn-relay-checklist.md | 168 +++++++++++++ docs/postmortem-ashburn-relay-outbound.md | 190 ++++++++++++++ scripts/agave-container/ip_echo_preflight.py | 249 +++++++++++++++++++ 3 files changed, 607 insertions(+) create mode 100644 docs/ashburn-relay-checklist.md create mode 100644 docs/postmortem-ashburn-relay-outbound.md create mode 100644 scripts/agave-container/ip_echo_preflight.py diff --git a/docs/ashburn-relay-checklist.md b/docs/ashburn-relay-checklist.md new file mode 100644 index 00000000..733581d1 --- /dev/null +++ b/docs/ashburn-relay-checklist.md @@ -0,0 +1,168 @@ +# Ashburn Relay / ip_echo Port Reachability Checklist + +The validator exits when it can't verify UDP ports (8001, 9000, 9002, 9003) are +reachable from entrypoint servers. The ip_echo protocol: validator TCP-connects +to entrypoint on port 8001, entrypoint sees source IP, sends UDP probes back to +that IP on the validator's ports. If probes don't arrive, validator crashes. + +## Layer 1: Biscayne outbound path + +Validator's outbound ip_echo TCP (dport 8001) must exit via GRE tunnel so +entrypoints see `137.239.194.65`, not biscayne's real IP via Docker MASQUERADE. 
+ +``` +[ ] 1.1 Mangle rules (4 rules in mangle PREROUTING): + - udp sport 8001 (gossip outbound) + - udp sport 9000:9025 (TVU/repair outbound) + - tcp sport 8001 (gossip TCP outbound) + - tcp dport 8001 (ip_echo outbound — THE CRITICAL ONE) + +[ ] 1.2 SNAT rule at position 1 (before Docker MASQUERADE): + POSTROUTING -m mark --mark 100 -j SNAT --to-source 137.239.194.65 + +[ ] 1.3 Policy routing rule: + fwmark 0x64 lookup ashburn + +[ ] 1.4 Ashburn routing table default route: + default via 169.254.100.0 dev gre-ashburn + +[ ] 1.5 Mangle counters incrementing (pkts/bytes on tcp dport 8001 rule) +``` + +## Layer 2: GRE tunnel (biscayne ↔ mia-sw01) + +``` +[ ] 2.1 Tunnel exists and UP: + gre-ashburn with 169.254.100.1/31 + +[ ] 2.2 Tunnel peer reachable: + ping 169.254.100.0 + +[ ] 2.3 Ashburn IP on loopback: + 137.239.194.65/32 dev lo +``` + +## Layer 3: Biscayne inbound path (DNAT + DOCKER-USER) + +Entrypoint UDP probes arrive at `137.239.194.65` and must reach kind node +`172.20.0.2`. + +``` +[ ] 3.1 DNAT rules at position 1 in nat PREROUTING + (before Docker's ADDRTYPE LOCAL rule): + - udp dport 8001 → 172.20.0.2:8001 + - tcp dport 8001 → 172.20.0.2:8001 + - udp dport 9000:9025 → 172.20.0.2 + +[ ] 3.2 DOCKER-USER ACCEPT rules (3 rules): + - udp dport 8001 → ACCEPT + - tcp dport 8001 → ACCEPT + - udp dport 9000:9025 → ACCEPT + +[ ] 3.3 DNAT counters incrementing +``` + +## Layer 4: mia-sw01 + +``` +[ ] 4.1 Tunnel100 UP in VRF relay + src 209.42.167.137, dst 186.233.184.235, link 169.254.100.0/31 + +[ ] 4.2 VRF relay default route: + 0.0.0.0/0 egress-vrf default 172.16.1.188 + +[ ] 4.3 Default VRF route to relay IP: + 137.239.194.65/32 egress-vrf relay 169.254.100.1 + +[ ] 4.4 ACL SEC-VALIDATOR-100-IN permits all needed traffic + +[ ] 4.5 Backbone Et4/1 UP (172.16.1.189/31) +``` + +## Layer 5: was-sw01 + +``` +[ ] 5.1 Static route: 137.239.194.65/32 via 172.16.1.189 + +[ ] 5.2 Backbone Et4/1 UP (172.16.1.188/31) + +[ ] 5.3 No Loopback101 (removed to avoid absorbing 
traffic locally) +``` + +## Layer 6: Persistence + +``` +[ ] 6.1 ashburn-relay.service enabled and active (runs After=docker.service) + +[ ] 6.2 /usr/local/sbin/ashburn-relay-setup.sh exists +``` + +## Layer 7: End-to-end tests + +All tests run via Ansible playbooks. The test scripts in `scripts/` are +utilities invoked by the playbooks — never run them manually via SSH. + +``` +[ ] 7.1 relay-test-tcp-dport.py (via ashburn-relay-check.yml or ad-hoc play) + Tests: outbound tcp dport 8001 mangle → SNAT → tunnel + Pass: entrypoint sees 137.239.194.65 + Fail: entrypoint sees 186.233.184.235 (Docker MASQUERADE) + +[ ] 7.2 relay-test-ip-echo.py (via ashburn-relay-check.yml or ad-hoc play) + Tests: FULL END-TO-END (outbound SNAT + inbound DNAT + DOCKER-USER) + Pass: UDP probe received from entrypoint + Fail: no UDP probes — inbound path broken + +[ ] 7.3 relay-inbound-udp-test.yml (cross-inventory: biscayne + kelce) + Tests: inbound UDP from external host → DNAT → kind node + Pass: UDP arrives in kind netns +``` + +## Playbooks + +```bash +# Read-only check of all relay state (biscayne + both switches): +ansible-playbook -i inventory-switches/switches.yml \ + -i inventory/biscayne.yml playbooks/ashburn-relay-check.yml + +# Apply all biscayne relay rules (idempotent): +ansible-playbook -i inventory/biscayne.yml playbooks/ashburn-relay-biscayne.yml + +# Apply outbound only (the ip_echo fix): +ansible-playbook -i inventory/biscayne.yml \ + playbooks/ashburn-relay-biscayne.yml -t outbound + +# Apply inbound only (DNAT + DOCKER-USER): +ansible-playbook -i inventory/biscayne.yml \ + playbooks/ashburn-relay-biscayne.yml -t inbound + +# Apply mia-sw01 config: +ansible-playbook -i inventory-switches/switches.yml \ + playbooks/ashburn-relay-mia-sw01.yml + +# Apply was-sw01 config: +ansible-playbook -i inventory-switches/switches.yml \ + playbooks/ashburn-relay-was-sw01.yml + +# Cross-inventory inbound UDP test (biscayne + kelce): +ansible-playbook -i inventory/biscayne.yml -i 
inventory/kelce.yml \ + playbooks/relay-inbound-udp-test.yml +``` + +## Historical root causes + +1. **TCP dport 8001 mangle rule missing** — ip_echo TCP exits via Docker + MASQUERADE, entrypoint sees wrong IP, UDP probes go to wrong address. + +2. **DOCKER-USER ACCEPT rules missing** — DNAT'd traffic hits Docker's FORWARD + DROP policy, never reaches kind node. + +3. **DNAT rule position wrong** — Docker's `ADDRTYPE LOCAL` rule in PREROUTING + catches traffic to loopback IPs before our DNAT rules. Must use `-I + PREROUTING 1`. + +4. **mia-sw01 egress-vrf route with interface specified** — silently fails in + EOS (accepted in config, never installed in RIB). Must use nexthop-only form. + +5. **was-sw01 Loopback101 absorbing traffic** — local delivery instead of + forwarding to mia-sw01 via backbone. diff --git a/docs/postmortem-ashburn-relay-outbound.md b/docs/postmortem-ashburn-relay-outbound.md new file mode 100644 index 00000000..b3dbae94 --- /dev/null +++ b/docs/postmortem-ashburn-relay-outbound.md @@ -0,0 +1,190 @@ +# Post-Mortem: Ashburn Relay Outbound Path Failure + +**Date resolved**: 2026-03-10 +**Duration of impact**: Unknown — likely since firewalld was enabled (post-reboot +2026-03-09 ~21:24 UTC). The relay worked before this with firewalld disabled. +**Symptoms**: Validator CrashLoopBackOff on ip_echo port reachability check. +Entrypoint never receives the validator's outbound TCP connection, so it can't +verify UDP port reachability and the validator refuses to start. + +## Timeline + +### Session d02959a7 (2026-03-06 to 2026-03-08) + +Initial relay infrastructure build-out. Multi-day effort across three repos. + +1. **Validator deployed**, replaying at 0.24 slots/sec. RTT between Miami and + peers (~150ms per repair round-trip) identified as the bottleneck. Ashburn + relay identified as the fix. + +2. **GRE tunnel created** (gre-ashburn: biscayne 186.233.184.235 ↔ mia-sw01 + 209.42.167.137). Tunnel100 on mia-sw01 in VRF relay. 
Policy routing with + fwmark 0x64 routes validator traffic through the tunnel. + +3. **Inbound path debugged end-to-end**: + - Cross-VRF routing on mia-sw01 investigated (egress-vrf route form, hardware + FIB programming, TCAM profile). + - GRE decapsulation on biscayne verified (kernel source read to understand + ip_tunnel_lookup matching logic). + - **DOCKER chain drop rule found**: Docker's FORWARD chain only had ACCEPT + for TCP 6443/443/80. DNAT'd relay UDP was dropped. Fix: DOCKER-USER + ACCEPT rules for UDP 8001 and 9000-9025. + - Inbound UDP relay test passed (kelce → was-sw01 → mia-sw01 → Tunnel100 → + biscayne → DNAT → kind node). + +4. **Outbound path partially verified**: Relay test scripts confirmed TCP and + UDP traffic from the kind container exits via gre-ashburn with correct SNAT. + But the **validator's own ip_echo check was never end-to-end verified** with + a successful startup. The validator entered CrashLoopBackOff after the + DOCKER-USER fix for unrelated reasons (monitoring container crashes, log path + issues). + +5. **Ashburn relay checklist** written at `docs/ashburn-relay-checklist.md` — + 7 layers covering the full path. All items remained unchecked. + +### Session 0b5908a4 (2026-03-09) + +Container rebuild, graceful shutdown implementation, ZFS upgrade, storage +migration. The validator was **running and catching up from a ~5,649 slot gap**, +confirming the relay was working. Then: + +- io_uring/ZFS deadlock from ungraceful shutdown (ZFS 2.2.2, fixed in 2.2.8+) +- Reboot required to clear zombie processes +- **Firewalld was enabled/started on the reboot** (previously disabled) + +### Session cc6c8c55 (2026-03-10, this session) + +User asked to review session d02959a7 to confirm the ip_echo problem was +actually solved. It wasn't. + +1. **ip_echo preflight tool written** (`scripts/agave-container/ip_echo_preflight.py`) + — reimplements the Solana ip_echo client protocol in Python, called from + `entrypoint.py` before snapshot download. 
Tested successfully against live + entrypoints from the host. + +2. **Tested from kind netns** — TCP to entrypoint:8001 returns "No route to + host". Mangle PREROUTING counter increments (marking works) but SNAT + POSTROUTING counter stays at 0 (packets never reach POSTROUTING). + +3. **Misdiagnoses**: + - `src_valid_mark=0` suspected as root cause. Set to 1, no change. The + `ip route get X from Y mark Z` command was misleading — it simulates + locally-originated traffic, not forwarded. The correct test is + `ip route get X from Y iif DEV mark Z` (DEV = the ingress interface), which showed routing works. + - Firewalld nftables backend not setting `src_valid_mark` was a red herring. + +4. **Root cause found**: Firewalld's nftables `filter_FORWARD` chain (priority + filter+10) rejects forwarded traffic between interfaces not in known zones. + Docker bridges and gre-ashburn were not in any firewalld zone. The chain's + `filter_FORWARD_POLICIES` only had rules for eno1, eno2, and mesh. + Traffic from br-cf46a62ab5b2 to gre-ashburn fell through to + `reject with icmpx admin-prohibited`. + + ``` + # The reject that was killing outbound relay traffic: + chain filter_FORWARD { + ... + jump filter_FORWARD_POLICIES + reject with icmpx admin-prohibited ← packets from unknown interfaces + } + ``` + +5. **Fix applied**: + - Docker bridges (br-cf46a62ab5b2, docker0, br-4fb6f6795448) → `docker` zone + - gre-ashburn → `trusted` zone + - New `docker-to-relay` policy: docker → trusted, ACCEPT + - All permanent (`firewall-cmd --permanent` + reload) + +6. **Verified**: ip_echo from kind netns returns `seen_ip=137.239.194.65 + shred_version=50093`. Full outbound path works. + +## Root Cause + +**Firewalld was enabled on biscayne after a reboot. Its nftables FORWARD chain +rejected forwarded traffic from Docker bridges to gre-ashburn because neither +interface was assigned to a firewalld zone.** + +The relay worked before because firewalld was disabled.
The iptables rules +(mangle marks, SNAT, DNAT, DOCKER-USER) operated without interference. When +firewalld was enabled, its nftables filter_FORWARD chain (priority filter+10) +added a second layer of forwarding policy enforcement that the iptables rules +couldn't bypass. + +### Why Docker outbound to the internet still worked + +Docker's outbound traffic to eno1 was accepted by firewalld because eno1 IS in +the `public` zone. The `filter_FWD_public_allow` chain has `oifname "eno1" +accept`. Only traffic to gre-ashburn (not in any zone) was rejected. + +### Why iptables rules alone weren't enough + +Linux netfilter processes hooks in priority order. At the FORWARD hook: + +1. **Priority filter (0)**: iptables `FORWARD` chain — Docker's DOCKER-USER + and DOCKER-FORWARD chains. These accept the traffic. +2. **Priority filter+10**: nftables `filter_FORWARD` chain — firewalld's zone + policies. These reject the traffic if interfaces aren't in known zones. + +Both chains must accept for the packet to pass. The iptables acceptance at +priority 0 is overridden by the nftables rejection at priority filter+10. + +## Architecture After Fix + +Firewalld manages forwarding policy. Iptables handles Docker-specific rules +that firewalld can't replace (DNAT ordering, DOCKER-USER chain, mangle marks, +SNAT). Both coexist because they operate at different netfilter priorities. 
+ +``` +Firewalld (permanent, survives reboots): + docker zone: br-cf46a62ab5b2, docker0, br-4fb6f6795448 + trusted zone: mesh, gre-ashburn + docker-forwarding policy: ANY → docker, ACCEPT (existing) + docker-to-relay policy: docker → trusted, ACCEPT (new) + +Systemd service (ashburn-relay.service, After=docker+firewalld): + GRE tunnel creation (iproute2) + Ashburn IP on loopback (iproute2) + DNAT rules at PREROUTING position 1 (iptables, before Docker's chain) + DOCKER-USER ACCEPT rules (iptables, for Docker's FORWARD chain) + Mangle marks for policy routing (iptables) + SNAT for marked traffic (iptables) + ip rule + ip route for ashburn table (iproute2) +``` + +## Lessons + +1. **Firewalld with nftables backend and Docker iptables coexist but don't + coordinate.** Adding an interface that Docker uses to forward traffic + requires explicitly assigning it to a firewalld zone. Docker's iptables + ACCEPT is necessary but not sufficient. + +2. **`ip route get X from Y mark Z` is misleading for forwarded traffic.** + It simulates local origination and fails on source address validation. Use + `ip route get X from Y iif DEV mark Z` (DEV = the ingress interface) to simulate forwarded packets. + This wasted significant debugging time. + +3. **SNAT counter = 0 means packets die before POSTROUTING, but the cause + could be in either the routing decision OR a filter chain between PREROUTING + and POSTROUTING.** The nftables filter_FORWARD chain was invisible when only + checking iptables rules. + +4. **The validator passed ip_echo and ran successfully before.** That prior + success was the strongest evidence that the infrastructure was correct and + something changed. The change was firewalld being enabled.
+ +## Related Documents + +- `docs/ashburn-relay-checklist.md` — 7-layer checklist for relay verification +- `docs/bug-ashburn-tunnel-port-filtering.md` — prior DOCKER chain drop bug +- `.claude/skills/biscayne-relay-debugging/SKILL.md` — debugging skill +- `playbooks/ashburn-relay-biscayne.yml` — migrated playbook (firewalld + iptables) +- `scripts/agave-container/ip_echo_preflight.py` — preflight diagnostic tool + +## Related Sessions + +- `d02959a7-2ec6-4d27-8326-1bc4aaf3ebf1` (2026-03-06): Initial relay build, + DOCKER-USER fix, inbound path verified, outbound not end-to-end verified +- `0b5908a4-eff7-46de-9024-a11440bd68a8` (2026-03-09): Relay working (validator + catching up), then reboot introduced firewalld +- `cc6c8c55-fb4c-4482-b161-332ddf175300` (2026-03-10): Root cause found and + fixed (firewalld zone assignment) diff --git a/scripts/agave-container/ip_echo_preflight.py b/scripts/agave-container/ip_echo_preflight.py new file mode 100644 index 00000000..20cbb259 --- /dev/null +++ b/scripts/agave-container/ip_echo_preflight.py @@ -0,0 +1,249 @@ +#!/usr/bin/env python3 +"""ip_echo preflight — verify UDP port reachability before starting the validator. + +Implements the Solana ip_echo client protocol exactly: +1. Bind UDP sockets on the ports the validator will use +2. TCP connect to entrypoint gossip port, send IpEchoServerMessage +3. Parse IpEchoServerResponse (our IP as seen by entrypoint) +4. Wait for entrypoint's UDP probes on each port +5. Exit 0 if all ports reachable, exit 1 if any fail + +Wire format (from agave net-utils/src/): + Request: 4 null bytes + [u16; 4] tcp_ports LE + [u16; 4] udp_ports LE + \n + Response: 4 null bytes + bincode IpAddr (u32 LE variant + 4 or 16 address bytes) + optional shred_version + +Called from entrypoint.py before snapshot download. Prevents wasting hours +downloading a snapshot only to crash-loop on port reachability.
"""  # end of the module docstring that starts on the preceding source line """

from __future__ import annotations

import logging
import os
import socket
import struct
import sys
import threading
import time

log = logging.getLogger("ip_echo_preflight")

HEADER = b"\x00\x00\x00\x00"  # four null bytes prefixing both request and response
TERMINUS = b"\x0a"  # newline that terminates the request
# Largest response: 4 header + 4 variant + 16 IPv6 octets + 1 Option tag + 2 shred_version.
RESPONSE_BUF = 27
IO_TIMEOUT = 5.0  # seconds, applied to the TCP connect/send/recv
PROBE_TIMEOUT = 10.0  # seconds to wait for UDP probes after the TCP exchange
MAX_RETRIES = 3
RETRY_DELAY = 2.0  # seconds between attempts

# bincode IpAddr enum variant -> (address family, number of address octets)
_ADDR_VARIANTS = {0: (socket.AF_INET, 4), 1: (socket.AF_INET6, 16)}


def build_request(tcp_ports: list[int], udp_ports: list[int]) -> bytes:
    """Build an IpEchoServerMessage.

    Layout: 4-byte null header, four little-endian u16 TCP ports, four
    little-endian u16 UDP ports, newline. Lists shorter than four entries are
    zero-padded; entries beyond the fourth are dropped.

    Raises:
        struct.error: if a port does not fit in an unsigned 16-bit integer.
    """
    tcp = (list(tcp_ports) + [0, 0, 0, 0])[:4]
    udp = (list(udp_ports) + [0, 0, 0, 0])[:4]
    return HEADER + struct.pack("<4H", *tcp) + struct.pack("<4H", *udp) + TERMINUS


def parse_response(data: bytes) -> tuple[str, int | None]:
    """Parse an IpEchoServerResponse into (ip_string, shred_version | None).

    Wire format (bincode):
        4 bytes     header (four null bytes)
        4 bytes     IpAddr enum variant (u32 LE: 0=IPv4, 1=IPv6)
        4|16 bytes  address octets
        1 byte      Option tag (0=None, 1=Some)
        2 bytes     shred_version (u16 LE, only if Some)

    Trailing bytes after the fields above are ignored.

    Raises:
        ValueError: if the data is too short, looks like an HTTP reply, has a
            wrong header, or carries an unknown IpAddr variant.
    """
    if len(data) < 8:
        raise ValueError(f"response too short: {len(data)} bytes")
    if data[:4] == b"HTTP":
        raise ValueError("got HTTP response — not an ip_echo server")
    if data[:4] != HEADER:
        raise ValueError(f"unexpected header: {data[:4].hex()}")

    (variant,) = struct.unpack_from("<I", data, 4)
    if variant not in _ADDR_VARIANTS:
        raise ValueError(f"unknown IpAddr variant: {variant}")
    family, addr_len = _ADDR_VARIANTS[variant]

    addr_end = 8 + addr_len
    if len(data) < addr_end:
        raise ValueError(f"response too short: {len(data)} bytes")
    ip = socket.inet_ntop(family, data[8:addr_end])

    # The shred version is optional: only read it when the Option tag says Some
    # and both of its bytes are present.
    rest = data[addr_end:]
    shred_version = None
    if len(rest) >= 3 and rest[0] == 1:
        (shred_version,) = struct.unpack_from("<H", rest, 1)
    return ip, shred_version


def _bind_udp(port: int) -> socket.socket:
    """Return a UDP socket bound to 0.0.0.0:port with a 0.5 s receive timeout.

    The socket is closed before re-raising if any setup step fails.

    Raises:
        OSError: if the socket cannot be configured or bound.
    """
    sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
    try:
        sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        sock.bind(("0.0.0.0", port))
        sock.settimeout(0.5)
    except OSError:
        sock.close()
        raise
    return sock


def _listen_udp(
    port: int,
    results: dict,
    stop: threading.Event,
    sock: socket.socket | None = None,
) -> None:
    """Wait for one probe datagram on ``port`` and record the outcome.

    Writes ``results[port] = (status, detail)`` where status is ``"ok"`` (detail
    is the sender address), ``"bind_error"`` or ``"recv_error"`` (detail is the
    error text). Nothing is written if ``stop`` is set before a datagram arrives.

    Args:
        port: UDP port being tested; used as the key in ``results``.
        results: shared dict the outcome is stored in.
        stop: event that tells the listener to give up.
        sock: already-bound socket to listen on. When omitted the socket is
            bound here. The socket is always closed before returning.
    """
    if sock is None:
        try:
            sock = _bind_udp(port)
        except OSError as exc:
            results[port] = ("bind_error", str(exc))
            return

    with sock:
        # The short socket timeout lets the loop re-check ``stop`` regularly.
        while not stop.is_set():
            try:
                _data, addr = sock.recvfrom(64)
            except socket.timeout:
                continue
            except OSError as exc:
                results[port] = ("recv_error", str(exc))
                return
            results[port] = ("ok", addr)
            return


def _recv_response(sock: socket.socket) -> bytes:
    """Read up to RESPONSE_BUF bytes, stopping early on EOF.

    TCP may deliver the response in several pieces, so a single ``recv`` is not
    enough. A timeout after some data has arrived returns what was received;
    a timeout with no data at all propagates.
    """
    buf = bytearray()
    while len(buf) < RESPONSE_BUF:
        try:
            chunk = sock.recv(RESPONSE_BUF - len(buf))
        except socket.timeout:
            if buf:
                break
            raise
        if not chunk:
            break
        buf += chunk
    return bytes(buf)


def ip_echo_check(
    entrypoint_host: str,
    entrypoint_port: int,
    udp_ports: list[int],
) -> tuple[str, dict[int, bool]]:
    """Run one ip_echo exchange and return (seen_ip, {port: reachable}).

    Zero ports are dropped and at most four ports are tested. UDP sockets are
    bound before the TCP request is sent; listener threads and their sockets
    are always stopped before returning, including when an exception is raised.

    Raises:
        OSError: on TCP connect/send/receive failure (caller retries).
        ValueError: if the response cannot be parsed.
    """
    udp_ports = [p for p in udp_ports if p != 0][:4]

    results: dict[int, tuple] = {}
    stop = threading.Event()
    threads = []
    try:
        # Bind synchronously so every listener is ready before the entrypoint
        # is asked to probe us.
        for port in udp_ports:
            try:
                listener = _bind_udp(port)
            except OSError as exc:
                results[port] = ("bind_error", str(exc))
                continue
            t = threading.Thread(
                target=_listen_udp,
                args=(port, results, stop, listener),
                daemon=True,
            )
            t.start()
            threads.append(t)

        # TCP: send request, read response
        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
            sock.settimeout(IO_TIMEOUT)
            sock.connect((entrypoint_host, entrypoint_port))
            sock.sendall(build_request([], udp_ports))
            resp = _recv_response(sock)

        seen_ip, shred_version = parse_response(resp)
        log.info(
            "entrypoint %s:%d sees us as %s (shred_version=%s)",
            entrypoint_host, entrypoint_port, seen_ip, shred_version,
        )

        # Wait for UDP probes
        deadline = time.monotonic() + PROBE_TIMEOUT
        while time.monotonic() < deadline:
            if all(p in results for p in udp_ports):
                break
            time.sleep(0.2)
    finally:
        # Without this, a failed TCP exchange would leave the listeners running
        # and the ports bound while the caller retries.
        stop.set()
        for t in threads:
            t.join(timeout=1)

    port_ok: dict[int, bool] = {}
    for port in udp_ports:
        if port not in results:
            log.error("port %d: no probe received within %.0fs", port, PROBE_TIMEOUT)
            port_ok[port] = False
            continue
        status, detail = results[port]
        if status == "ok":
            log.info("port %d: probe received from %s", port, detail)
            port_ok[port] = True
        else:
            log.error("port %d: %s: %s", port, status, detail)
            port_ok[port] = False

    return seen_ip, port_ok


def run_preflight(
    entrypoint_host: str,
    entrypoint_port: int,
    udp_ports: list[int],
    expected_ip: str = "",
) -> bool:
    """Run the ip_echo check with retries. Returns True if all ports pass.

    Up to MAX_RETRIES attempts are made, RETRY_DELAY seconds apart. An attempt
    fails if the exchange raises, if ``expected_ip`` is non-empty and differs
    from the address the entrypoint reports (literal string comparison), or if
    any port received no probe.
    """
    for attempt in range(1, MAX_RETRIES + 1):
        if attempt > 1:
            time.sleep(RETRY_DELAY)
        log.info("ip_echo attempt %d/%d → %s:%d, ports %s",
                 attempt, MAX_RETRIES, entrypoint_host, entrypoint_port, udp_ports)
        try:
            seen_ip, port_ok = ip_echo_check(entrypoint_host, entrypoint_port, udp_ports)
        except (OSError, ValueError, struct.error) as exc:
            log.error("attempt %d TCP failed: %s", attempt, exc)
            continue

        if expected_ip and seen_ip != expected_ip:
            log.error(
                "IP MISMATCH: entrypoint sees %s, expected %s (GOSSIP_HOST). "
                "Outbound mangle/SNAT path is broken.",
                seen_ip, expected_ip,
            )
            continue

        reachable = [p for p, ok in port_ok.items() if ok]
        unreachable = [p for p, ok in port_ok.items() if not ok]

        if not unreachable:
            log.info("PASS: all ports reachable %s, seen as %s", reachable, seen_ip)
            return True

        log.error(
            "attempt %d: unreachable %s, reachable %s, seen as %s",
            attempt, unreachable, reachable, seen_ip,
        )

    log.error("FAIL: ip_echo preflight exhausted %d attempts", MAX_RETRIES)
    return False


def _parse_entrypoint(raw: str) -> tuple[str, int]:
    """Split "host:port" into (host, port); the port defaults to 8001.

    Raises:
        ValueError: if the part after the last colon is not an integer.
    """
    host, sep, port_str = raw.rpartition(":")
    if not sep:
        return raw, 8001
    return host, int(port_str)


def _probe_ports(gossip_port: int, dynamic_range: str) -> list[int]:
    """Return the four UDP ports to test for a "start-end" dynamic port range.

    Raises:
        ValueError: if the range start is not an integer.
    """
    range_start = int(dynamic_range.split("-")[0])
    # Gossip port plus dynamic-range offsets 0, +2 and +3. With the defaults
    # that is 8001, 9000, 9002, 9003 — the ports named in
    # docs/ashburn-relay-checklist.md. An ip_echo message carries at most four.
    # NOTE(review): offset +1 is not tested; confirm which validator sockets
    # these offsets correspond to before changing them.
    return [gossip_port, range_start, range_start + 2, range_start + 3]


def main() -> int:
    """Command-line entry point; returns the process exit status.

    Reads the entrypoint from VALIDATOR_ENTRYPOINT ("host:port") or, if that is
    unset, from the first command-line argument. GOSSIP_PORT,
    DYNAMIC_PORT_RANGE and GOSSIP_HOST are read from the environment.

    Returns:
        0 if every tested port is reachable, 1 on failure or bad configuration.
    """
    logging.basicConfig(
        level=logging.INFO,
        format="%(asctime)s %(levelname)s [%(name)s] %(message)s",
        datefmt="%H:%M:%S",
    )

    # Parse entrypoint — VALIDATOR_ENTRYPOINT is "host:port"
    raw = os.environ.get("VALIDATOR_ENTRYPOINT", "")
    if not raw and len(sys.argv) > 1:
        raw = sys.argv[1]
    if not raw:
        log.error("set VALIDATOR_ENTRYPOINT or pass host:port as argument")
        return 1

    try:
        host, ep_port = _parse_entrypoint(raw)
        gossip_port = int(os.environ.get("GOSSIP_PORT", "8001"))
        udp_ports = _probe_ports(
            gossip_port, os.environ.get("DYNAMIC_PORT_RANGE", "9000-10000")
        )
    except ValueError as exc:
        log.error("invalid port configuration: %s", exc)
        return 1

    # Reject out-of-range ports here; otherwise they only fail later when the
    # request is packed, once per retry.
    if not 0 < ep_port <= 65535 or any(not 0 <= p <= 65535 for p in udp_ports):
        log.error("port out of range: entrypoint %d, udp %s", ep_port, udp_ports)
        return 1

    expected_ip = os.environ.get("GOSSIP_HOST", "")

    ok = run_preflight(host, ep_port, udp_ports, expected_ip)
    return 0 if ok else 1


if __name__ == "__main__":
    sys.exit(main())